# Source code for reid.datasets.cuhk03

from __future__ import print_function, absolute_import
import os.path as osp

import numpy as np

from ..utils.data import Dataset
from ..utils.osutils import mkdir_if_missing
from ..utils.serialization import write_json


class CUHK03(Dataset):
    """CUHK03 person re-identification dataset wrapper.

    The raw ``cuhk03_release.zip`` must be downloaded manually (see ``url``)
    into ``<root>/raw``. :meth:`download` verifies its MD5, extracts it,
    converts the MATLAB v7.3 (HDF5) annotations into per-identity JPEG
    images, and writes the ``meta.json`` / ``splits.json`` files expected by
    the base :class:`Dataset`.

    Args:
        root: Root directory where the dataset is (or will be) stored.
        split_id: Index of the train/test split to load.
        num_val: Number of trainval identities held out for validation.
        download: If True, run the extraction/formatting step first.

    Raises:
        RuntimeError: If the formatted dataset is missing or corrupted.
    """

    url = 'https://docs.google.com/spreadsheet/viewform?usp=drive_web&formkey=dHRkMkFVSUFvbTJIRkRDLWRwZWpONnc6MA#gid=0'
    md5 = '728939e58ad9f0ff53e521857dd8fb43'

    def __init__(self, root, split_id=0, num_val=100, download=True):
        super(CUHK03, self).__init__(root, split_id=split_id)

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. " +
                               "You can use download=True to download it.")

        self.load(num_val)

    def download(self):
        """Verify, extract, and reformat the manually downloaded zip file.

        Raises:
            RuntimeError: If the zip file is absent or fails the MD5 check.
        """
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        # Heavy/optional dependencies are imported lazily so that merely
        # importing this module does not require them.
        import h5py
        import hashlib
        # NOTE(review): scipy.misc.imsave was deprecated and removed in
        # SciPy >= 1.2; a modern environment needs imageio.imwrite (new
        # dependency) here — confirm the pinned SciPy version.
        from scipy.misc import imsave
        from zipfile import ZipFile

        raw_dir = osp.join(self.root, 'raw')
        mkdir_if_missing(raw_dir)

        # Verify the manually downloaded zip file.  Fix: read it inside a
        # `with` block instead of leaking the handle from open(...).read().
        fpath = osp.join(raw_dir, 'cuhk03_release.zip')
        digest = None
        if osp.isfile(fpath):
            with open(fpath, 'rb') as f:
                digest = hashlib.md5(f.read()).hexdigest()
        if digest == self.md5:
            print("Using downloaded file: " + fpath)
        else:
            raise RuntimeError("Please download the dataset manually from {} "
                               "to {}".format(self.url, fpath))

        # Extract the file
        exdir = osp.join(raw_dir, 'cuhk03_release')
        if not osp.isdir(exdir):
            print("Extracting zip file")
            with ZipFile(fpath) as z:
                z.extractall(path=raw_dir)

        # Format
        images_dir = osp.join(self.root, 'images')
        mkdir_if_missing(images_dir)
        matdata = h5py.File(osp.join(exdir, 'cuhk-03.mat'), 'r')

        def deref(ref):
            # Dereference an HDF5 object reference; transpose to restore
            # MATLAB's (H, W, C) memory layout.
            return matdata[ref][:].T

        def dump_(refs, pid, cam, fnames):
            # Save each referenced crop as <pid>_<cam>_<index>.jpg.  The
            # reference arrays are padded with empty entries at the end, so
            # stop at the first empty/degenerate image.
            for ref in refs:
                img = deref(ref)
                if img.size == 0 or img.ndim < 2:
                    break
                fname = '{:08d}_{:02d}_{:04d}.jpg'.format(pid, cam, len(fnames))
                imsave(osp.join(images_dir, fname), img)
                fnames.append(fname)

        identities = []
        for labeled, detected in zip(
                matdata['labeled'][0], matdata['detected'][0]):
            labeled, detected = deref(labeled), deref(detected)
            assert labeled.shape == detected.shape
            for i in range(labeled.shape[0]):
                pid = len(identities)
                images = [[], []]
                # Columns 0-4 are camera 0, columns 5-9 camera 1; both the
                # hand-labeled and auto-detected crops are kept per camera.
                dump_(labeled[i, :5], pid, 0, images[0])
                dump_(detected[i, :5], pid, 0, images[0])
                dump_(labeled[i, 5:], pid, 1, images[1])
                dump_(detected[i, 5:], pid, 1, images[1])
                identities.append(images)

        # Save meta information into a json file
        meta = {'name': 'cuhk03', 'shot': 'multiple', 'num_cameras': 2,
                'identities': identities}
        write_json(meta, osp.join(self.root, 'meta.json'))

        # Save training and test splits
        splits = []
        view_counts = [deref(ref).shape[0] for ref in matdata['labeled'][0]]
        # Cumulative offsets map (view, within-view index) to a global pid.
        vid_offsets = np.r_[0, np.cumsum(view_counts)]
        for ref in matdata['testsets'][0]:
            test_info = deref(ref).astype(np.int32)
            # test_info rows are 1-based (view, index) pairs from MATLAB;
            # convert to 0-based global pids.
            test_pids = sorted(
                [int(vid_offsets[i - 1] + j - 1) for i, j in test_info])
            # Fix: sort for a deterministic splits.json — bare set iteration
            # order is not stable across runs/interpreters.
            trainval_pids = sorted(
                set(range(vid_offsets[-1])) - set(test_pids))
            split = {'trainval': trainval_pids,
                     'query': test_pids,
                     'gallery': test_pids}
            splits.append(split)
        write_json(splits, osp.join(self.root, 'splits.json'))
        # Fix: release the HDF5 handle once formatting is complete.
        matdata.close()