Source code for reid.datasets.dukemtmc

from __future__ import print_function, absolute_import
import os
import os.path as osp

from ..utils.data import Dataset
from ..utils.osutils import mkdir_if_missing
from ..utils.serialization import write_json


class DukeMTMC(Dataset):
    """DukeMTMC dataset wrapper that converts the raw archive to this
    package's on-disk layout.

    The raw archive is *not* fetched automatically: it must be placed at
    ``<root>/raw/Duke.tar.gz`` by hand (see :attr:`url`). :meth:`download`
    verifies its MD5 checksum, extracts it, copies every image into
    ``<root>/images`` under a normalized file name and writes ``meta.json``
    and ``splits.json`` into ``<root>``.

    Args:
        root: Dataset root directory (forwarded to ``Dataset.__init__``).
        split_id: Index of the split to use (forwarded to
            ``Dataset.__init__``).
        num_val: Passed unchanged to ``self.load`` (defined on the base
            class; presumably the number of validation identities --
            confirm against ``Dataset.load``).
        download: When true, run :meth:`download` before the integrity
            check.

    Raises:
        RuntimeError: If the raw archive is missing or fails the MD5 check
            (raised from :meth:`download`), or if the integrity check
            fails after the optional preparation step.
    """

    # Location the raw archive has to be fetched from manually.
    url = 'https://drive.google.com/open?id=0B0VOCNYh8HeRSDRwczZIT0lZTG8'
    # Expected MD5 hex digest of ``Duke.tar.gz``.
    md5 = '286aaef9ba5db58853d91b66a028923b'

    def __init__(self, root, split_id=0, num_val=100, download=True):
        super(DukeMTMC, self).__init__(root, split_id=split_id)

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. " +
                               "You can use download=True to download it.")

        self.load(num_val)

    def download(self):
        """Verify, extract and reformat the manually downloaded archive.

        Does nothing (besides printing a notice) when
        ``self._check_integrity()`` already succeeds.

        Raises:
            RuntimeError: If ``<root>/raw/Duke.tar.gz`` does not exist or
                its MD5 digest differs from :attr:`md5`.
        """
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        import re
        import hashlib
        import shutil
        import tarfile
        from glob import glob

        def file_md5(path, chunk_size=1 << 20):
            """Return the MD5 hex digest of *path*, read in chunks."""
            digest = hashlib.md5()
            # Stream the archive so it is never held in memory as a whole,
            # and make sure the handle is closed again.
            with open(path, 'rb') as f:
                for chunk in iter(lambda: f.read(chunk_size), b''):
                    digest.update(chunk)
            return digest.hexdigest()

        raw_dir = osp.join(self.root, 'raw')
        mkdir_if_missing(raw_dir)

        # Locate and verify the raw archive; it is never downloaded here.
        fpath = osp.join(raw_dir, 'Duke.tar.gz')
        if osp.isfile(fpath) and file_md5(fpath) == self.md5:
            print("Using downloaded file: " + fpath)
        else:
            raise RuntimeError("Please download the dataset manually from {} "
                               "to {}".format(self.url, fpath))

        # Extract the archive, but only if the target directory does not
        # exist yet. NOTE: a partially extracted directory left behind by an
        # earlier failure is therefore not repaired automatically.
        exdir = osp.join(raw_dir, 'Duke')
        if not osp.isdir(exdir):
            mkdir_if_missing(exdir)
            print("Extracting tar file")
            # Extract straight into ``exdir`` instead of changing the
            # process-wide working directory; the context manager closes
            # the archive even when extraction fails.
            with tarfile.open(fpath, 'r:gz') as tar:
                tar.extractall(path=exdir)

        # Format
        images_dir = osp.join(self.root, 'images')
        mkdir_if_missing(images_dir)

        # identities[pid][cam] -> list of image file names for that person
        # and camera, where ``pid`` is the re-labelled, zero-based id.
        identities = []
        # Maps the person id found in the file name to the zero-based id.
        all_pids = {}

        def register(subdir, pattern=re.compile(r'([-\d]+)_c(\d)')):
            """Copy all ``*.jpg`` files of *subdir* and index them.

            Returns the set of re-labelled person ids seen in *subdir*.
            """
            src_paths = sorted(glob(osp.join(exdir, subdir, '*.jpg')))
            pids = set()
            for src_path in src_paths:
                fname = osp.basename(src_path)
                pid, cam = map(int, pattern.search(fname).groups())
                assert 1 <= cam <= 8
                cam -= 1  # camera ids in file names are one-based
                if pid not in all_pids:
                    all_pids[pid] = len(all_pids)
                pid = all_pids[pid]
                pids.add(pid)
                if pid >= len(identities):
                    # New ids are handed out consecutively, so a new id is
                    # always exactly the next list index.
                    assert pid == len(identities)
                    identities.append([[] for _ in range(8)])  # 8 camera views
                fname = ('{:08d}_{:02d}_{:04d}.jpg'
                         .format(pid, cam, len(identities[pid][cam])))
                identities[pid][cam].append(fname)
                shutil.copy(src_path, osp.join(images_dir, fname))
            return pids

        # The call order matters: it determines the re-labelled ids.
        trainval_pids = register('bounding_box_train')
        gallery_pids = register('bounding_box_test')
        query_pids = register('query')
        assert query_pids <= gallery_pids
        assert trainval_pids.isdisjoint(gallery_pids)

        # Save meta information into a json file
        meta = {'name': 'DukeMTMC', 'shot': 'multiple', 'num_cameras': 8,
                'identities': identities}
        write_json(meta, osp.join(self.root, 'meta.json'))

        # Save the only training / test split
        splits = [{
            'trainval': sorted(trainval_pids),
            'query': sorted(query_pids),
            'gallery': sorted(gallery_pids)}]
        write_json(splits, osp.join(self.root, 'splits.json'))