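"""COCO detection dataset with optional Mosaic / Mixup augmentation.

Wraps the COCO API in a torch `Dataset` and applies the YOLOv5 / YOLOX style
strong augmentations imported from `data_augment.yolov5_augment`.
"""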
import os
import cv2
import time
import random
import numpy as np
import torch
from torch.utils.data import Dataset

try:
    from pycocotools.coco import COCO
except ImportError:
    print("It seems that the COCO API (pycocotools) is not installed.")

try:
    # package-relative import when this file is used as part of the dataset package
    from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
except ImportError:
    # fall back to a plain import when this file is run as a script
    from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
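
# `coco_class_labels` stores the original COCO category names indexed directly
# by COCO category id (index 0 is a 'background' placeholder), while
# `coco_class_index` maps each of the 80 contiguous training labels back to its
# original COCO category id (e.g. label 0 -> category id 1, 'person').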
coco_class_labels = ('background',
                     'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck',
                     'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign',
                     'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
                     'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella',
                     'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
                     'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
                     'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass',
                     'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
                     'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
                     'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk',
                     'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
                     'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book',
                     'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')

coco_class_index = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
                    21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
                    46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67,
                    70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]


class COCODataset(Dataset):
    """
    COCO dataset class.
    """
    def __init__(self,
                 img_size: int = 640,
                 data_dir: str = None,
                 image_set: str = 'train2017',
                 trans_config=None,
                 transform=None,
                 is_train: bool = False,
                 load_cache: str = None):
        """
        COCO dataset initialization. Annotation data are read into memory by the COCO API.
        Args:
            img_size (int): target input image size
            data_dir (str): dataset root directory
            image_set (str): COCO split name (e.g. 'train2017', 'val2017' or 'test2017')
            trans_config (dict): augmentation configuration (mosaic / mixup settings)
            transform: image/target transform applied in pull_item()
            is_train (bool): whether the dataset is used for training
            load_cache (str): optional path to a cached dataset file (loaded with torch.load)
        """
        # ----------- Basic parameters -----------
        self.img_size = img_size
        self.image_set = image_set
        self.is_train = is_train
        self.load_cache = load_cache
        # ----------- Path parameters -----------
        self.data_dir = data_dir
        if image_set == 'train2017':
            self.json_file = 'instances_train2017.json'
        elif image_set == 'val2017':
            self.json_file = 'instances_val2017.json'
        elif image_set == 'test2017':
            self.json_file = 'image_info_test-dev2017.json'
        else:
            raise ValueError('Unknown image_set: {}'.format(image_set))
        # ----------- Data parameters -----------
        self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
        self.ids = self.coco.getImgIds()
        self.class_ids = sorted(self.coco.getCatIds())
        self.dataset_size = len(self.ids)
        # ----------- Transform parameters -----------
        self.transform = transform
        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
        self.trans_config = trans_config
        print('==============================')
        print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
        print('use Mixup Augmentation: {}'.format(self.mixup_prob))
        print('==============================')

        # load cached data if a cache path is given
        if load_cache is not None:
            self._load_cache()

    # ------------ Basic dataset function ------------
    def __len__(self):
        return len(self.ids)

    def __getitem__(self, index):
        return self.pull_item(index)

    def _load_cache(self):
        # load the image/target cache from disk
        try:
            print("Loading cached data ...")
            self.cached_datas = torch.load(self.load_cache)
            self.dataset_size = len(self.cached_datas)
            print("Loading done!")
        except Exception:
            print("Failed to load cached data from {}.".format(self.load_cache))
            self.cached_datas = None
            self.load_cache = None
    # ------------ Mosaic & Mixup ------------
    def load_mosaic(self, index):
        # sample three extra images to build a 4-image mosaic
        index_list = np.arange(index).tolist() + np.arange(index + 1, len(self.ids)).tolist()
        id1 = index
        id2, id3, id4 = random.sample(index_list, 3)
        indexs = [id1, id2, id3, id4]

        # load images and targets
        image_list = []
        target_list = []
        for index in indexs:
            img_i, target_i = self.load_image_target(index)
            image_list.append(img_i)
            target_list.append(target_i)

        # Mosaic
        if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
            image, target = yolov5_mosaic_augment(
                image_list, target_list, self.img_size, self.trans_config, self.is_train)
        else:
            raise ValueError('Unknown mosaic_type: {}'.format(self.trans_config['mosaic_type']))

        return image, target
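
    # Two Mixup variants are supported below: 'yolov5_mixup' blends the current
    # sample with a freshly generated mosaic, while 'yolox_mixup' blends it with
    # a single extra image using the 'mixup_scale' jitter range.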
    def load_mixup(self, origin_image, origin_target):
        # YOLOv5 type Mixup
        if self.trans_config['mixup_type'] == 'yolov5_mixup':
            new_index = np.random.randint(0, len(self.ids))
            new_image, new_target = self.load_mosaic(new_index)
            image, target = yolov5_mixup_augment(
                origin_image, origin_target, new_image, new_target)
        # YOLOX type Mixup
        elif self.trans_config['mixup_type'] == 'yolox_mixup':
            new_index = np.random.randint(0, len(self.ids))
            new_image, new_target = self.load_image_target(new_index)
            image, target = yolox_mixup_augment(
                origin_image, origin_target, new_image, new_target, self.img_size, self.trans_config['mixup_scale'])
        else:
            raise ValueError('Unknown mixup_type: {}'.format(self.trans_config['mixup_type']))

        return image, target

    # ------------ Load data function ------------
    def load_image_target(self, index):
        # == Load a sample from the cached data ==
        if self.load_cache is not None and self.is_train:
            data_item = self.cached_datas[index]
            image = data_item["image"]
            target = data_item["target"]
        # == Load a sample from the local disk ==
        else:
            # load an image
            image, _ = self.pull_image(index)
            height, width, channels = image.shape

            # load a target
            bboxes, labels = self.pull_anno(index)
            target = {
                "boxes": bboxes,
                "labels": labels,
                "orig_size": [height, width]
            }

        return image, target
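
    # pull_item() drives __getitem__: with probability `mosaic_prob` it builds a
    # 4-image mosaic, optionally mixes it with a second sample (`mixup_prob`),
    # and finally applies `self.transform`, which returns (image, target, deltas).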
    def pull_item(self, index):
        if random.random() < self.mosaic_prob:
            # load a mosaic image
            mosaic = True
            image, target = self.load_mosaic(index)
        else:
            mosaic = False
            # load an image and target
            image, target = self.load_image_target(index)

        # MixUp
        if random.random() < self.mixup_prob:
            image, target = self.load_mixup(image, target)

        # augment
        image, target, deltas = self.transform(image, target, mosaic)

        return image, target, deltas

    def pull_image(self, index):
        img_id = self.ids[index]
        img_file = os.path.join(self.data_dir, self.image_set,
                                '{:012}'.format(img_id) + '.jpg')
        image = cv2.imread(img_file)
        if self.json_file == 'instances_val5k.json' and image is None:
            img_file = os.path.join(self.data_dir, 'train2017',
                                    '{:012}'.format(img_id) + '.jpg')
            image = cv2.imread(img_file)
        assert image is not None, "Image file not found: {}".format(img_file)

        return image, img_id

    def pull_anno(self, index):
        img_id = self.ids[index]
        im_ann = self.coco.loadImgs(img_id)[0]
        anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=False)
        annotations = self.coco.loadAnns(anno_ids)

        # image info
        width = im_ann['width']
        height = im_ann['height']

        # load the targets
        bboxes = []
        labels = []
        for anno in annotations:
            if 'bbox' in anno and anno['area'] > 0:
                # clip the bbox to the image and convert [x, y, w, h] -> [x1, y1, x2, y2]
                x1 = np.max((0, anno['bbox'][0]))
                y1 = np.max((0, anno['bbox'][1]))
                x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
                if x2 < x1 or y2 < y1:
                    continue
                # map the COCO category id to a contiguous class label
                cls_id = self.class_ids.index(anno['category_id'])

                bboxes.append([x1, y1, x2, y2])
                labels.append(cls_id)

        # guard against images with no boxes: reshape keeps a valid [N, 4] array even when N == 0
        bboxes = np.array(bboxes).reshape(-1, 4)
        labels = np.array(labels).reshape(-1)

        return bboxes, labels
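

# A minimal sketch (not part of the original file) of a list-based collate
# function one could pair with COCODataset in a torch DataLoader. It assumes
# the transform resizes/pads every image to the same spatial size and returns
# a CHW tensor plus a dict target, exactly what pull_item() yields above; the
# helper name and batching strategy are illustrative assumptions, not the
# repository's own trainer code.
def detection_collate_fn(batch):
    """Stack images into one batch tensor and keep per-image targets in lists."""
    images = torch.stack([sample[0] for sample in batch], dim=0)   # [B, C, H, W]
    targets = [sample[1] for sample in batch]                      # list of target dicts
    deltas = [sample[2] for sample in batch]                       # per-image transform deltas
    return images, targets, deltas
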
if __name__ == "__main__":
    import argparse
    from build import build_transform

    parser = argparse.ArgumentParser(description='COCO-Dataset')
    # opt
    parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/COCO/',
                        help='data root')
    parser.add_argument('-size', '--img_size', default=640, type=int,
                        help='input image size.')
    parser.add_argument('--mosaic', default=None, type=float,
                        help='mosaic augmentation probability.')
    parser.add_argument('--mixup', default=None, type=float,
                        help='mixup augmentation probability.')
    parser.add_argument('--is_train', action="store_true", default=False,
                        help='use the training-mode transform.')
    parser.add_argument('--load_cache', action="store_true", default=False,
                        help='load cached data.')

    args = parser.parse_args()

    trans_config = {
        'aug_type': 'yolov5',    # optional: ssd, yolov5
        # Basic Augment
        'degrees': 0.0,
        'translate': 0.2,
        'scale': [0.5, 2.0],
        'shear': 0.0,
        'perspective': 0.0,
        'hsv_h': 0.015,
        'hsv_s': 0.7,
        'hsv_v': 0.4,
        # Mosaic & Mixup
        'mosaic_prob': 1.0,
        'mixup_prob': 1.0,
        'mosaic_type': 'yolov5_mosaic',
        'mixup_type': 'yolov5_mixup',
        'mixup_scale': [0.5, 1.5]
    }

    transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)

    dataset = COCODataset(
        img_size=args.img_size,
        data_dir=args.root,
        image_set='val2017',
        trans_config=trans_config,
        transform=transform,
        is_train=args.is_train,
        load_cache=args.load_cache
        )
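
    # A hedged sketch (not in the original script) showing how the dataset could
    # be wrapped in a DataLoader with the illustrative detection_collate_fn above;
    # the batch size and worker count here are arbitrary assumptions.
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=4, shuffle=False, num_workers=0,
        collate_fn=detection_collate_fn)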

    np.random.seed(0)
    class_colors = [(np.random.randint(255),
                     np.random.randint(255),
                     np.random.randint(255)) for _ in range(80)]
    print('Data length: ', len(dataset))

    for i in range(1000):
        image, target, deltas = dataset.pull_item(i)

        # to numpy
        image = image.permute(1, 2, 0).numpy()
        # to uint8
        image = image.astype(np.uint8)
        image = image.copy()
        img_h, img_w = image.shape[:2]

        boxes = target["boxes"]
        labels = target["labels"]
        for box, label in zip(boxes, labels):
            x1, y1, x2, y2 = box
            cls_id = int(label)
            color = class_colors[cls_id]
            # class name
            label = coco_class_labels[coco_class_index[cls_id]]
            image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
            # put the text on the bbox
            cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
        cv2.imshow('gt', image)
        # cv2.imwrite(str(i) + '.jpg', image)
        cv2.waitKey(0)