| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326 |
- import os
- import cv2
- import time
- import random
- import numpy as np
- from torch.utils.data import Dataset
- from pycocotools.coco import COCO
- try:
- from .data_augment.strong_augment import MosaicAugment, MixupAugment
- except:
- from data_augment.strong_augment import MosaicAugment, MixupAugment
- coco_class_indexs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
- coco_class_labels = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')
- coco_json_files = {
- 'train2017_clean': 'instances_train2017_clean.json',
- 'val2017_clean' : 'instances_val2017_clean.json',
- 'train2017' : 'instances_train2017.json',
- 'val2017' : 'instances_val2017.json',
- 'test2017' : 'image_info_test.json',
- }
- class COCODataset(Dataset):
- def __init__(self,
- cfg,
- data_dir :str = None,
- image_set :str = 'train2017',
- transform = None,
- is_train :bool = False,
- use_mask :bool = False,
- ):
- # ----------- Basic parameters -----------
- self.data_dir = data_dir
- self.image_set = image_set
- self.is_train = is_train
- self.use_mask = use_mask
- self.num_classes = 80
- # ----------- Data parameters -----------
- try:
- self.json_file = coco_json_files['{}_clean'.format(image_set)]
- self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
- except:
- self.json_file = coco_json_files['{}'.format(image_set)]
- self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
- self.ids = self.coco.getImgIds()
- self.class_ids = sorted(self.coco.getCatIds())
- self.dataset_size = len(self.ids)
- self.class_labels = coco_class_labels
- self.class_indexs = coco_class_indexs
- # ----------- Transform parameters -----------
- self.transform = transform
- if is_train:
- self.mosaic_prob = cfg.mosaic_prob
- self.mixup_prob = cfg.mixup_prob
- self.copy_paste = cfg.copy_paste
- self.mosaic_augment = None if cfg.mosaic_prob == 0. else MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
- self.mixup_augment = None if cfg.mixup_prob == 0. and cfg.copy_paste == 0. else MixupAugment(cfg.train_img_size)
- else:
- self.mosaic_prob = 0.0
- self.mixup_prob = 0.0
- self.copy_paste = 0.0
- self.mosaic_augment = None
- self.mixup_augment = None
- print('==============================')
- print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
- print('use Mixup Augmentation: {}'.format(self.mixup_prob))
- print('use Copy-paste Augmentation: {}'.format(self.copy_paste))
- # ------------ Basic dataset function ------------
- def __len__(self):
- return len(self.ids)
- def __getitem__(self, index):
- return self.pull_item(index)
- # ------------ Mosaic & Mixup ------------
- def load_mosaic(self, index):
- # ------------ Prepare 4 indexes of images ------------
- ## Load 4x mosaic image
- index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
- id1 = index
- id2, id3, id4 = random.sample(index_list, 3)
- indexs = [id1, id2, id3, id4]
- ## Load images and targets
- image_list = []
- target_list = []
- for index in indexs:
- img_i, target_i = self.load_image_target(index)
- image_list.append(img_i)
- target_list.append(target_i)
- # ------------ Mosaic augmentation ------------
- image, target = self.mosaic_augment(image_list, target_list)
- return image, target
- def load_mixup(self, origin_image, origin_target, yolox_style=False):
- # ------------ Load a new image & target ------------
- new_index = np.random.randint(0, len(self.ids))
- new_image, new_target = self.load_mosaic(new_index)
-
- # ------------ Mixup augmentation ------------
- image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target, yolox_style)
- return image, target
-
- # ------------ Load data function ------------
- def load_image_target(self, index):
- # load an image
- image, _ = self.pull_image(index)
- height, width, channels = image.shape
- # load a target
- bboxes, labels = self.pull_anno(index)
- target = {
- "boxes": bboxes,
- "labels": labels,
- "orig_size": [height, width]
- }
- return image, target
- def pull_item(self, index):
- if random.random() < self.mosaic_prob:
- # load a mosaic image
- mosaic = True
- image, target = self.load_mosaic(index)
- else:
- mosaic = False
- # load an image and target
- image, target = self.load_image_target(index)
- # Yolov5-MixUp
- mixup = False
- if random.random() < self.mixup_prob:
- mixup = True
- image, target = self.load_mixup(image, target)
- # Copy-paste (use Yolox-Mixup to approximate copy-paste)
- if not mixup and random.random() < self.copy_paste:
- image, target = self.load_mixup(image, target, yolox_style=True)
- # augment
- image, target, deltas = self.transform(image, target, mosaic)
- return image, target, deltas
- def pull_image(self, index):
- img_id = self.ids[index]
- img_file = os.path.join(self.data_dir, self.image_set,
- '{:012}'.format(img_id) + '.jpg')
- image = cv2.imread(img_file)
- if self.json_file == 'instances_val5k.json' and image is None:
- img_file = os.path.join(self.data_dir, 'train2017',
- '{:012}'.format(img_id) + '.jpg')
- image = cv2.imread(img_file)
- assert image is not None
- return image, img_id
- def pull_anno(self, index):
- img_id = self.ids[index]
- im_ann = self.coco.loadImgs(img_id)[0]
- anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=False)
- annotations = self.coco.loadAnns(anno_ids)
- # image infor
- width = im_ann['width']
- height = im_ann['height']
-
- #load a target
- bboxes = []
- labels = []
- for anno in annotations:
- if 'bbox' in anno and anno['area'] > 0:
- # bbox
- x1 = np.max((0, anno['bbox'][0]))
- y1 = np.max((0, anno['bbox'][1]))
- x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
- y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
- if x2 < x1 or y2 < y1:
- continue
- # class label
- cls_id = self.class_ids.index(anno['category_id'])
-
- bboxes.append([x1, y1, x2, y2])
- labels.append(cls_id)
- # guard against no boxes via resizing
- bboxes = np.array(bboxes).reshape(-1, 4)
- labels = np.array(labels).reshape(-1)
-
- return bboxes, labels
- if __name__ == "__main__":
- import time
- import argparse
- from build import build_transform
-
- parser = argparse.ArgumentParser(description='COCO-Dataset')
- # opt
- parser.add_argument('--root', default='D:/python_work/dataset/COCO/',
- help='data root')
- parser.add_argument('--is_train', action="store_true", default=False,
- help='mixup augmentation.')
- parser.add_argument('--aug_type', default="yolo", type=str, choices=["yolo", "ssd"],
- help='yolo, ssd.')
- args = parser.parse_args()
- class YoloBaseConfig(object):
- def __init__(self) -> None:
- self.max_stride = 32
- # ---------------- Data process config ----------------
- self.box_format = 'xywh'
- self.normalize_coords = False
- self.mosaic_prob = 1.0
- self.mixup_prob = 0.15
- self.copy_paste = 0.3
- ## Pixel mean & std
- self.pixel_mean = [0., 0., 0.]
- self.pixel_std = [255., 255., 255.]
- ## Transforms
- self.train_img_size = 640
- self.test_img_size = 640
- self.use_ablu = True
- self.aug_type = 'yolo'
- self.affine_params = {
- 'degrees': 0.0,
- 'translate': 0.2,
- 'scale': [0.1, 2.0],
- 'shear': 0.0,
- 'perspective': 0.0,
- 'hsv_h': 0.015,
- 'hsv_s': 0.7,
- 'hsv_v': 0.4,
- }
- class SSDBaseConfig(object):
- def __init__(self) -> None:
- self.max_stride = 32
- # ---------------- Data process config ----------------
- self.box_format = 'xywh'
- self.normalize_coords = False
- self.mosaic_prob = 0.0
- self.mixup_prob = 0.0
- self.copy_paste = 0.0
- ## Pixel mean & std
- self.pixel_mean = [0., 0., 0.]
- self.pixel_std = [255., 255., 255.]
- ## Transforms
- self.train_img_size = 640
- self.test_img_size = 640
- self.aug_type = 'ssd'
- if args.aug_type == "yolo":
- cfg = YoloBaseConfig()
- elif args.aug_type == "ssd":
- cfg = SSDBaseConfig()
- transform = build_transform(cfg, args.is_train)
- dataset = COCODataset(cfg, args.root, 'val2017', transform, args.is_train)
-
- np.random.seed(0)
- class_colors = [(np.random.randint(255),
- np.random.randint(255),
- np.random.randint(255)) for _ in range(80)]
- print('Data length: ', len(dataset))
- for i in range(1000):
- t0 = time.time()
- image, target, deltas = dataset.pull_item(i)
- print("Load data: {} s".format(time.time() - t0))
- # to numpy
- image = image.permute(1, 2, 0).numpy()
-
- # denormalize
- image = image * cfg.pixel_std + cfg.pixel_mean
- # rgb -> bgr
- if transform.color_format == 'rgb':
- image = image[..., (2, 1, 0)]
- # to uint8
- image = image.astype(np.uint8)
- image = image.copy()
- img_h, img_w = image.shape[:2]
- boxes = target["boxes"]
- labels = target["labels"]
- for box, label in zip(boxes, labels):
- if cfg.box_format == 'xyxy':
- x1, y1, x2, y2 = box
- elif cfg.box_format == 'xywh':
- cx, cy, bw, bh = box
- x1 = cx - 0.5 * bw
- y1 = cy - 0.5 * bh
- x2 = cx + 0.5 * bw
- y2 = cy + 0.5 * bh
-
- if cfg.normalize_coords:
- x1 *= img_w
- y1 *= img_h
- x2 *= img_w
- y2 *= img_h
- cls_id = int(label)
- color = class_colors[cls_id]
- # class name
- label = coco_class_labels[cls_id]
- image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
- # put the test on the bbox
- cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
- cv2.imshow('gt', image)
- # cv2.imwrite(str(i)+'.jpg', img)
- cv2.waitKey(0)
|