customed.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. import os
  2. import cv2
  3. import time
  4. import random
  5. import numpy as np
  6. import torch
  7. from torch.utils.data import Dataset
  8. try:
  9. from pycocotools.coco import COCO
  10. except:
  11. print("It seems that the COCOAPI is not installed.")
  12. try:
  13. from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
  14. except:
  15. from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
  class CustomedDataset(Dataset):
      """COCO-format detection dataset with optional Mosaic / Mixup augmentation.

      Files are looked up at (see ``__init__`` and ``pull_image``):
          <data_dir>/<image_set>/annotations/<image_set>.json
          <data_dir>/<image_set>/images/<file_name>

      ``__getitem__`` returns whatever ``transform(image, target, mosaic)``
      returns, unpacked as ``(image, target, deltas)``.
      """

      def __init__(self,
                   img_size: int = 640,
                   data_dir: str = None,
                   image_set: str = 'train',
                   transform=None,
                   trans_config=None,
                   is_train: bool = False,
                   load_cache: bool = False,
                   ):
          """Index the annotation file and optionally pre-load every sample.

          Args:
              img_size: Target size of the longer image side used when caching,
                  and the size forwarded to the mosaic / mixup helpers.
              data_dir: Dataset root directory.
              image_set: Split name; also the sub-directory and json file stem.
              transform: Callable invoked as ``transform(image, target, mosaic)``.
              trans_config: Dict providing ``mosaic_prob`` and ``mixup_prob``
                  (plus the keys read by ``load_mosaic`` / ``load_mixup``).
                  ``None`` disables both augmentations.
              is_train: Forwarded to the mosaic augmentation.
              load_cache: If True, load and resize all samples into memory now.
          """
          super().__init__()
          # ----------- Basic parameters -----------
          self.img_size = img_size
          self.image_set = image_set
          self.is_train = is_train
          # ----------- Path parameters -----------
          self.data_dir = data_dir
          self.json_file = '{}.json'.format(image_set)
          # ----------- Data parameters -----------
          self.coco = COCO(os.path.join(self.data_dir, image_set, 'annotations', self.json_file))
          self.ids = self.coco.getImgIds()
          self.class_ids = sorted(self.coco.getCatIds())
          self.dataset_size = len(self.ids)
          # ----------- Transform parameters -----------
          self.transform = transform
          self.mosaic_prob = 0
          self.mixup_prob = 0
          self.trans_config = trans_config
          if trans_config is not None:
              # A probability of None (e.g. an unset CLI flag) means "disabled";
              # keeping None would make `random.random() < prob` raise TypeError.
              self.mosaic_prob = trans_config['mosaic_prob'] or 0
              self.mixup_prob = trans_config['mixup_prob'] or 0

          print('==============================')
          print('Image Set: {}'.format(image_set))
          print('Json file: {}'.format(self.json_file))
          print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
          print('use Mixup Augmentation: {}'.format(self.mixup_prob))
          print('==============================')
          # ----------- Cached data -----------
          self.load_cache = load_cache
          # Must be None while _load_cache() runs so that it reads from disk.
          self.cached_datas = None
          if self.load_cache:
              self.cached_datas = self._load_cache()

      # ------------ Basic dataset function ------------
      def __len__(self):
          """Return the number of images listed in the annotation file."""
          return len(self.ids)

      def __getitem__(self, index):
          """Return the augmented sample at ``index`` (see ``pull_item``)."""
          return self.pull_item(index)

      def _load_cache(self):
          """Load every sample, resize it so its longer side is ``img_size``.

          Returns:
              A list of ``{"image": ndarray, "target": dict}`` items whose
              boxes are rescaled to the resized image.
          """
          data_items = []
          for idx in range(self.dataset_size):
              if idx % 2000 == 0:
                  print("Caching images and targets : {} / {} ...".format(idx, self.dataset_size))

              # load a data
              image, target = self.load_image_target(idx)
              orig_h, orig_w = image.shape[:2]

              # resize image (keep aspect ratio, longer side -> img_size)
              r = self.img_size / max(orig_h, orig_w)
              if r != 1:
                  # Clamp to 1 px so extreme aspect ratios never yield a
                  # zero-sized target, which cv2.resize rejects.
                  new_size = (max(1, int(orig_w * r)), max(1, int(orig_h * r)))
                  image = cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)
              img_h, img_w = image.shape[:2]

              # rescale bbox to the resized image
              boxes = target["boxes"].copy()
              boxes[:, [0, 2]] = boxes[:, [0, 2]] / orig_w * img_w
              boxes[:, [1, 3]] = boxes[:, [1, 3]] / orig_h * img_h
              target["boxes"] = boxes

              data_items.append({"image": image, "target": target})

          return data_items

      # ------------ Mosaic & Mixup ------------
      def load_mosaic(self, index):
          """Build a 4-image mosaic from ``index`` and three other random samples.

          Raises:
              ValueError: If ``trans_config['mosaic_type']`` is not supported or
                  the dataset holds fewer than four images.
          """
          mosaic_type = self.trans_config['mosaic_type']
          if mosaic_type != 'yolov5_mosaic':
              raise ValueError("Unknown mosaic type: {}".format(mosaic_type))

          num_others = len(self.ids) - 1
          if num_others < 3:
              raise ValueError(
                  "Mosaic needs at least 4 images, but the dataset has {}.".format(len(self.ids)))

          # Draw three distinct indices different from `index` without
          # materialising the whole index list: sample positions in
          # [0, N-1) and skip over `index`.
          others = [i if i < index else i + 1
                    for i in random.sample(range(num_others), 3)]
          indexs = [index] + others

          # load images and targets
          image_list = []
          target_list = []
          for sample_idx in indexs:
              img_i, target_i = self.load_image_target(sample_idx)
              image_list.append(img_i)
              target_list.append(target_i)

          # Mosaic
          image, target = yolov5_mosaic_augment(
              image_list, target_list, self.img_size, self.trans_config,
              self.trans_config['mosaic_keep_ratio'], self.is_train)

          return image, target

      def load_mixup(self, origin_image, origin_target):
          """Blend ``origin_image`` with another randomly chosen sample.

          The YOLOv5 variant mixes with a freshly built mosaic; the YOLOX
          variant mixes with a single plain image.

          Raises:
              ValueError: If ``trans_config['mixup_type']`` is not supported.
          """
          mixup_type = self.trans_config['mixup_type']
          # YOLOv5 type Mixup
          if mixup_type == 'yolov5_mixup':
              new_index = np.random.randint(0, len(self.ids))
              new_image, new_target = self.load_mosaic(new_index)
              image, target = yolov5_mixup_augment(
                  origin_image, origin_target, new_image, new_target)
          # YOLOX type Mixup
          elif mixup_type == 'yolox_mixup':
              new_index = np.random.randint(0, len(self.ids))
              new_image, new_target = self.load_image_target(new_index)
              image, target = yolox_mixup_augment(
                  origin_image, origin_target, new_image, new_target,
                  self.img_size, self.trans_config['mixup_scale'])
          else:
              raise ValueError("Unknown mixup type: {}".format(mixup_type))

          return image, target

      # ------------ Load data function ------------
      def load_image_target(self, index):
          """Return ``(image, target)`` for ``index`` from the cache or from disk.

          ``target`` is a dict with keys ``"boxes"``, ``"labels"`` and
          ``"orig_size"`` (``[height, width]`` of the image as read from disk).
          """
          # == Load a data from the cached data ==
          if self.cached_datas is not None:
              data_item = self.cached_datas[index]
              # Hand out copies so that in-place edits made by later
              # augmentation steps cannot corrupt the cached sample.
              image = data_item["image"].copy()
              target = {key: (value.copy() if hasattr(value, "copy") else value)
                        for key, value in data_item["target"].items()}
          # == Load a data from the local disk ==
          else:
              # load an image
              image, _ = self.pull_image(index)
              height, width = image.shape[:2]

              # load a target
              bboxes, labels = self.pull_anno(index)
              target = {
                  "boxes": bboxes,
                  "labels": labels,
                  "orig_size": [height, width]
              }

          return image, target

      def pull_item(self, index):
          """Load sample ``index``, apply Mosaic / Mixup by chance, then transform."""
          if random.random() < self.mosaic_prob:
              # load a mosaic image
              mosaic = True
              image, target = self.load_mosaic(index)
          else:
              mosaic = False
              # load an image and target
              image, target = self.load_image_target(index)

          # MixUp
          if random.random() < self.mixup_prob:
              image, target = self.load_mixup(image, target)

          # augment
          image, target, deltas = self.transform(image, target, mosaic)

          return image, target, deltas

      def pull_image(self, index):
          """Read the image at ``index`` from disk.

          Returns:
              ``(image, image_id)`` where ``image`` is what ``cv2.imread`` returns.

          Raises:
              FileNotFoundError: If the image file cannot be read.
          """
          id_ = self.ids[index]
          im_ann = self.coco.loadImgs(id_)[0]
          img_file = os.path.join(
              self.data_dir, self.image_set, 'images', im_ann["file_name"])
          image = cv2.imread(img_file)
          # cv2.imread signals failure by returning None instead of raising.
          if image is None:
              raise FileNotFoundError("Failed to read image: {}".format(img_file))

          return image, id_

      def pull_anno(self, index):
          """Return the non-crowd boxes and class indices of image ``index``.

          COCO ``[x, y, w, h]`` boxes are converted to ``[x1, y1, x2, y2]``
          clipped to the image; annotations with non-positive area or a
          degenerate clipped box are dropped.

          Returns:
              ``(bboxes, labels)``: arrays of shape ``(N, 4)`` and ``(N,)``;
              labels are positions in the sorted category-id list.
          """
          img_id = self.ids[index]
          im_ann = self.coco.loadImgs(img_id)[0]
          anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=0)
          annotations = self.coco.loadAnns(anno_ids)

          # image infor
          width = im_ann['width']
          height = im_ann['height']

          # load a target
          bboxes = []
          labels = []
          for anno in annotations:
              if 'bbox' not in anno or anno['area'] <= 0:
                  continue
              box_x, box_y, box_w, box_h = anno['bbox'][:4]
              # bbox: xywh -> clipped xyxy (inclusive pixel coordinates)
              x1 = max(0, box_x)
              y1 = max(0, box_y)
              x2 = min(width - 1, x1 + max(0, box_w - 1))
              y2 = min(height - 1, y1 + max(0, box_h - 1))
              if x2 <= x1 or y2 <= y1:
                  continue
              # class label
              cls_id = self.class_ids.index(anno['category_id'])

              bboxes.append([x1, y1, x2, y2])
              labels.append(cls_id)

          # reshape so that an image without boxes still yields (0, 4) / (0,)
          bboxes = np.array(bboxes).reshape(-1, 4)
          labels = np.array(labels).reshape(-1)

          return bboxes, labels
  if __name__ == "__main__":
      # Visual sanity check: load samples through the dataset and display
      # them with their ground-truth boxes, one window per sample.
      import time
      import argparse
      from build import build_transform

      import sys
      sys.path.append("..")
      from config.data_config.dataset_config import dataset_cfg
      data_config = dataset_cfg["customed"]
      categories = data_config["class_names"]

      parser = argparse.ArgumentParser(description='RT-ODLab')

      # opt
      parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/AnimalDataset/',
                          help='data root')
      parser.add_argument('--split', default='train',
                          help='data split')
      parser.add_argument('-size', '--img_size', default=640, type=int,
                          help='input image size')
      parser.add_argument('--min_box_size', default=8.0, type=float,
                          help='min size of target bounding box.')
      # aug_type / mixup_type are read below when building trans_config, so
      # they must exist on the parser; the choices follow the inline notes
      # next to the corresponding config keys.
      parser.add_argument('--aug_type', default='yolov5', type=str,
                          help='augmentation type: ssd, yolov5.')
      # Probabilities default to 0 (disabled) rather than None, which cannot
      # be compared against random.random().
      parser.add_argument('--mosaic', default=0., type=float,
                          help='mosaic augmentation.')
      parser.add_argument('--mixup', default=0., type=float,
                          help='mixup augmentation.')
      parser.add_argument('--mixup_type', default='yolov5_mixup', type=str,
                          help='mixup type: yolov5_mixup, yolox_mixup.')
      parser.add_argument('--is_train', action="store_true", default=False,
                          help='use the training-time transform.')
      parser.add_argument('--load_cache', action="store_true", default=False,
                          help='load cached data.')

      args = parser.parse_args()

      trans_config = {
          'aug_type': args.aug_type,    # optional: ssd, yolov5
          'pixel_mean': [0., 0., 0.],
          'pixel_std': [255., 255., 255.],
          # Basic Augment
          'degrees': 0.0,
          'translate': 0.2,
          'scale': [0.1, 2.0],
          'shear': 0.0,
          'perspective': 0.0,
          'hsv_h': 0.015,
          'hsv_s': 0.7,
          'hsv_v': 0.4,
          'use_ablu': True,
          # Mosaic & Mixup
          'mosaic_prob': args.mosaic,
          'mixup_prob': args.mixup,
          'mosaic_type': 'yolov5_mosaic',
          'mixup_type': args.mixup_type,   # optional: yolov5_mixup, yolox_mixup
          'mosaic_keep_ratio': False,
          'mixup_scale': [0.5, 1.5]
      }
      transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)
      pixel_mean = transform.pixel_mean
      pixel_std = transform.pixel_std
      color_format = transform.color_format

      dataset = CustomedDataset(
          img_size=args.img_size,
          data_dir=args.root,
          image_set=args.split,
          transform=transform,
          trans_config=trans_config,
          is_train=args.is_train,
          load_cache=args.load_cache
          )

      # One fixed (seeded) colour per class name.
      np.random.seed(0)
      class_colors = [(np.random.randint(255),
                       np.random.randint(255),
                       np.random.randint(255)) for _ in range(len(categories))]
      print('Data length: ', len(dataset))

      # Show at most 1000 samples, never more than the dataset holds.
      for i in range(min(1000, len(dataset))):
          t0 = time.time()
          image, target, deltas = dataset.pull_item(i)
          print("Load data: {} s".format(time.time() - t0))

          # to numpy (channels-first tensor -> channels-last array)
          image = image.permute(1, 2, 0).numpy()

          # denormalize
          image = image * pixel_std + pixel_mean
          if color_format == 'rgb':
              # RGB to BGR
              image = image[..., (2, 1, 0)]

          # to uint8; copy so OpenCV gets a contiguous, writable buffer
          image = image.astype(np.uint8)
          image = image.copy()
          img_h, img_w = image.shape[:2]

          boxes = target["boxes"]
          labels = target["labels"]
          for box, label in zip(boxes, labels):
              x1, y1, x2, y2 = box
              cls_id = int(label)
              color = class_colors[cls_id]
              # class name
              class_name = categories[cls_id]
              if x2 - x1 > 0. and y2 - y1 > 0.:
                  # draw bbox
                  image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
                  # put the text on the bbox
                  cv2.putText(image, class_name, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
          cv2.imshow('gt', image)
          cv2.waitKey(0)