# yolov5_augment.py
  1. import random
  2. import cv2
  3. import math
  4. import numpy as np
  5. import torch
  6. import albumentations as albu
  7. # ------------------------- Basic augmentations -------------------------
  8. ## Spatial transform
  9. def random_perspective(image,
  10. targets=(),
  11. degrees=10,
  12. translate=.1,
  13. scale=[0.1, 2.0],
  14. shear=10,
  15. perspective=0.0,
  16. border=(0, 0)):
  17. # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
  18. # targets = [cls, xyxy]
  19. height = image.shape[0] + border[0] * 2 # shape(h,w,c)
  20. width = image.shape[1] + border[1] * 2
  21. # Center
  22. C = np.eye(3)
  23. C[0, 2] = -image.shape[1] / 2 # x translation (pixels)
  24. C[1, 2] = -image.shape[0] / 2 # y translation (pixels)
  25. # Perspective
  26. P = np.eye(3)
  27. P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
  28. P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
  29. # Rotation and Scale
  30. R = np.eye(3)
  31. a = random.uniform(-degrees, degrees)
  32. # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
  33. s = random.uniform(scale[0], scale[1])
  34. # s = 2 ** random.uniform(-scale, scale)
  35. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
  36. # Shear
  37. S = np.eye(3)
  38. S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
  39. S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
  40. # Translation
  41. T = np.eye(3)
  42. T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
  43. T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
  44. # Combined rotation matrix
  45. M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
  46. if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
  47. if perspective:
  48. image = cv2.warpPerspective(image, M, dsize=(width, height), borderValue=(114, 114, 114))
  49. else: # affine
  50. image = cv2.warpAffine(image, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
  51. # Transform label coordinates
  52. n = len(targets)
  53. if n:
  54. new = np.zeros((n, 4))
  55. # warp boxes
  56. xy = np.ones((n * 4, 3))
  57. xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
  58. xy = xy @ M.T # transform
  59. xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
  60. # create new boxes
  61. x = xy[:, [0, 2, 4, 6]]
  62. y = xy[:, [1, 3, 5, 7]]
  63. new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
  64. # clip
  65. new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
  66. new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
  67. targets[:, 1:5] = new
  68. return image, targets
  69. ## Color transform
  70. def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
  71. r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
  72. hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  73. dtype = img.dtype # uint8
  74. x = np.arange(0, 256, dtype=np.int16)
  75. lut_hue = ((x * r[0]) % 180).astype(dtype)
  76. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  77. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  78. img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
  79. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
## Albumentations transform
  81. class Albumentations(object):
  82. def __init__(self, img_size=640):
  83. self.img_size = img_size
  84. self.transform = albu.Compose(
  85. [albu.Blur(p=0.01),
  86. albu.MedianBlur(p=0.01),
  87. albu.ToGray(p=0.01),
  88. albu.CLAHE(p=0.01),
  89. ],
  90. bbox_params=albu.BboxParams(format='pascal_voc', label_fields=['labels'])
  91. )
  92. def __call__(self, image, target=None):
  93. labels = target['labels']
  94. bboxes = target['boxes']
  95. if len(labels) > 0:
  96. new = self.transform(image=image, bboxes=bboxes, labels=labels)
  97. if len(new["labels"]) > 0:
  98. image = new['image']
  99. target['labels'] = np.array(new["labels"], dtype=labels.dtype)
  100. target['boxes'] = np.array(new["bboxes"], dtype=bboxes.dtype)
  101. return image, target
  102. # ------------------------- Strong augmentations -------------------------
  103. ## YOLOv5-Mosaic
  104. def yolov5_mosaic_augment(image_list, target_list, img_size, affine_params, keep_ratio=True, is_train=False):
  105. assert len(image_list) == 4
  106. mosaic_img = np.ones([img_size*2, img_size*2, image_list[0].shape[2]], dtype=np.uint8) * 114
  107. # mosaic center
  108. yc, xc = [int(random.uniform(-x, 2*img_size + x)) for x in [-img_size // 2, -img_size // 2]]
  109. # yc = xc = self.img_size
  110. mosaic_bboxes = []
  111. mosaic_labels = []
  112. for i in range(4):
  113. img_i, target_i = image_list[i], target_list[i]
  114. bboxes_i = target_i["boxes"]
  115. labels_i = target_i["labels"]
  116. orig_h, orig_w, _ = img_i.shape
  117. # resize
  118. if keep_ratio:
  119. r = img_size / max(orig_h, orig_w)
  120. if r != 1:
  121. interp = cv2.INTER_LINEAR if (is_train or r > 1) else cv2.INTER_AREA
  122. img_i = cv2.resize(img_i, (int(orig_w * r), int(orig_h * r)), interpolation=interp)
  123. else:
  124. interp = cv2.INTER_LINEAR if is_train else cv2.INTER_AREA
  125. img_i = cv2.resize(img_i, (img_size, img_size), interpolation=interp)
  126. h, w, _ = img_i.shape
  127. # place img in img4
  128. if i == 0: # top left
  129. x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
  130. x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
  131. elif i == 1: # top right
  132. x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, img_size * 2), yc
  133. x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
  134. elif i == 2: # bottom left
  135. x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(img_size * 2, yc + h)
  136. x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
  137. elif i == 3: # bottom right
  138. x1a, y1a, x2a, y2a = xc, yc, min(xc + w, img_size * 2), min(img_size * 2, yc + h)
  139. x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
  140. mosaic_img[y1a:y2a, x1a:x2a] = img_i[y1b:y2b, x1b:x2b]
  141. padw = x1a - x1b
  142. padh = y1a - y1b
  143. # labels
  144. bboxes_i_ = bboxes_i.copy()
  145. if len(bboxes_i) > 0:
  146. # a valid target, and modify it.
  147. bboxes_i_[:, 0] = (w * bboxes_i[:, 0] / orig_w + padw)
  148. bboxes_i_[:, 1] = (h * bboxes_i[:, 1] / orig_h + padh)
  149. bboxes_i_[:, 2] = (w * bboxes_i[:, 2] / orig_w + padw)
  150. bboxes_i_[:, 3] = (h * bboxes_i[:, 3] / orig_h + padh)
  151. mosaic_bboxes.append(bboxes_i_)
  152. mosaic_labels.append(labels_i)
  153. if len(mosaic_bboxes) == 0:
  154. mosaic_bboxes = np.array([]).reshape(-1, 4)
  155. mosaic_labels = np.array([]).reshape(-1)
  156. else:
  157. mosaic_bboxes = np.concatenate(mosaic_bboxes)
  158. mosaic_labels = np.concatenate(mosaic_labels)
  159. # clip
  160. mosaic_bboxes = mosaic_bboxes.clip(0, img_size * 2)
  161. # random perspective
  162. mosaic_targets = np.concatenate([mosaic_labels[..., None], mosaic_bboxes], axis=-1)
  163. mosaic_img, mosaic_targets = random_perspective(
  164. mosaic_img,
  165. mosaic_targets,
  166. affine_params['degrees'],
  167. translate=affine_params['translate'],
  168. scale=affine_params['scale'],
  169. shear=affine_params['shear'],
  170. perspective=affine_params['perspective'],
  171. border=[-img_size//2, -img_size//2]
  172. )
  173. # target
  174. mosaic_target = {
  175. "boxes": mosaic_targets[..., 1:],
  176. "labels": mosaic_targets[..., 0],
  177. "orig_size": [img_size, img_size]
  178. }
  179. return mosaic_img, mosaic_target
  180. ## YOLOv5-Mixup
  181. def yolov5_mixup_augment(origin_image, origin_target, new_image, new_target):
  182. if origin_image.shape[:2] != new_image.shape[:2]:
  183. img_size = max(new_image.shape[:2])
  184. # origin_image is not a mosaic image
  185. orig_h, orig_w = origin_image.shape[:2]
  186. scale_ratio = img_size / max(orig_h, orig_w)
  187. if scale_ratio != 1:
  188. interp = cv2.INTER_LINEAR if scale_ratio > 1 else cv2.INTER_AREA
  189. resize_size = (int(orig_w * scale_ratio), int(orig_h * scale_ratio))
  190. origin_image = cv2.resize(origin_image, resize_size, interpolation=interp)
  191. # pad new image
  192. pad_origin_image = np.ones([img_size, img_size, origin_image.shape[2]], dtype=np.uint8) * 114
  193. pad_origin_image[:resize_size[1], :resize_size[0]] = origin_image
  194. origin_image = pad_origin_image.copy()
  195. del pad_origin_image
  196. r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
  197. mixup_image = r * origin_image.astype(np.float32) + \
  198. (1.0 - r)* new_image.astype(np.float32)
  199. mixup_image = mixup_image.astype(np.uint8)
  200. cls_labels = new_target["labels"].copy()
  201. box_labels = new_target["boxes"].copy()
  202. mixup_bboxes = np.concatenate([origin_target["boxes"], box_labels], axis=0)
  203. mixup_labels = np.concatenate([origin_target["labels"], cls_labels], axis=0)
  204. mixup_target = {
  205. "boxes": mixup_bboxes,
  206. "labels": mixup_labels,
  207. 'orig_size': mixup_image.shape[:2]
  208. }
  209. return mixup_image, mixup_target
  210. ## YOLOX-Mixup
  211. def yolox_mixup_augment(origin_img, origin_target, new_img, new_target, img_size, mixup_scale):
  212. jit_factor = random.uniform(*mixup_scale)
  213. FLIP = random.uniform(0, 1) > 0.5
  214. # resize new image
  215. orig_h, orig_w = new_img.shape[:2]
  216. cp_scale_ratio = img_size / max(orig_h, orig_w)
  217. if cp_scale_ratio != 1:
  218. interp = cv2.INTER_LINEAR if cp_scale_ratio > 1 else cv2.INTER_AREA
  219. resized_new_img = cv2.resize(
  220. new_img, (int(orig_w * cp_scale_ratio), int(orig_h * cp_scale_ratio)), interpolation=interp)
  221. else:
  222. resized_new_img = new_img
  223. # pad new image
  224. cp_img = np.ones([img_size, img_size, new_img.shape[2]], dtype=np.uint8) * 114
  225. new_shape = (resized_new_img.shape[1], resized_new_img.shape[0])
  226. cp_img[:new_shape[1], :new_shape[0]] = resized_new_img
  227. # resize padded new image
  228. cp_img_h, cp_img_w = cp_img.shape[:2]
  229. cp_new_shape = (int(cp_img_w * jit_factor),
  230. int(cp_img_h * jit_factor))
  231. cp_img = cv2.resize(cp_img, (cp_new_shape[0], cp_new_shape[1]))
  232. cp_scale_ratio *= jit_factor
  233. # flip new image
  234. if FLIP:
  235. cp_img = cp_img[:, ::-1, :]
  236. # pad image
  237. origin_h, origin_w = cp_img.shape[:2]
  238. target_h, target_w = origin_img.shape[:2]
  239. padded_img = np.zeros(
  240. (max(origin_h, target_h), max(origin_w, target_w), 3), dtype=np.uint8
  241. )
  242. padded_img[:origin_h, :origin_w] = cp_img
  243. # crop padded image
  244. x_offset, y_offset = 0, 0
  245. if padded_img.shape[0] > target_h:
  246. y_offset = random.randint(0, padded_img.shape[0] - target_h - 1)
  247. if padded_img.shape[1] > target_w:
  248. x_offset = random.randint(0, padded_img.shape[1] - target_w - 1)
  249. padded_cropped_img = padded_img[
  250. y_offset: y_offset + target_h, x_offset: x_offset + target_w
  251. ]
  252. # process target
  253. new_boxes = new_target["boxes"]
  254. new_labels = new_target["labels"]
  255. new_boxes[:, 0::2] = np.clip(new_boxes[:, 0::2] * cp_scale_ratio, 0, origin_w)
  256. new_boxes[:, 1::2] = np.clip(new_boxes[:, 1::2] * cp_scale_ratio, 0, origin_h)
  257. if FLIP:
  258. new_boxes[:, 0::2] = (
  259. origin_w - new_boxes[:, 0::2][:, ::-1]
  260. )
  261. new_boxes[:, 0::2] = np.clip(
  262. new_boxes[:, 0::2] - x_offset, 0, target_w
  263. )
  264. new_boxes[:, 1::2] = np.clip(
  265. new_boxes[:, 1::2] - y_offset, 0, target_h
  266. )
  267. # mixup target
  268. mixup_boxes = np.concatenate([new_boxes, origin_target['boxes']], axis=0)
  269. mixup_labels = np.concatenate([new_labels, origin_target['labels']], axis=0)
  270. mixup_target = {
  271. 'boxes': mixup_boxes,
  272. 'labels': mixup_labels
  273. }
  274. # mixup images
  275. origin_img = origin_img.astype(np.float32)
  276. origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32)
  277. return origin_img.astype(np.uint8), mixup_target
# ------------------------- Preprocessors -------------------------
  279. ## YOLOv5-style Transform for Train
  280. class YOLOv5Augmentation(object):
  281. def __init__(self, img_size=640, trans_config=None, use_ablu=False):
  282. # Basic parameters
  283. self.img_size = img_size
  284. self.pixel_mean = [0., 0., 0.]
  285. self.pixel_std = [255., 255., 255.]
  286. self.color_format = 'bgr'
  287. self.trans_config = trans_config
  288. # Albumentations
  289. self.ablu_trans = Albumentations(img_size) if use_ablu else None
  290. def __call__(self, image, target, mosaic=False):
  291. # --------------- Keep ratio Resize ---------------
  292. img_h0, img_w0 = image.shape[:2]
  293. ratio = self.img_size / max(img_h0, img_w0)
  294. if ratio != 1:
  295. interp = cv2.INTER_LINEAR
  296. new_shape = (int(round(img_w0 * ratio)), int(round(img_h0 * ratio)))
  297. img = cv2.resize(image, new_shape, interpolation=interp)
  298. else:
  299. img = image
  300. img_h, img_w = img.shape[:2]
  301. # --------------- Filter bad targets ---------------
  302. tgt_boxes_wh = target["boxes"][..., 2:] - target["boxes"][..., :2]
  303. min_tgt_size = np.min(tgt_boxes_wh, axis=-1)
  304. keep = (min_tgt_size > 1)
  305. target["boxes"] = target["boxes"][keep]
  306. target["labels"] = target["labels"][keep]
  307. # --------------- Albumentations ---------------
  308. if self.ablu_trans is not None:
  309. img, target = self.ablu_trans(img, target)
  310. # --------------- HSV augmentations ---------------
  311. augment_hsv(img, hgain=self.trans_config['hsv_h'],
  312. sgain=self.trans_config['hsv_s'],
  313. vgain=self.trans_config['hsv_v'])
  314. # --------------- Spatial augmentations ---------------
  315. ## Random perspective
  316. if not mosaic:
  317. # rescale bbox
  318. boxes_ = target["boxes"].copy()
  319. boxes_[:, [0, 2]] = boxes_[:, [0, 2]] / img_w0 * img_w
  320. boxes_[:, [1, 3]] = boxes_[:, [1, 3]] / img_h0 * img_h
  321. target["boxes"] = boxes_
  322. # spatial augment
  323. target_ = np.concatenate(
  324. (target['labels'][..., None], target['boxes']), axis=-1)
  325. img, target_ = random_perspective(
  326. img, target_,
  327. degrees=self.trans_config['degrees'],
  328. translate=self.trans_config['translate'],
  329. scale=self.trans_config['scale'],
  330. shear=self.trans_config['shear'],
  331. perspective=self.trans_config['perspective']
  332. )
  333. target['boxes'] = target_[..., 1:]
  334. target['labels'] = target_[..., 0]
  335. ## Random flip
  336. if random.random() < 0.5:
  337. w = img.shape[1]
  338. img = np.fliplr(img).copy()
  339. boxes = target['boxes'].copy()
  340. boxes[..., [0, 2]] = w - boxes[..., [2, 0]]
  341. target["boxes"] = boxes
  342. # --------------- To torch.Tensor ---------------
  343. img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
  344. if target is not None:
  345. target["boxes"] = torch.as_tensor(target["boxes"]).float()
  346. target["labels"] = torch.as_tensor(target["labels"]).long()
  347. # --------------- Pad image ---------------
  348. img_h0, img_w0 = img_tensor.shape[1:]
  349. pad_image = torch.ones([img_tensor.size(0), self.img_size, self.img_size]).float() * 114.
  350. pad_image[:, :img_h0, :img_w0] = img_tensor
  351. dh = self.img_size - img_h0
  352. dw = self.img_size - img_w0
  353. # normalize image
  354. pad_image /= 255.
  355. return pad_image, target, ratio #[dw, dh]
  356. ## YOLOv5-style Transform for Eval
  357. class YOLOv5BaseTransform(object):
  358. def __init__(self, img_size=640, max_stride=32):
  359. self.img_size = img_size
  360. self.max_stride = max_stride
  361. self.pixel_mean = [0., 0., 0.]
  362. self.pixel_std = [255., 255., 255.]
  363. self.color_format = 'bgr'
  364. def __call__(self, image, target=None, mosaic=False):
  365. # --------------- Keep ratio Resize ---------------
  366. ## Resize image
  367. img_h0, img_w0 = image.shape[:2]
  368. ratio = self.img_size / max(img_h0, img_w0)
  369. if ratio != 1:
  370. new_shape = (int(round(img_w0 * ratio)), int(round(img_h0 * ratio)))
  371. img = cv2.resize(image, new_shape, interpolation=cv2.INTER_LINEAR)
  372. else:
  373. img = image
  374. img_h, img_w = img.shape[:2]
  375. ## Rescale bboxes
  376. if target is not None:
  377. # rescale bbox
  378. boxes_ = target["boxes"].copy()
  379. boxes_[:, [0, 2]] = boxes_[:, [0, 2]] / img_w0 * img_w
  380. boxes_[:, [1, 3]] = boxes_[:, [1, 3]] / img_h0 * img_h
  381. target["boxes"] = boxes_
  382. # --------------- To torch.Tensor ---------------
  383. img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
  384. if target is not None:
  385. target["boxes"] = torch.as_tensor(target["boxes"]).float()
  386. target["labels"] = torch.as_tensor(target["labels"]).long()
  387. # --------------- Pad image ---------------
  388. img_h0, img_w0 = img_tensor.shape[1:]
  389. dh = img_h0 % self.max_stride
  390. dw = img_w0 % self.max_stride
  391. dh = dh if dh == 0 else self.max_stride - dh
  392. dw = dw if dw == 0 else self.max_stride - dw
  393. pad_img_h = img_h0 + dh
  394. pad_img_w = img_w0 + dw
  395. pad_image = torch.ones([img_tensor.size(0), pad_img_h, pad_img_w]).float() * 114.
  396. pad_image[:, :img_h0, :img_w0] = img_tensor
  397. # normalize image
  398. pad_image /= 255.
  399. return pad_image, target, ratio #[dw, dh]