# yolo_augment.py
  1. import random
  2. import cv2
  3. import math
  4. import numpy as np
  5. import torch
  6. import torchvision.transforms.functional as F
# ------------------------- Basic augmentations -------------------------
## Spatial transform
  9. def random_perspective(image,
  10. targets=(),
  11. degrees=10,
  12. translate=.1,
  13. scale=[0.1, 2.0],
  14. shear=10,
  15. perspective=0.0,
  16. border=(0, 0)):
  17. # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
  18. # targets = [cls, xyxy]
  19. height = image.shape[0] + border[0] * 2 # shape(h,w,c)
  20. width = image.shape[1] + border[1] * 2
  21. # Center
  22. C = np.eye(3)
  23. C[0, 2] = -image.shape[1] / 2 # x translation (pixels)
  24. C[1, 2] = -image.shape[0] / 2 # y translation (pixels)
  25. # Perspective
  26. P = np.eye(3)
  27. P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
  28. P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
  29. # Rotation and Scale
  30. R = np.eye(3)
  31. a = random.uniform(-degrees, degrees)
  32. # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
  33. s = random.uniform(scale[0], scale[1])
  34. # s = 2 ** random.uniform(-scale, scale)
  35. R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
  36. # Shear
  37. S = np.eye(3)
  38. S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
  39. S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
  40. # Translation
  41. T = np.eye(3)
  42. T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
  43. T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
  44. # Combined rotation matrix
  45. M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
  46. if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
  47. if perspective:
  48. image = cv2.warpPerspective(image, M, dsize=(width, height), borderValue=(114, 114, 114))
  49. else: # affine
  50. image = cv2.warpAffine(image, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
  51. # Transform label coordinates
  52. n = len(targets)
  53. if n:
  54. new = np.zeros((n, 4))
  55. # warp boxes
  56. xy = np.ones((n * 4, 3))
  57. xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
  58. xy = xy @ M.T # transform
  59. xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
  60. # create new boxes
  61. x = xy[:, [0, 2, 4, 6]]
  62. y = xy[:, [1, 3, 5, 7]]
  63. new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
  64. # clip
  65. new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
  66. new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
  67. targets[:, 1:5] = new
  68. return image, targets
## Color transform
  70. def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
  71. r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
  72. hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
  73. dtype = img.dtype # uint8
  74. x = np.arange(0, 256, dtype=np.int16)
  75. lut_hue = ((x * r[0]) % 180).astype(dtype)
  76. lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
  77. lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
  78. img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
  79. cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
  80. return img
# ------------------------- Preprocessors -------------------------
## YOLO-style transform for training
  83. class YOLOAugmentation(object):
  84. def __init__(self,
  85. img_size=640,
  86. affine_params=None,
  87. pixel_mean = [0., 0., 0.],
  88. pixel_std = [255., 255., 255.],
  89. box_format='xyxy',
  90. normalize_coords=False):
  91. # Basic parameters
  92. self.img_size = img_size
  93. self.pixel_mean = pixel_mean
  94. self.pixel_std = pixel_std
  95. self.box_format = box_format
  96. self.affine_params = affine_params
  97. self.normalize_coords = normalize_coords
  98. self.color_format = 'bgr'
  99. def __call__(self, image, target, mosaic=False):
  100. # --------------- Resize image ---------------
  101. orig_h, orig_w = image.shape[:2]
  102. ratio = self.img_size / max(orig_h, orig_w)
  103. if ratio != 1:
  104. new_shape = (int(round(orig_w * ratio)), int(round(orig_h * ratio)))
  105. image = cv2.resize(image, new_shape)
  106. img_h, img_w = image.shape[:2]
  107. # rescale bbox
  108. target["boxes"][..., [0, 2]] = target["boxes"][..., [0, 2]] / orig_w * img_w
  109. target["boxes"][..., [1, 3]] = target["boxes"][..., [1, 3]] / orig_h * img_h
  110. # --------------- HSV augmentations ---------------
  111. image = augment_hsv(image,
  112. hgain=self.affine_params['hsv_h'],
  113. sgain=self.affine_params['hsv_s'],
  114. vgain=self.affine_params['hsv_v'])
  115. # --------------- Spatial augmentations ---------------
  116. ## Random perspective
  117. if not mosaic:
  118. # spatial augment
  119. target_ = np.concatenate((target['labels'][..., None], target['boxes']), axis=-1)
  120. image, target_ = random_perspective(image, target_,
  121. degrees = self.affine_params['degrees'],
  122. translate = self.affine_params['translate'],
  123. scale = self.affine_params['scale'],
  124. shear = self.affine_params['shear'],
  125. perspective = self.affine_params['perspective']
  126. )
  127. target['boxes'] = target_[..., 1:]
  128. target['labels'] = target_[..., 0]
  129. ## Random flip
  130. if random.random() < 0.5:
  131. w = image.shape[1]
  132. image = np.fliplr(image).copy()
  133. boxes = target['boxes'].copy()
  134. boxes[..., [0, 2]] = w - boxes[..., [2, 0]]
  135. target["boxes"] = boxes
  136. # --------------- To torch.Tensor ---------------
  137. image = F.to_tensor(image) * 255.
  138. if target is not None:
  139. target["boxes"] = torch.as_tensor(target["boxes"]).float()
  140. target["labels"] = torch.as_tensor(target["labels"]).long()
  141. # normalize coords
  142. if self.normalize_coords:
  143. target["boxes"][..., [0, 2]] /= img_w
  144. target["boxes"][..., [1, 3]] /= img_h
  145. # xyxy -> xywh
  146. if self.box_format == "xywh":
  147. box_cxcy = (target["boxes"][..., :2] + target["boxes"][..., 2:]) * 0.5
  148. box_bwbh = target["boxes"][..., 2:] - target["boxes"][..., :2]
  149. target["boxes"] = torch.cat([box_cxcy, box_bwbh], dim=-1)
  150. # --------------- Pad Image ---------------
  151. img_h0, img_w0 = image.shape[1:]
  152. pad_image = torch.ones([image.size(0), self.img_size, self.img_size]).float() * 114.
  153. pad_image[:, :img_h0, :img_w0] = image
  154. # --------------- Normalize ---------------
  155. pad_image = F.normalize(pad_image, self.pixel_mean, self.pixel_std)
  156. return pad_image, target, ratio
## YOLO-style transform for evaluation
  158. class YOLOBaseTransform(object):
  159. def __init__(self,
  160. img_size=640,
  161. max_stride=32,
  162. pixel_mean = [0., 0., 0.],
  163. pixel_std = [255., 255., 255.],
  164. box_format='xyxy',
  165. normalize_coords=False):
  166. self.img_size = img_size
  167. self.max_stride = max_stride
  168. self.pixel_mean = pixel_mean
  169. self.pixel_std = pixel_std
  170. self.box_format = box_format
  171. self.normalize_coords = normalize_coords
  172. self.color_format = 'bgr'
  173. def __call__(self, image, target=None, mosaic=False):
  174. # --------------- Resize image ---------------
  175. orig_h, orig_w = image.shape[:2]
  176. ratio = self.img_size / max(orig_h, orig_w)
  177. if ratio != 1:
  178. new_shape = (int(round(orig_w * ratio)), int(round(orig_h * ratio)))
  179. image = cv2.resize(image, new_shape)
  180. img_h, img_w = image.shape[:2]
  181. # --------------- Rescale bboxes ---------------
  182. if target is not None:
  183. # rescale bbox
  184. target["boxes"][..., [0, 2]] = target["boxes"][..., [0, 2]] / orig_w * img_w
  185. target["boxes"][..., [1, 3]] = target["boxes"][..., [1, 3]] / orig_h * img_h
  186. # --------------- To torch.Tensor ---------------
  187. image = F.to_tensor(image) * 255.
  188. if target is not None:
  189. target["boxes"] = torch.as_tensor(target["boxes"]).float()
  190. target["labels"] = torch.as_tensor(target["labels"]).long()
  191. # normalize coords
  192. if self.normalize_coords:
  193. target["boxes"][..., [0, 2]] /= img_w
  194. target["boxes"][..., [1, 3]] /= img_h
  195. # xyxy -> xywh
  196. if self.box_format == "xywh":
  197. box_cxcy = (target["boxes"][..., :2] + target["boxes"][..., 2:]) * 0.5
  198. box_bwbh = target["boxes"][..., 2:] - target["boxes"][..., :2]
  199. target["boxes"] = torch.cat([box_cxcy, box_bwbh], dim=-1)
  200. # --------------- Pad image ---------------
  201. img_h0, img_w0 = image.shape[1:]
  202. dh = img_h0 % self.max_stride
  203. dw = img_w0 % self.max_stride
  204. dh = dh if dh == 0 else self.max_stride - dh
  205. dw = dw if dw == 0 else self.max_stride - dw
  206. pad_img_h = img_h0 + dh
  207. pad_img_w = img_w0 + dw
  208. pad_image = torch.ones([image.size(0), pad_img_h, pad_img_w]).float() * 114.
  209. pad_image[:, :img_h0, :img_w0] = image
  210. # --------------- Normalize ---------------
  211. pad_image = F.normalize(pad_image, self.pixel_mean, self.pixel_std)
  212. return pad_image, target, ratio
  213. if __name__ == "__main__":
  214. image_path = "voc_image.jpg"
  215. is_train = False
  216. affine_params = {
  217. 'degrees': 0.0,
  218. 'translate': 0.2,
  219. 'scale': [0.1, 2.0],
  220. 'shear': 0.0,
  221. 'perspective': 0.0,
  222. 'hsv_h': 0.015,
  223. 'hsv_s': 0.7,
  224. 'hsv_v': 0.4,
  225. }
  226. if is_train:
  227. ssd_augment = YOLOAugmentation(img_size=416,
  228. affine_params=affine_params,
  229. pixel_mean=[0., 0., 0.],
  230. pixel_std=[255., 255., 255.],
  231. box_format="xyxy",
  232. normalize_coords=False,
  233. )
  234. else:
  235. ssd_augment = YOLOBaseTransform(img_size=416,
  236. max_stride=32,
  237. pixel_mean=[0., 0., 0.],
  238. pixel_std=[255., 255., 255.],
  239. box_format="xyxy",
  240. normalize_coords=False,
  241. )
  242. # 读取图像数据
  243. orig_image = cv2.imread(image_path)
  244. target = {
  245. "boxes": np.array([[86, 96, 256, 425], [132, 71, 243, 282]], dtype=np.float32),
  246. "labels": np.array([12, 14], dtype=np.int32),
  247. }
  248. # 绘制原始数据的边界框
  249. image_copy = orig_image.copy()
  250. for box in target["boxes"]:
  251. x1, y1, x2, y2 = box
  252. image_copy = cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], 2)
  253. cv2.imshow("original image", image_copy)
  254. cv2.waitKey(0)
  255. # 展示预处理后的输入图像数据和标签信息
  256. image_aug, target_aug, _ = ssd_augment(orig_image, target)
  257. # [c, h, w] -> [h, w, c]
  258. image_aug = image_aug.permute(1, 2, 0).contiguous().numpy()
  259. image_aug = np.clip(image_aug * 255, 0, 255).astype(np.uint8)
  260. image_aug = image_aug.copy()
  261. # 绘制处理后的边界框
  262. for box in target_aug["boxes"]:
  263. x1, y1, x2, y2 = box
  264. image_aug = cv2.rectangle(image_aug, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], 2)
  265. cv2.imshow("processed image", image_aug)
  266. cv2.waitKey(0)