yjh0410 1 年之前
父節點
當前提交
494dc09e4a
共有 2 個文件被更改,包括 91 次插入和 11 次删除
  1. + 28 - 11
      yolo/dataset/data_augment/ssd_augment.py
  2. + 63 - 0
      yolo/dataset/data_augment/yolo_augment.py

+ 28 - 11
yolo/dataset/data_augment/ssd_augment.py

@@ -143,20 +143,26 @@ class RandomDistort(object):
             distortions = np.random.permutation(functions)[:self.count]
             for func in distortions:
                 image, target = func(image, target)
+                image = np.clip(image, 0.0, 255.)
 
             return image, target
 
         image, target = self.apply_brightness(image, target)
+        image = np.clip(image, 0.0, 255.)
         mode = np.random.randint(0, 2)
 
         if mode:
             image, target = self.apply_contrast(image, target)
+            image = np.clip(image, 0.0, 255.)
 
         image, target = self.apply_saturation(image, target)
+        image = np.clip(image, 0.0, 255.)
         image, target = self.apply_hue(image, target)
+        image = np.clip(image, 0.0, 255.)
 
         if not mode:
             image, target = self.apply_contrast(image, target)
+            image = np.clip(image, 0.0, 255.)
 
         if self.random_channel:
             if np.random.randint(0, 2):
@@ -560,35 +566,46 @@ if __name__ == "__main__":
     is_train = True
 
     if is_train:
-        ssd_augment = SSDAugmentation(img_size=512,
+        ssd_augment = SSDAugmentation(img_size=416,
                                       pixel_mean=[0., 0., 0.],
                                       pixel_std=[255., 255., 255.],
                                       box_format="xyxy",
                                       normalize_coords=False,
                                       )
     else:
-        ssd_augment = SSDBaseTransform(img_size=512,
+        ssd_augment = SSDBaseTransform(img_size=416,
                                        pixel_mean=[0., 0., 0.],
                                        pixel_std=[255., 255., 255.],
                                        box_format="xyxy",
                                        normalize_coords=False,
                                        )
     
-    # 展示输入图像数据和标签信息
-    image = cv2.imread(image_path)
+    # 读取图像数据
+    orig_image = cv2.imread(image_path)
     target = {
         "boxes": np.array([[86, 96, 256, 425], [132, 71, 243, 282]], dtype=np.float32),
         "labels": np.array([12, 14], dtype=np.int32),
     }
-    cv2.imshow("original image", image)
+
+    # 绘制原始数据的边界框
+    image_copy = orig_image.copy()
+    for box in target["boxes"]:
+        x1, y1, x2, y2 = box
+        image_copy = cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], 2)
+    cv2.imshow("original image", image_copy)
     cv2.waitKey(0)
 
     # 展示预处理后的输入图像数据和标签信息
-    image, target, _ = ssd_augment(image, target)
+    image_aug, target_aug, _ = ssd_augment(orig_image, target)
     # [c, h, w] -> [h, w, c]
-    image = image.permute(1, 2, 0).contiguous().numpy()
-    image = np.clip(image * 255, 0, 255).astype(np.uint8)
-
-    image = image[:, :, (2, 1, 0)]  # 切换为CV2默认的BGR通道顺序
-    cv2.imshow("processed image", image)
+    image_aug = image_aug.permute(1, 2, 0).contiguous().numpy()
+    image_aug = np.clip(image_aug * 255, 0, 255).astype(np.uint8)
+    image_aug = image_aug[:, :, (2, 1, 0)]  # 切换为CV2默认的BGR通道顺序
+    image_aug = image_aug.copy()
+
+    # 绘制处理后的边界框
+    for box in target_aug["boxes"]:
+        x1, y1, x2, y2 = box
+        image_aug = cv2.rectangle(image_aug, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], 2)
+    cv2.imshow("processed image", image_aug)
     cv2.waitKey(0)

+ 63 - 0
yolo/dataset/data_augment/yolo_augment.py

@@ -252,3 +252,66 @@ class YOLOBaseTransform(object):
         pad_image = F.normalize(pad_image, self.pixel_mean, self.pixel_std)
 
         return pad_image, target, ratio
+
+
+if __name__ == "__main__":
+    image_path = "voc_image.jpg"
+    is_train = False
+
+    affine_params = {
+        'degrees': 0.0,
+        'translate': 0.2,
+        'scale': [0.1, 2.0],
+        'shear': 0.0,
+        'perspective': 0.0,
+        'hsv_h': 0.015,
+        'hsv_s': 0.7,
+        'hsv_v': 0.4,
+    }
+
+
+    if is_train:
+        ssd_augment = YOLOAugmentation(img_size=416,
+                                       affine_params=affine_params,
+                                       pixel_mean=[0., 0., 0.],
+                                       pixel_std=[255., 255., 255.],
+                                       box_format="xyxy",
+                                       normalize_coords=False,
+                                       )
+    else:
+        ssd_augment = YOLOBaseTransform(img_size=416,
+                                        max_stride=32,
+                                        pixel_mean=[0., 0., 0.],
+                                        pixel_std=[255., 255., 255.],
+                                        box_format="xyxy",
+                                        normalize_coords=False,
+                                        )
+    
+    # 读取图像数据
+    orig_image = cv2.imread(image_path)
+    target = {
+        "boxes": np.array([[86, 96, 256, 425], [132, 71, 243, 282]], dtype=np.float32),
+        "labels": np.array([12, 14], dtype=np.int32),
+    }
+
+    # 绘制原始数据的边界框
+    image_copy = orig_image.copy()
+    for box in target["boxes"]:
+        x1, y1, x2, y2 = box
+        image_copy = cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], 2)
+    cv2.imshow("original image", image_copy)
+    cv2.waitKey(0)
+
+    # 展示预处理后的输入图像数据和标签信息
+    image_aug, target_aug, _ = ssd_augment(orig_image, target)
+    # [c, h, w] -> [h, w, c]
+    image_aug = image_aug.permute(1, 2, 0).contiguous().numpy()
+    image_aug = np.clip(image_aug * 255, 0, 255).astype(np.uint8)
+    image_aug = image_aug.copy()
+
+    # 绘制处理后的边界框
+    for box in target_aug["boxes"]:
+        x1, y1, x2, y2 = box
+        image_aug = cv2.rectangle(image_aug, (int(x1), int(y1)), (int(x2), int(y2)), [0, 0, 255], 2)
+    cv2.imshow("processed image", image_aug)
+    cv2.waitKey(0)