Browse Source

modify transform

yjh0410 1 year ago
parent
commit
8dcae7c06b

+ 4 - 8
yolo/dataset/build.py

@@ -50,15 +50,13 @@ def build_transform(cfg, is_train=False):
                                          cfg.affine_params,
                                          cfg.pixel_mean,
                                          cfg.pixel_std,
-                                         cfg.box_format,
-                                         cfg.normalize_coords)
+                                         )
         else:
             transform = YOLOBaseTransform(cfg.test_img_size,
                                           cfg.max_stride,
                                           cfg.pixel_mean,
                                           cfg.pixel_std,
-                                          cfg.box_format,
-                                          cfg.normalize_coords)
+                                          )
 
     ## RT-DETR style transform
     elif cfg.aug_type == 'ssd':
@@ -66,13 +64,11 @@ def build_transform(cfg, is_train=False):
             transform = SSDAugmentation(cfg.train_img_size,
                                            cfg.pixel_mean,
                                            cfg.pixel_std,
-                                           cfg.box_format,
-                                           cfg.normalize_coords)
+                                           )
         else:
             transform = SSDBaseTransform(cfg.test_img_size,
                                             cfg.pixel_mean,
                                             cfg.pixel_std,
-                                            cfg.box_format,
-                                            cfg.normalize_coords)
+                                            )
 
     return transform

+ 1 - 19
yolo/dataset/coco.py

@@ -132,8 +132,6 @@ if __name__ == "__main__":
         def __init__(self) -> None:
             self.max_stride = 32
             # ---------------- Data process config ----------------
-            self.box_format = 'xywh'
-            self.normalize_coords = False
             self.mosaic_prob = 1.0
             self.mixup_prob  = 0.15
             self.copy_paste  = 0.3
@@ -160,8 +158,6 @@ if __name__ == "__main__":
         def __init__(self) -> None:
             self.max_stride = 32
             # ---------------- Data process config ----------------
-            self.box_format = 'xywh'
-            self.normalize_coords = False
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
             self.copy_paste  = 0.0
@@ -211,21 +207,7 @@ if __name__ == "__main__":
         labels = target["labels"]
 
         for box, label in zip(boxes, labels):
-            if cfg.box_format == 'xyxy':
-                x1, y1, x2, y2 = box
-            elif cfg.box_format == 'xywh':
-                cx, cy, bw, bh = box
-                x1 = cx - 0.5 * bw
-                y1 = cy - 0.5 * bh
-                x2 = cx + 0.5 * bw
-                y2 = cy + 0.5 * bh
-            
-            if cfg.normalize_coords:
-                x1 *= img_w
-                y1 *= img_h
-                x2 *= img_w
-                y2 *= img_h
-
+            x1, y1, x2, y2 = box
             cls_id = int(label)
             color = class_colors[cls_id]
             # class name

+ 5 - 24
yolo/dataset/data_augment/ssd_augment.py

@@ -430,21 +430,14 @@ class Resize(object):
 
 ## Normalize tensor image
 class Normalize(object):
-    def __init__(self, pixel_mean, pixel_std, normalize_coords=False):
+    def __init__(self, pixel_mean, pixel_std):
         self.pixel_mean = pixel_mean
         self.pixel_std = pixel_std
-        self.normalize_coords = normalize_coords
 
     def __call__(self, image, target=None):
         # normalize image
         image = (image - self.pixel_mean) / self.pixel_std
 
-        # normalize bbox
-        if target is not None and self.normalize_coords:
-            img_h, img_w = image.shape[:2]
-            target["boxes"][..., [0, 2]] = target["boxes"][..., [0, 2]] / float(img_w)
-            target["boxes"][..., [1, 3]] = target["boxes"][..., [1, 3]] / float(img_h)
-
         return image, target
 
 ## Convert ndarray to torch.Tensor
@@ -489,14 +482,11 @@ class SSDAugmentation(object):
                  img_size   = 640,
                  pixel_mean = [123.675, 116.28, 103.53],
                  pixel_std  = [58.395, 57.12, 57.375],
-                 box_format = 'xywh',
-                 normalize_coords = False):
+                 ):
         # ----------------- Basic parameters -----------------
         self.img_size = img_size
-        self.box_format = box_format
         self.pixel_mean = pixel_mean   # RGB format
         self.pixel_std  = pixel_std    # RGB format
-        self.normalize_coords = normalize_coords
         self.color_format = 'rgb'
         print("================= Pixel Statistics =================")
         print("Pixel mean: {}".format(self.pixel_mean))
@@ -510,9 +500,8 @@ class SSDAugmentation(object):
             RandomHorizontalFlip(p=0.5),
             Resize(img_size=self.img_size),
             ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std, normalize_coords),
+            Normalize(self.pixel_mean, self.pixel_std),
             ToTensor(),
-            ConvertBoxFormat(self.box_format),
         ])
 
     def __call__(self, image, target, mosaic=False):
@@ -529,14 +518,11 @@ class SSDBaseTransform(object):
                  img_size   = 640,
                  pixel_mean = [123.675, 116.28, 103.53],
                  pixel_std  = [58.395, 57.12, 57.375],
-                 box_format = 'xywh',
-                 normalize_coords = False):
+                 ):
         # ----------------- Basic parameters -----------------
         self.img_size = img_size
-        self.box_format = box_format
         self.pixel_mean = pixel_mean  # RGB format
         self.pixel_std  = pixel_std    # RGB format
-        self.normalize_coords = normalize_coords
         self.color_format = 'rgb'
         print("================= Pixel Statistics =================")
         print("Pixel mean: {}".format(self.pixel_mean))
@@ -546,9 +532,8 @@ class SSDBaseTransform(object):
         self.transform = Compose([
             Resize(img_size=self.img_size),
             ConvertColorFormat(self.color_format),
-            Normalize(self.pixel_mean, self.pixel_std, self.normalize_coords),
+            Normalize(self.pixel_mean, self.pixel_std),
             ToTensor(),
-            ConvertBoxFormat(self.box_format),
         ])
 
 
@@ -569,15 +554,11 @@ if __name__ == "__main__":
         ssd_augment = SSDAugmentation(img_size=416,
                                       pixel_mean=[0., 0., 0.],
                                       pixel_std=[255., 255., 255.],
-                                      box_format="xyxy",
-                                      normalize_coords=False,
                                       )
     else:
         ssd_augment = SSDBaseTransform(img_size=416,
                                        pixel_mean=[0., 0., 0.],
                                        pixel_std=[255., 255., 255.],
-                                       box_format="xyxy",
-                                       normalize_coords=False,
                                        )
     
     # 读取图像数据

+ 2 - 30
yolo/dataset/data_augment/yolo_augment.py

@@ -107,15 +107,12 @@ class YOLOAugmentation(object):
                  affine_params=None,
                  pixel_mean = [0., 0., 0.],
                  pixel_std  = [255., 255., 255.],
-                 box_format='xyxy',
-                 normalize_coords=False):
+                 ):
         # Basic parameters
         self.img_size   = img_size
         self.pixel_mean = pixel_mean
         self.pixel_std  = pixel_std
-        self.box_format = box_format
         self.affine_params = affine_params
-        self.normalize_coords = normalize_coords
         self.color_format = 'bgr'
 
     def __call__(self, image, target, mosaic=False):
@@ -166,17 +163,6 @@ class YOLOAugmentation(object):
             target["boxes"] = torch.as_tensor(target["boxes"]).float()
             target["labels"] = torch.as_tensor(target["labels"]).long()
 
-            # normalize coords
-            if self.normalize_coords:
-                target["boxes"][..., [0, 2]] /= img_w
-                target["boxes"][..., [1, 3]] /= img_h
-
-            # xyxy -> xywh
-            if self.box_format == "xywh":
-                box_cxcy = (target["boxes"][..., :2] + target["boxes"][..., 2:]) * 0.5
-                box_bwbh =  target["boxes"][..., 2:] - target["boxes"][..., :2]
-                target["boxes"] = torch.cat([box_cxcy, box_bwbh], dim=-1)
-
         # --------------- Pad Image ---------------
         img_h0, img_w0 = image.shape[1:]
         pad_image = torch.ones([image.size(0), self.img_size, self.img_size]).float() * 114.
@@ -194,14 +180,11 @@ class YOLOBaseTransform(object):
                  max_stride=32,
                  pixel_mean = [0., 0., 0.],
                  pixel_std  = [255., 255., 255.],
-                 box_format='xyxy',
-                 normalize_coords=False):
+                 ):
         self.img_size = img_size
         self.max_stride = max_stride
         self.pixel_mean = pixel_mean
         self.pixel_std  = pixel_std
-        self.box_format = box_format
-        self.normalize_coords = normalize_coords
         self.color_format = 'bgr'
 
     def __call__(self, image, target=None, mosaic=False):
@@ -225,17 +208,6 @@ class YOLOBaseTransform(object):
             target["boxes"] = torch.as_tensor(target["boxes"]).float()
             target["labels"] = torch.as_tensor(target["labels"]).long()
 
-            # normalize coords
-            if self.normalize_coords:
-                target["boxes"][..., [0, 2]] /= img_w
-                target["boxes"][..., [1, 3]] /= img_h
-            
-            # xyxy -> xywh
-            if self.box_format == "xywh":
-                box_cxcy = (target["boxes"][..., :2] + target["boxes"][..., 2:]) * 0.5
-                box_bwbh =  target["boxes"][..., 2:] - target["boxes"][..., :2]
-                target["boxes"] = torch.cat([box_cxcy, box_bwbh], dim=-1)
-
         # --------------- Pad image ---------------
         img_h0, img_w0 = image.shape[1:]
         dh = img_h0 % self.max_stride

+ 1 - 16
yolo/dataset/voc.py

@@ -238,8 +238,6 @@ if __name__ == "__main__":
         def __init__(self) -> None:
             self.max_stride = 32
             # ---------------- Data process config ----------------
-            self.box_format = 'xywh'
-            self.normalize_coords = False
             self.mosaic_prob = 0.0
             self.mixup_prob  = 0.0
             self.copy_paste  = 0.0
@@ -290,21 +288,8 @@ if __name__ == "__main__":
         labels = target["labels"]
 
         for box, label in zip(boxes, labels):
-            if cfg.box_format == 'xyxy':
-                x1, y1, x2, y2 = box
-            elif cfg.box_format == 'xywh':
-                cx, cy, bw, bh = box
-                x1 = cx - 0.5 * bw
-                y1 = cy - 0.5 * bh
-                x2 = cx + 0.5 * bw
-                y2 = cy + 0.5 * bh
+            x1, y1, x2, y2 = box
             
-            if cfg.normalize_coords:
-                x1 *= img_w
-                y1 *= img_h
-                x2 *= img_w
-                y2 *= img_h
-
             cls_id = int(label)
             color = class_colors[cls_id]
             # class name