1 year ago · 8dcae7c06b
--- a/yolo/dataset/build.py
+++ b/yolo/dataset/build.py
@@ -50,15 +50,13 @@ def build_transform(cfg, is_train=False):
 
				                                          cfg.affine_params,
			
 
				                                          cfg.pixel_mean,
			
 
				                                          cfg.pixel_std,
			
 
				-                                         cfg.box_format,
			
 
				-                                         cfg.normalize_coords)
			
 
				+                                         )
			
 
				         else:
			
 
				             transform = YOLOBaseTransform(cfg.test_img_size,
			
 
				                                           cfg.max_stride,
			
 
				                                           cfg.pixel_mean,
			
 
				                                           cfg.pixel_std,
			
 
				-                                          cfg.box_format,
			
 
				-                                          cfg.normalize_coords)
			
 
				+                                          )
			
 
				 
			
 
				     ## RT-DETR style transform
			
 
				     elif cfg.aug_type == 'ssd':
			
@@ -66,13 +64,11 @@ def build_transform(cfg, is_train=False):
 
				             transform = SSDAugmentation(cfg.train_img_size,
			
 
				                                            cfg.pixel_mean,
			
 
				                                            cfg.pixel_std,
			
 
				-                                           cfg.box_format,
			
 
				-                                           cfg.normalize_coords)
			
 
				+                                           )
			
 
				         else:
			
 
				             transform = SSDBaseTransform(cfg.test_img_size,
			
 
				                                             cfg.pixel_mean,
			
 
				                                             cfg.pixel_std,
			
 
				-                                            cfg.box_format,
			
 
				-                                            cfg.normalize_coords)
			
 
				+                                            )
			
 
				 
			
 
				     return transform
			
--- a/yolo/dataset/coco.py
+++ b/yolo/dataset/coco.py
@@ -132,8 +132,6 @@ if __name__ == "__main__":
 
				         def __init__(self) -> None:
			
 
				             self.max_stride = 32
			
 
				             # ---------------- Data process config ----------------
			
 
				-            self.box_format = 'xywh'
			
 
				-            self.normalize_coords = False
			
 
				             self.mosaic_prob = 1.0
			
 
				             self.mixup_prob  = 0.15
			
 
				             self.copy_paste  = 0.3
			
@@ -160,8 +158,6 @@ if __name__ == "__main__":
 
				         def __init__(self) -> None:
			
 
				             self.max_stride = 32
			
 
				             # ---------------- Data process config ----------------
			
 
				-            self.box_format = 'xywh'
			
 
				-            self.normalize_coords = False
			
 
				             self.mosaic_prob = 0.0
			
 
				             self.mixup_prob  = 0.0
			
 
				             self.copy_paste  = 0.0
			
@@ -211,21 +207,7 @@ if __name__ == "__main__":
 
				         labels = target["labels"]
			
 
				 
			
 
				         for box, label in zip(boxes, labels):
			
 
				-            if cfg.box_format == 'xyxy':
			
 
				-                x1, y1, x2, y2 = box
			
 
				-            elif cfg.box_format == 'xywh':
			
 
				-                cx, cy, bw, bh = box
			
 
				-                x1 = cx - 0.5 * bw
			
 
				-                y1 = cy - 0.5 * bh
			
 
				-                x2 = cx + 0.5 * bw
			
 
				-                y2 = cy + 0.5 * bh
			
 
				-            
			
 
				-            if cfg.normalize_coords:
			
 
				-                x1 *= img_w
			
 
				-                y1 *= img_h
			
 
				-                x2 *= img_w
			
 
				-                y2 *= img_h
			
 
				-
			
 
				+            x1, y1, x2, y2 = box
			
 
				             cls_id = int(label)
			
 
				             color = class_colors[cls_id]
			
 
				             # class name
			
--- a/yolo/dataset/data_augment/ssd_augment.py
+++ b/yolo/dataset/data_augment/ssd_augment.py
@@ -430,21 +430,14 @@ class Resize(object):
 
				 
			
 
				 ## Normalize tensor image
			
 
				 class Normalize(object):
			
 
				-    def __init__(self, pixel_mean, pixel_std, normalize_coords=False):
			
 
				+    def __init__(self, pixel_mean, pixel_std):
			
 
				         self.pixel_mean = pixel_mean
			
 
				         self.pixel_std = pixel_std
			
 
				-        self.normalize_coords = normalize_coords
			
 
				 
			
 
				     def __call__(self, image, target=None):
			
 
				         # normalize image
			
 
				         image = (image - self.pixel_mean) / self.pixel_std
			
 
				 
			
 
				-        # normalize bbox
			
 
				-        if target is not None and self.normalize_coords:
			
 
				-            img_h, img_w = image.shape[:2]
			
 
				-            target["boxes"][..., [0, 2]] = target["boxes"][..., [0, 2]] / float(img_w)
			
 
				-            target["boxes"][..., [1, 3]] = target["boxes"][..., [1, 3]] / float(img_h)
			
 
				-
			
 
				         return image, target
			
 
				 
			
 
				 ## Convert ndarray to torch.Tensor
			
@@ -489,14 +482,11 @@ class SSDAugmentation(object):
 
				                  img_size   = 640,
			
 
				                  pixel_mean = [123.675, 116.28, 103.53],
			
 
				                  pixel_std  = [58.395, 57.12, 57.375],
			
 
				-                 box_format = 'xywh',
			
 
				-                 normalize_coords = False):
			
 
				+                 ):
			
 
				         # ----------------- Basic parameters -----------------
			
 
				         self.img_size = img_size
			
 
				-        self.box_format = box_format
			
 
				         self.pixel_mean = pixel_mean   # RGB format
			
 
				         self.pixel_std  = pixel_std    # RGB format
			
 
				-        self.normalize_coords = normalize_coords
			
 
				         self.color_format = 'rgb'
			
 
				         print("================= Pixel Statistics =================")
			
 
				         print("Pixel mean: {}".format(self.pixel_mean))
			
@@ -510,9 +500,8 @@ class SSDAugmentation(object):
 
				             RandomHorizontalFlip(p=0.5),
			
 
				             Resize(img_size=self.img_size),
			
 
				             ConvertColorFormat(self.color_format),
			
 
				-            Normalize(self.pixel_mean, self.pixel_std, normalize_coords),
			
 
				+            Normalize(self.pixel_mean, self.pixel_std),
			
 
				             ToTensor(),
			
 
				-            ConvertBoxFormat(self.box_format),
			
 
				         ])
			
 
				 
			
 
				     def __call__(self, image, target, mosaic=False):
			
@@ -529,14 +518,11 @@ class SSDBaseTransform(object):
 
				                  img_size   = 640,
			
 
				                  pixel_mean = [123.675, 116.28, 103.53],
			
 
				                  pixel_std  = [58.395, 57.12, 57.375],
			
 
				-                 box_format = 'xywh',
			
 
				-                 normalize_coords = False):
			
 
				+                 ):
			
 
				         # ----------------- Basic parameters -----------------
			
 
				         self.img_size = img_size
			
 
				-        self.box_format = box_format
			
 
				         self.pixel_mean = pixel_mean  # RGB format
			
 
				         self.pixel_std  = pixel_std    # RGB format
			
 
				-        self.normalize_coords = normalize_coords
			
 
				         self.color_format = 'rgb'
			
 
				         print("================= Pixel Statistics =================")
			
 
				         print("Pixel mean: {}".format(self.pixel_mean))
			
@@ -546,9 +532,8 @@ class SSDBaseTransform(object):
 
				         self.transform = Compose([
			
 
				             Resize(img_size=self.img_size),
			
 
				             ConvertColorFormat(self.color_format),
			
 
				-            Normalize(self.pixel_mean, self.pixel_std, self.normalize_coords),
			
 
				+            Normalize(self.pixel_mean, self.pixel_std),
			
 
				             ToTensor(),
			
 
				-            ConvertBoxFormat(self.box_format),
			
 
				         ])
			
 
				 
			
 
				 
			
@@ -569,15 +554,11 @@ if __name__ == "__main__":
 
				         ssd_augment = SSDAugmentation(img_size=416,
			
 
				                                       pixel_mean=[0., 0., 0.],
			
 
				                                       pixel_std=[255., 255., 255.],
			
 
				-                                      box_format="xyxy",
			
 
				-                                      normalize_coords=False,
			
 
				                                       )
			
 
				     else:
			
 
				         ssd_augment = SSDBaseTransform(img_size=416,
			
 
				                                        pixel_mean=[0., 0., 0.],
			
 
				                                        pixel_std=[255., 255., 255.],
			
 
				-                                       box_format="xyxy",
			
 
				-                                       normalize_coords=False,
			
 
				                                        )
			
 
				     
			
 
				     # 读取图像数据
			
--- a/yolo/dataset/data_augment/yolo_augment.py
+++ b/yolo/dataset/data_augment/yolo_augment.py
@@ -107,15 +107,12 @@ class YOLOAugmentation(object):
 
				                  affine_params=None,
			
 
				                  pixel_mean = [0., 0., 0.],
			
 
				                  pixel_std  = [255., 255., 255.],
			
 
				-                 box_format='xyxy',
			
 
				-                 normalize_coords=False):
			
 
				+                 ):
			
 
				         # Basic parameters
			
 
				         self.img_size   = img_size
			
 
				         self.pixel_mean = pixel_mean
			
 
				         self.pixel_std  = pixel_std
			
 
				-        self.box_format = box_format
			
 
				         self.affine_params = affine_params
			
 
				-        self.normalize_coords = normalize_coords
			
 
				         self.color_format = 'bgr'
			
 
				 
			
 
				     def __call__(self, image, target, mosaic=False):
			
@@ -166,17 +163,6 @@ class YOLOAugmentation(object):
 
				             target["boxes"] = torch.as_tensor(target["boxes"]).float()
			
 
				             target["labels"] = torch.as_tensor(target["labels"]).long()
			
 
				 
			
 
				-            # normalize coords
			
 
				-            if self.normalize_coords:
			
 
				-                target["boxes"][..., [0, 2]] /= img_w
			
 
				-                target["boxes"][..., [1, 3]] /= img_h
			
 
				-
			
 
				-            # xyxy -> xywh
			
 
				-            if self.box_format == "xywh":
			
 
				-                box_cxcy = (target["boxes"][..., :2] + target["boxes"][..., 2:]) * 0.5
			
 
				-                box_bwbh =  target["boxes"][..., 2:] - target["boxes"][..., :2]
			
 
				-                target["boxes"] = torch.cat([box_cxcy, box_bwbh], dim=-1)
			
 
				-
			
 
				         # --------------- Pad Image ---------------
			
 
				         img_h0, img_w0 = image.shape[1:]
			
 
				         pad_image = torch.ones([image.size(0), self.img_size, self.img_size]).float() * 114.
			
@@ -194,14 +180,11 @@ class YOLOBaseTransform(object):
 
				                  max_stride=32,
			
 
				                  pixel_mean = [0., 0., 0.],
			
 
				                  pixel_std  = [255., 255., 255.],
			
 
				-                 box_format='xyxy',
			
 
				-                 normalize_coords=False):
			
 
				+                 ):
			
 
				         self.img_size = img_size
			
 
				         self.max_stride = max_stride
			
 
				         self.pixel_mean = pixel_mean
			
 
				         self.pixel_std  = pixel_std
			
 
				-        self.box_format = box_format
			
 
				-        self.normalize_coords = normalize_coords
			
 
				         self.color_format = 'bgr'
			
 
				 
			
 
				     def __call__(self, image, target=None, mosaic=False):
			
@@ -225,17 +208,6 @@ class YOLOBaseTransform(object):
 
				             target["boxes"] = torch.as_tensor(target["boxes"]).float()
			
 
				             target["labels"] = torch.as_tensor(target["labels"]).long()
			
 
				 
			
 
				-            # normalize coords
			
 
				-            if self.normalize_coords:
			
 
				-                target["boxes"][..., [0, 2]] /= img_w
			
 
				-                target["boxes"][..., [1, 3]] /= img_h
			
 
				-            
			
 
				-            # xyxy -> xywh
			
 
				-            if self.box_format == "xywh":
			
 
				-                box_cxcy = (target["boxes"][..., :2] + target["boxes"][..., 2:]) * 0.5
			
 
				-                box_bwbh =  target["boxes"][..., 2:] - target["boxes"][..., :2]
			
 
				-                target["boxes"] = torch.cat([box_cxcy, box_bwbh], dim=-1)
			
 
				-
			
 
				         # --------------- Pad image ---------------
			
 
				         img_h0, img_w0 = image.shape[1:]
			
 
				         dh = img_h0 % self.max_stride
			
--- a/yolo/dataset/voc.py
+++ b/yolo/dataset/voc.py
@@ -238,8 +238,6 @@ if __name__ == "__main__":
 
				         def __init__(self) -> None:
			
 
				             self.max_stride = 32
			
 
				             # ---------------- Data process config ----------------
			
 
				-            self.box_format = 'xywh'
			
 
				-            self.normalize_coords = False
			
 
				             self.mosaic_prob = 0.0
			
 
				             self.mixup_prob  = 0.0
			
 
				             self.copy_paste  = 0.0
			
@@ -290,21 +288,8 @@ if __name__ == "__main__":
 
				         labels = target["labels"]
			
 
				 
			
 
				         for box, label in zip(boxes, labels):
			
 
				-            if cfg.box_format == 'xyxy':
			
 
				-                x1, y1, x2, y2 = box
			
 
				-            elif cfg.box_format == 'xywh':
			
 
				-                cx, cy, bw, bh = box
			
 
				-                x1 = cx - 0.5 * bw
			
 
				-                y1 = cy - 0.5 * bh
			
 
				-                x2 = cx + 0.5 * bw
			
 
				-                y2 = cy + 0.5 * bh
			
 
				+            x1, y1, x2, y2 = box
			
 
				             
			
 
				-            if cfg.normalize_coords:
			
 
				-                x1 *= img_w
			
 
				-                y1 *= img_h
			
 
				-                x2 *= img_w
			
 
				-                y2 *= img_h
			
 
				-
			
 
				             cls_id = int(label)
			
 
				             color = class_colors[cls_id]
			
 
				             # class name