2 years ago · aacce86ed1
--- a/config/data_config/transform_config.py
+++ b/config/data_config/transform_config.py
@@ -13,6 +13,7 @@ yolov5_huge_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': True,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 0.2,
			
@@ -32,6 +33,7 @@ yolov5_large_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': True,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 0.15,
			
@@ -51,6 +53,7 @@ yolov5_medium_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': True,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 0.10,
			
@@ -70,6 +73,7 @@ yolov5_small_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': True,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 0.0,
			
@@ -89,6 +93,7 @@ yolov5_nano_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': True,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 0.0,
			
@@ -108,6 +113,7 @@ yolov5_pico_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': True,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 0.5,
			
 
				     'mixup_prob': 0.0,
			
@@ -129,6 +135,7 @@ yolox_huge_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': False,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 1.0,
			
@@ -148,6 +155,7 @@ yolox_large_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': False,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 1.0,
			
@@ -167,6 +175,7 @@ yolox_medium_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': False,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 1.0,
			
@@ -186,6 +195,7 @@ yolox_small_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': False,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 1.0,
			
@@ -205,6 +215,7 @@ yolox_nano_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': False,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 1.0,
			
 
				     'mixup_prob': 0.5,
			
@@ -224,6 +235,7 @@ yolox_pico_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': False,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 0.5,
			
 
				     'mixup_prob': 0.0,
			
@@ -245,6 +257,7 @@ rtrdet_large_trans_config = {
 
				     'hsv_h': 0.015,
			
 
				     'hsv_s': 0.7,
			
 
				     'hsv_v': 0.4,
			
 
				+    'use_ablu': True,
			
 
				     # Mosaic & Mixup
			
 
				     'mosaic_prob': 0.0,
			
 
				     'mixup_prob': 0.0,
			
@@ -257,6 +270,7 @@ rtrdet_large_trans_config = {
 
				 # ----------------------- SSD-Style Transform -----------------------
			
 
				 ssd_trans_config = {
			
 
				     'aug_type': 'ssd',
			
 
				+    'use_ablu': False,
			
 
				     # Mosaic & Mixup are not used for SSD-style augmentation
			
 
				     'mosaic_prob': 0.,
			
 
				     'mixup_prob': 0.,
			
--- a/dataset/build.py
+++ b/dataset/build.py
@@ -91,7 +91,8 @@ def build_transform(args, trans_config, max_stride=32, is_train=False):
 
				         if is_train:
			
 
				             transform = YOLOv5Augmentation(
			
 
				                 img_size=args.img_size,
			
 
				-                trans_config=trans_config
			
 
				+                trans_config=trans_config,
			
 
				+                use_ablu=trans_config['use_ablu']
			
 
				                 )
			
 
				         else:
			
 
				             transform = YOLOv5BaseTransform(
			
--- a/dataset/data_augment/yolov5_augment.py
+++ b/dataset/data_augment/yolov5_augment.py
@@ -3,6 +3,7 @@ import cv2
 
				 import math
			
 
				 import numpy as np
			
 
				 import torch
			
 
				+import albumentations as albu
			
 
				 
			
 
				 
			
 
				 # ------------------------- Basic augmentations -------------------------
			
@@ -94,6 +95,31 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
 
				     img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
			
 
				     cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed
			
 
				 
			
 
				+## Ablu transform
			
 
				+class Albumentations(object):
			
 
				+    def __init__(self, img_size=640):
			
 
				+        self.img_size = img_size
			
 
				+        self.transform = albu.Compose(
			
 
				+            [albu.Blur(p=0.01),
			
 
				+             albu.MedianBlur(p=0.01),
			
 
				+             albu.ToGray(p=0.01),
			
 
				+             albu.CLAHE(p=0.01),
			
 
				+             ],
			
 
				+             bbox_params=albu.BboxParams(format='pascal_voc', label_fields=['labels'])
			
 
				+        )
			
 
				+
			
 
				+    def __call__(self, image, target=None):
			
 
				+        labels = target['labels']
			
 
				+        bboxes = target['boxes']
			
 
				+        if len(labels) > 0:
			
 
				+            new = self.transform(image=image, bboxes=bboxes, labels=labels)
			
 
				+            if len(new["labels"]) > 0:
			
 
				+                image = new['image']
			
 
				+                target['labels'] = np.array(new["labels"], dtype=labels.dtype)
			
 
				+                target['boxes'] = np.array(new["bboxes"], dtype=bboxes.dtype)
			
 
				+
			
 
				+        return image, target
			
 
				+
			
 
				 
			
 
				 # ------------------------- Strong augmentations -------------------------
			
 
				 ## YOLOv5-Mosaic
			
@@ -303,17 +329,16 @@ def yolox_mixup_augment(origin_img, origin_target, new_img, new_target, img_size
 
				 # ------------------------- Preprocessers -------------------------
			
 
				 ## YOLOv5-style Transform for Train
			
 
				 class YOLOv5Augmentation(object):
			
 
				-    def __init__(self, 
			
 
				-                 img_size=640,
			
 
				-                 trans_config=None):
			
 
				-        self.trans_config = trans_config
			
 
				+    def __init__(self, img_size=640, trans_config=None, use_ablu=False):
			
 
				+        # Basic parameters
			
 
				         self.img_size = img_size
			
 
				-
			
 
				+        self.trans_config = trans_config
			
 
				+        # Albumentations
			
 
				+        self.ablu_trans = Albumentations(img_size) if use_ablu else None
			
 
				 
			
 
				     def __call__(self, image, target, mosaic=False):
			
 
				-        # resize
			
 
				+        # --------------- Keep ratio Resize ---------------
			
 
				         img_h0, img_w0 = image.shape[:2]
			
 
				-
			
 
				         r = self.img_size / max(img_h0, img_w0)
			
 
				         if r != 1: 
			
 
				             interp = cv2.INTER_LINEAR
			
@@ -321,21 +346,32 @@ class YOLOv5Augmentation(object):
 
				             img = cv2.resize(image, new_shape, interpolation=interp)
			
 
				         else:
			
 
				             img = image
			
 
				-
			
 
				         img_h, img_w = img.shape[:2]
			
 
				 
			
 
				-        # hsv augment
			
 
				+        # --------------- Filter bad targets ---------------
			
 
				+        tgt_boxes_wh = target["boxes"][..., 2:] - target["boxes"][..., :2]
			
 
				+        min_tgt_size = np.min(tgt_boxes_wh, axis=-1)
			
 
				+        keep = (min_tgt_size > 1)
			
 
				+        target["boxes"]  = target["boxes"][keep]
			
 
				+        target["labels"] = target["labels"][keep]
			
 
				+
			
 
				+        # --------------- Albumentations ---------------
			
 
				+        if self.ablu_trans is not None:
			
 
				+            img, target = self.ablu_trans(img, target)
			
 
				+
			
 
				+        # --------------- HSV augmentations ---------------
			
 
				         augment_hsv(img, hgain=self.trans_config['hsv_h'], 
			
 
				                     sgain=self.trans_config['hsv_s'], 
			
 
				                     vgain=self.trans_config['hsv_v'])
			
 
				         
			
 
				+        # --------------- Spatial augmentations ---------------
			
 
				+        ## Random perspective
			
 
				         if not mosaic:
			
 
				             # rescale bbox
			
 
				             boxes_ = target["boxes"].copy()
			
 
				             boxes_[:, [0, 2]] = boxes_[:, [0, 2]] / img_w0 * img_w
			
 
				             boxes_[:, [1, 3]] = boxes_[:, [1, 3]] / img_h0 * img_h
			
 
				             target["boxes"] = boxes_
			
 
				-
			
 
				             # spatial augment
			
 
				             target_ = np.concatenate(
			
 
				                 (target['labels'][..., None], target['boxes']), axis=-1)
			
@@ -349,8 +385,7 @@ class YOLOv5Augmentation(object):
 
				                 )
			
 
				             target['boxes'] = target_[..., 1:]
			
 
				             target['labels'] = target_[..., 0]
			
 
				-        
			
 
				-        # random flip
			
 
				+        ## Random flip
			
 
				         if random.random() < 0.5:
			
 
				             w = img.shape[1]
			
 
				             img = np.fliplr(img).copy()
			
@@ -358,17 +393,14 @@ class YOLOv5Augmentation(object):
 
				             boxes[..., [0, 2]] = w - boxes[..., [2, 0]]
			
 
				             target["boxes"] = boxes
			
 
				 
			
 
				-        # to tensor
			
 
				+        # --------------- To torch.Tensor ---------------
			
 
				         img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
			
 
				-
			
 
				         if target is not None:
			
 
				             target["boxes"] = torch.as_tensor(target["boxes"]).float()
			
 
				             target["labels"] = torch.as_tensor(target["labels"]).long()
			
 
				 
			
 
				-        # pad img
			
 
				+        # --------------- Pad image ---------------
			
 
				         img_h0, img_w0 = img_tensor.shape[1:]
			
 
				-        assert max(img_h0, img_w0) <= self.img_size
			
 
				-
			
 
				         pad_image = torch.ones([img_tensor.size(0), self.img_size, self.img_size]).float() * 114.
			
 
				         pad_image[:, :img_h0, :img_w0] = img_tensor
			
 
				         dh = self.img_size - img_h0
			
@@ -384,23 +416,17 @@ class YOLOv5BaseTransform(object):
 
				 
			
 
				 
			
 
				     def __call__(self, image, target=None, mosaic=False):
			
 
				-        # resize
			
 
				+        # --------------- Keep ratio Resize ---------------
			
 
				+        ## Resize image
			
 
				         img_h0, img_w0 = image.shape[:2]
			
 
				-
			
 
				         r = self.img_size / max(img_h0, img_w0)
			
 
				-        # r = min(r, 1.0) # only scale down, do not scale up (for better val mAP)
			
 
				         if r != 1: 
			
 
				             new_shape = (int(round(img_w0 * r)), int(round(img_h0 * r)))
			
 
				             img = cv2.resize(image, new_shape, interpolation=cv2.INTER_LINEAR)
			
 
				         else:
			
 
				             img = image
			
 
				-
			
 
				         img_h, img_w = img.shape[:2]
			
 
				-
			
 
				-        # to tensor
			
 
				-        img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
			
 
				-
			
 
				-        # rescale bboxes
			
 
				+        ## Rescale bboxes
			
 
				         if target is not None:
			
 
				             # rescale bbox
			
 
				             boxes_ = target["boxes"].copy()
			
@@ -408,11 +434,13 @@ class YOLOv5BaseTransform(object):
 
				             boxes_[:, [1, 3]] = boxes_[:, [1, 3]] / img_h0 * img_h
			
 
				             target["boxes"] = boxes_
			
 
				 
			
 
				-            # to tensor
			
 
				+        # --------------- To torch.Tensor ---------------
			
 
				+        img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
			
 
				+        if target is not None:
			
 
				             target["boxes"] = torch.as_tensor(target["boxes"]).float()
			
 
				             target["labels"] = torch.as_tensor(target["labels"]).long()
			
 
				 
			
 
				-        # pad img
			
 
				+        # --------------- Pad image ---------------
			
 
				         img_h0, img_w0 = img_tensor.shape[1:]
			
 
				         dh = img_h0 % self.max_stride
			
 
				         dw = img_w0 % self.max_stride
			
--- a/dataset/voc.py
+++ b/dataset/voc.py
@@ -322,11 +322,12 @@ if __name__ == "__main__":
 
				         'hsv_h': 0.015,
			
 
				         'hsv_s': 0.7,
			
 
				         'hsv_v': 0.4,
			
 
				+        'use_ablu': True,
			
 
				         # Mosaic & Mixup
			
 
				         'mosaic_prob': 1.0,
			
 
				         'mixup_prob': 1.0,
			
 
				         'mosaic_type': 'yolov5_mosaic',
			
 
				-        'mixup_type': 'yolox_mixup',
			
 
				+        'mixup_type': 'yolov5_mixup',
			
 
				         'mixup_scale': [0.5, 1.5]
			
 
				     }
			
 
				     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)
			
--- a/models/detectors/yolov8/yolov8_head.py
+++ b/models/detectors/yolov8/yolov8_head.py
@@ -78,8 +78,8 @@ class MultiLevelHead(nn.Module):
 
				         self.multi_level_heads = nn.ModuleList(
			
 
				             [SingleLevelHead(
			
 
				                 in_dims[level],
			
 
				-                max(in_dims[0], num_classes),        # cls head out_dim
			
 
				-                max(in_dims[0]//4, 16, 4*reg_max),   # reg head out_dim
			
 
				+                max(in_dims[0], min(num_classes, 100)), # cls head out_dim
			
 
				+                max(in_dims[0]//4, 16, 4*reg_max),      # reg head out_dim
			
 
				                 cfg['num_cls_head'],
			
 
				                 cfg['num_reg_head'],
			
 
				                 cfg['head_act'],
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,3 +23,5 @@ onnxruntime
 
				 openvino
			
 
				 
			
 
				 loguru
			
 
				+
			
 
				+albumentations