Browse Source

add Albu trans

yjh0410 2 years ago
parent
commit
aacce86ed1

+ 14 - 0
config/data_config/transform_config.py

@@ -13,6 +13,7 @@ yolov5_huge_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': True,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 0.2,
@@ -32,6 +33,7 @@ yolov5_large_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': True,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 0.15,
@@ -51,6 +53,7 @@ yolov5_medium_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': True,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 0.10,
@@ -70,6 +73,7 @@ yolov5_small_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': True,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 0.0,
@@ -89,6 +93,7 @@ yolov5_nano_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': True,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 0.0,
@@ -108,6 +113,7 @@ yolov5_pico_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': True,
     # Mosaic & Mixup
     'mosaic_prob': 0.5,
     'mixup_prob': 0.0,
@@ -129,6 +135,7 @@ yolox_huge_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': False,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 1.0,
@@ -148,6 +155,7 @@ yolox_large_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': False,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 1.0,
@@ -167,6 +175,7 @@ yolox_medium_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': False,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 1.0,
@@ -186,6 +195,7 @@ yolox_small_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': False,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 1.0,
@@ -205,6 +215,7 @@ yolox_nano_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': False,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
     'mixup_prob': 0.5,
@@ -224,6 +235,7 @@ yolox_pico_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': False,
     # Mosaic & Mixup
     'mosaic_prob': 0.5,
     'mixup_prob': 0.0,
@@ -245,6 +257,7 @@ rtrdet_large_trans_config = {
     'hsv_h': 0.015,
     'hsv_s': 0.7,
     'hsv_v': 0.4,
+    'use_ablu': True,
     # Mosaic & Mixup
     'mosaic_prob': 0.0,
     'mixup_prob': 0.0,
@@ -257,6 +270,7 @@ rtrdet_large_trans_config = {
 # ----------------------- SSD-Style Transform -----------------------
 ssd_trans_config = {
     'aug_type': 'ssd',
+    'use_ablu': False,
     # Mosaic & Mixup are not used for SSD-style augmentation
     'mosaic_prob': 0.,
     'mixup_prob': 0.,

+ 2 - 1
dataset/build.py

@@ -91,7 +91,8 @@ def build_transform(args, trans_config, max_stride=32, is_train=False):
         if is_train:
             transform = YOLOv5Augmentation(
                 img_size=args.img_size,
-                trans_config=trans_config
+                trans_config=trans_config,
+                use_ablu=trans_config['use_ablu']
                 )
         else:
             transform = YOLOv5BaseTransform(

+ 56 - 28
dataset/data_augment/yolov5_augment.py

@@ -3,6 +3,7 @@ import cv2
 import math
 import numpy as np
 import torch
+import albumentations as albu
 
 
 # ------------------------- Basic augmentations -------------------------
@@ -94,6 +95,31 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
     img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
     cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed
 
+## Ablu transform
+class Albumentations(object):
+    def __init__(self, img_size=640):
+        self.img_size = img_size
+        self.transform = albu.Compose(
+            [albu.Blur(p=0.01),
+             albu.MedianBlur(p=0.01),
+             albu.ToGray(p=0.01),
+             albu.CLAHE(p=0.01),
+             ],
+             bbox_params=albu.BboxParams(format='pascal_voc', label_fields=['labels'])
+        )
+
+    def __call__(self, image, target=None):
+        labels = target['labels']
+        bboxes = target['boxes']
+        if len(labels) > 0:
+            new = self.transform(image=image, bboxes=bboxes, labels=labels)
+            if len(new["labels"]) > 0:
+                image = new['image']
+                target['labels'] = np.array(new["labels"], dtype=labels.dtype)
+                target['boxes'] = np.array(new["bboxes"], dtype=bboxes.dtype)
+
+        return image, target
+
 
 # ------------------------- Strong augmentations -------------------------
 ## YOLOv5-Mosaic
@@ -303,17 +329,16 @@ def yolox_mixup_augment(origin_img, origin_target, new_img, new_target, img_size
 # ------------------------- Preprocessers -------------------------
 ## YOLOv5-style Transform for Train
 class YOLOv5Augmentation(object):
-    def __init__(self, 
-                 img_size=640,
-                 trans_config=None):
-        self.trans_config = trans_config
+    def __init__(self, img_size=640, trans_config=None, use_ablu=False):
+        # Basic parameters
         self.img_size = img_size
-
+        self.trans_config = trans_config
+        # Albumentations
+        self.ablu_trans = Albumentations(img_size) if use_ablu else None
 
     def __call__(self, image, target, mosaic=False):
-        # resize
+        # --------------- Keep ratio Resize ---------------
         img_h0, img_w0 = image.shape[:2]
-
         r = self.img_size / max(img_h0, img_w0)
         if r != 1: 
             interp = cv2.INTER_LINEAR
@@ -321,21 +346,32 @@ class YOLOv5Augmentation(object):
             img = cv2.resize(image, new_shape, interpolation=interp)
         else:
             img = image
-
         img_h, img_w = img.shape[:2]
 
-        # hsv augment
+        # --------------- Filter bad targets ---------------
+        tgt_boxes_wh = target["boxes"][..., 2:] - target["boxes"][..., :2]
+        min_tgt_size = np.min(tgt_boxes_wh, axis=-1)
+        keep = (min_tgt_size > 1)
+        target["boxes"]  = target["boxes"][keep]
+        target["labels"] = target["labels"][keep]
+
+        # --------------- Albumentations ---------------
+        if self.ablu_trans is not None:
+            img, target = self.ablu_trans(img, target)
+
+        # --------------- HSV augmentations ---------------
         augment_hsv(img, hgain=self.trans_config['hsv_h'], 
                     sgain=self.trans_config['hsv_s'], 
                     vgain=self.trans_config['hsv_v'])
         
+        # --------------- Spatial augmentations ---------------
+        ## Random perspective
         if not mosaic:
             # rescale bbox
             boxes_ = target["boxes"].copy()
             boxes_[:, [0, 2]] = boxes_[:, [0, 2]] / img_w0 * img_w
             boxes_[:, [1, 3]] = boxes_[:, [1, 3]] / img_h0 * img_h
             target["boxes"] = boxes_
-
             # spatial augment
             target_ = np.concatenate(
                 (target['labels'][..., None], target['boxes']), axis=-1)
@@ -349,8 +385,7 @@ class YOLOv5Augmentation(object):
                 )
             target['boxes'] = target_[..., 1:]
             target['labels'] = target_[..., 0]
-        
-        # random flip
+        ## Random flip
         if random.random() < 0.5:
             w = img.shape[1]
             img = np.fliplr(img).copy()
@@ -358,17 +393,14 @@ class YOLOv5Augmentation(object):
             boxes[..., [0, 2]] = w - boxes[..., [2, 0]]
             target["boxes"] = boxes
 
-        # to tensor
+        # --------------- To torch.Tensor ---------------
         img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
-
         if target is not None:
             target["boxes"] = torch.as_tensor(target["boxes"]).float()
             target["labels"] = torch.as_tensor(target["labels"]).long()
 
-        # pad img
+        # --------------- Pad image ---------------
         img_h0, img_w0 = img_tensor.shape[1:]
-        assert max(img_h0, img_w0) <= self.img_size
-
         pad_image = torch.ones([img_tensor.size(0), self.img_size, self.img_size]).float() * 114.
         pad_image[:, :img_h0, :img_w0] = img_tensor
         dh = self.img_size - img_h0
@@ -384,23 +416,17 @@ class YOLOv5BaseTransform(object):
 
 
     def __call__(self, image, target=None, mosaic=False):
-        # resize
+        # --------------- Keep ratio Resize ---------------
+        ## Resize image
         img_h0, img_w0 = image.shape[:2]
-
         r = self.img_size / max(img_h0, img_w0)
-        # r = min(r, 1.0) # only scale down, do not scale up (for better val mAP)
         if r != 1: 
             new_shape = (int(round(img_w0 * r)), int(round(img_h0 * r)))
             img = cv2.resize(image, new_shape, interpolation=cv2.INTER_LINEAR)
         else:
             img = image
-
         img_h, img_w = img.shape[:2]
-
-        # to tensor
-        img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
-
-        # rescale bboxes
+        ## Rescale bboxes
         if target is not None:
             # rescale bbox
             boxes_ = target["boxes"].copy()
@@ -408,11 +434,13 @@ class YOLOv5BaseTransform(object):
             boxes_[:, [1, 3]] = boxes_[:, [1, 3]] / img_h0 * img_h
             target["boxes"] = boxes_
 
-            # to tensor
+        # --------------- To torch.Tensor ---------------
+        img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
+        if target is not None:
             target["boxes"] = torch.as_tensor(target["boxes"]).float()
             target["labels"] = torch.as_tensor(target["labels"]).long()
 
-        # pad img
+        # --------------- Pad image ---------------
         img_h0, img_w0 = img_tensor.shape[1:]
         dh = img_h0 % self.max_stride
         dw = img_w0 % self.max_stride

+ 2 - 1
dataset/voc.py

@@ -322,11 +322,12 @@ if __name__ == "__main__":
         'hsv_h': 0.015,
         'hsv_s': 0.7,
         'hsv_v': 0.4,
+        'use_ablu': True,
         # Mosaic & Mixup
         'mosaic_prob': 1.0,
         'mixup_prob': 1.0,
         'mosaic_type': 'yolov5_mosaic',
-        'mixup_type': 'yolox_mixup',
+        'mixup_type': 'yolov5_mixup',
         'mixup_scale': [0.5, 1.5]
     }
     transform, trans_cfg = build_transform(args, trans_config, 32, args.is_train)

+ 2 - 2
models/detectors/yolov8/yolov8_head.py

@@ -78,8 +78,8 @@ class MultiLevelHead(nn.Module):
         self.multi_level_heads = nn.ModuleList(
             [SingleLevelHead(
                 in_dims[level],
-                max(in_dims[0], num_classes),        # cls head out_dim
-                max(in_dims[0]//4, 16, 4*reg_max),   # reg head out_dim
+                max(in_dims[0], min(num_classes, 100)), # cls head out_dim
+                max(in_dims[0]//4, 16, 4*reg_max),      # reg head out_dim
                 cfg['num_cls_head'],
                 cfg['num_reg_head'],
                 cfg['head_act'],

+ 2 - 0
requirements.txt

@@ -23,3 +23,5 @@ onnxruntime
 openvino
 
 loguru
+
+albumentations