2 年之前 · 8e1118bdba
--- a/config/model_config/yolox2_config.py
+++ b/config/model_config/yolox2_config.py
@@ -48,6 +48,98 @@ yolox2_cfg = {
 
				         'trainer_type': 'rtcdet',
			
 
				     },
			
 
				 
			
 
				+    'yolox2_s':{
			
 
				+        # ---------------- Model config ----------------
			
 
				+        ## Backbone
			
 
				+        'bk_act': 'silu',
			
 
				+        'bk_norm': 'BN',
			
 
				+        'bk_depthwise': False,
			
 
				+        'width': 0.50,
			
 
				+        'depth': 0.34,
			
 
				+        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				+        'max_stride': 32,
			
 
				+        ## Neck: SPP
			
 
				+        'neck': 'sppf',
			
 
				+        'neck_expand_ratio': 0.5,
			
 
				+        'pooling_size': 5,
			
 
				+        'neck_act': 'silu',
			
 
				+        'neck_norm': 'BN',
			
 
				+        'neck_depthwise': False,
			
 
				+        ## Neck: PaFPN
			
 
				+        'fpn': 'yolox2_pafpn',
			
 
				+        'fpn_act': 'silu',
			
 
				+        'fpn_norm': 'BN',
			
 
				+        'fpn_depthwise': False,
			
 
				+        ## Head
			
 
				+        'head': 'decoupled_head',
			
 
				+        'head_act': 'silu',
			
 
				+        'head_norm': 'BN',
			
 
				+        'num_cls_head': 2,
			
 
				+        'num_reg_head': 2,
			
 
				+        'head_depthwise': False,
			
 
				+        # ---------------- Train config ----------------
			
 
				+        ## input
			
 
				+        'multi_scale': [0.7, 1.25],   # 448 -> 800
			
 
				+        'trans_type': 'yolox_small',
			
 
				+        # ---------------- Assignment config ----------------
			
 
				+        ## matcher
			
 
				+        'matcher': "aligned_simota",
			
 
				+        'matcher_hpy': {'soft_center_radius': 3.0,
			
 
				+                        'topk_candidates': 13},
			
 
				+        # ---------------- Loss config ----------------
			
 
				+        ## loss weight
			
 
				+        'loss_cls_weight': 1.0,
			
 
				+        'loss_box_weight': 2.0,
			
 
				+        # ---------------- Train config ----------------
			
 
				+        'trainer_type': 'rtcdet',
			
 
				+    },
			
 
				+
			
 
				+    'yolox2_m':{
			
 
				+        # ---------------- Model config ----------------
			
 
				+        ## Backbone
			
 
				+        'bk_act': 'silu',
			
 
				+        'bk_norm': 'BN',
			
 
				+        'bk_depthwise': False,
			
 
				+        'width': 0.75,
			
 
				+        'depth': 0.67,
			
 
				+        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				+        'max_stride': 32,
			
 
				+        ## Neck: SPP
			
 
				+        'neck': 'sppf',
			
 
				+        'neck_expand_ratio': 0.5,
			
 
				+        'pooling_size': 5,
			
 
				+        'neck_act': 'silu',
			
 
				+        'neck_norm': 'BN',
			
 
				+        'neck_depthwise': False,
			
 
				+        ## Neck: PaFPN
			
 
				+        'fpn': 'yolox2_pafpn',
			
 
				+        'fpn_act': 'silu',
			
 
				+        'fpn_norm': 'BN',
			
 
				+        'fpn_depthwise': False,
			
 
				+        ## Head
			
 
				+        'head': 'decoupled_head',
			
 
				+        'head_act': 'silu',
			
 
				+        'head_norm': 'BN',
			
 
				+        'num_cls_head': 2,
			
 
				+        'num_reg_head': 2,
			
 
				+        'head_depthwise': False,
			
 
				+        # ---------------- Train config ----------------
			
 
				+        ## input
			
 
				+        'multi_scale': [0.7, 1.25],   # 448 -> 800
			
 
				+        'trans_type': 'yolox_medium',
			
 
				+        # ---------------- Assignment config ----------------
			
 
				+        ## matcher
			
 
				+        'matcher': "aligned_simota",
			
 
				+        'matcher_hpy': {'soft_center_radius': 3.0,
			
 
				+                        'topk_candidates': 13},
			
 
				+        # ---------------- Loss config ----------------
			
 
				+        ## loss weight
			
 
				+        'loss_cls_weight': 1.0,
			
 
				+        'loss_box_weight': 2.0,
			
 
				+        # ---------------- Train config ----------------
			
 
				+        'trainer_type': 'rtcdet',
			
 
				+    },
			
 
				+
			
 
				     'yolox2_l':{
			
 
				         # ---------------- Model config ----------------
			
 
				         ## Backbone
			
@@ -94,4 +186,50 @@ yolox2_cfg = {
 
				         'trainer_type': 'rtcdet',
			
 
				     },
			
 
				 
			
 
				+    'yolox2_x':{
			
 
				+        # ---------------- Model config ----------------
			
 
				+        ## Backbone
			
 
				+        'bk_act': 'silu',
			
 
				+        'bk_norm': 'BN',
			
 
				+        'bk_depthwise': False,
			
 
				+        'width': 1.25,
			
 
				+        'depth': 1.34,
			
 
				+        'stride': [8, 16, 32],  # P3, P4, P5
			
 
				+        'max_stride': 32,
			
 
				+        ## Neck: SPP
			
 
				+        'neck': 'sppf',
			
 
				+        'neck_expand_ratio': 0.5,
			
 
				+        'pooling_size': 5,
			
 
				+        'neck_act': 'silu',
			
 
				+        'neck_norm': 'BN',
			
 
				+        'neck_depthwise': False,
			
 
				+        ## Neck: PaFPN
			
 
				+        'fpn': 'yolox2_pafpn',
			
 
				+        'fpn_act': 'silu',
			
 
				+        'fpn_norm': 'BN',
			
 
				+        'fpn_depthwise': False,
			
 
				+        ## Head
			
 
				+        'head': 'decoupled_head',
			
 
				+        'head_act': 'silu',
			
 
				+        'head_norm': 'BN',
			
 
				+        'num_cls_head': 2,
			
 
				+        'num_reg_head': 2,
			
 
				+        'head_depthwise': False,
			
 
				+        # ---------------- Train config ----------------
			
 
				+        ## input
			
 
				+        'multi_scale': [0.7, 1.25],   # 448 -> 800
			
 
				+        'trans_type': 'yolox_huge',
			
 
				+        # ---------------- Assignment config ----------------
			
 
				+        ## matcher
			
 
				+        'matcher': "aligned_simota",
			
 
				+        'matcher_hpy': {'soft_center_radius': 3.0,
			
 
				+                        'topk_candidates': 13},
			
 
				+        # ---------------- Loss config ----------------
			
 
				+        ## loss weight
			
 
				+        'loss_cls_weight': 1.0,
			
 
				+        'loss_box_weight': 2.0,
			
 
				+        # ---------------- Train config ----------------
			
 
				+        'trainer_type': 'rtcdet',
			
 
				+    },
			
 
				+
			
 
				 }
			
--- a/models/detectors/yolov8/yolov8_head.py
+++ b/models/detectors/yolov8/yolov8_head.py
@@ -9,7 +9,15 @@ except:
 
				 
			
 
				 # Single-level Head
			
 
				 class SingleLevelHead(nn.Module):
			
 
				-    def __init__(self, in_dim, cls_head_dim, reg_head_dim, num_cls_head, num_reg_head, act_type, norm_type, depthwise):
			
 
				+    def __init__(self,
			
 
				+                 in_dim       :int  = 256,
			
 
				+                 cls_head_dim :int  = 256,
			
 
				+                 reg_head_dim :int  = 256,
			
 
				+                 num_cls_head :int  = 2,
			
 
				+                 num_reg_head :int  = 2,
			
 
				+                 act_type     :str  = "silu",
			
 
				+                 norm_type    :str  = "BN",
			
 
				+                 depthwise    :bool = False):
			
 
				         super().__init__()
			
 
				         # --------- Basic Parameters ----------
			
 
				         self.in_dim = in_dim
			
@@ -78,27 +86,24 @@ class SingleLevelHead(nn.Module):
 
				 
			
 
				         return cls_feats, reg_feats
			
 
				     
			
 
				-
			
 
				 # Multi-level Head
			
 
				 class MultiLevelHead(nn.Module):
			
 
				     def __init__(self, cfg, in_dims, num_levels=3, num_classes=80, reg_max=16):
			
 
				         super().__init__()
			
 
				         ## ----------- Network Parameters -----------
			
 
				         self.multi_level_heads = nn.ModuleList(
			
 
				-            [SingleLevelHead(
			
 
				-                in_dims[level],
			
 
				-                max(in_dims[0], min(num_classes, 100)), # cls head out_dim
			
 
				-                max(in_dims[0]//4, 16, 4*reg_max),      # reg head out_dim
			
 
				-                cfg['num_cls_head'],
			
 
				-                cfg['num_reg_head'],
			
 
				-                cfg['head_act'],
			
 
				-                cfg['head_norm'],
			
 
				-                cfg['head_depthwise'])
			
 
				-                for level in range(num_levels)
			
 
				-            ])
			
 
				+            [SingleLevelHead(in_dim       = in_dims[level],
			
 
				+                             cls_head_dim = max(in_dims[0], min(num_classes, 100)),
			
 
				+                             reg_head_dim = max(in_dims[0]//4, 16, 4*reg_max),
			
 
				+                             num_cls_head = cfg['num_cls_head'],
			
 
				+                             num_reg_head = cfg['num_reg_head'],
			
 
				+                             act_type     = cfg['head_act'],
			
 
				+                             norm_type    = cfg['head_norm'],
			
 
				+                             depthwise    = cfg['head_depthwise'])
			
 
				+                             for level in range(num_levels)
			
 
				+                             ])
			
 
				         # --------- Basic Parameters ----------
			
 
				         self.in_dims = in_dims
			
 
				-
			
 
				         self.cls_head_dim = self.multi_level_heads[0].cls_head_dim
			
 
				         self.reg_head_dim = self.multi_level_heads[0].reg_head_dim
			
 
				 
			
--- a/models/detectors/yolov8/yolov8_pred.py
+++ b/models/detectors/yolov8/yolov8_pred.py
@@ -6,7 +6,12 @@ import torch.nn.functional as F
 
				 
			
 
				 # Single-level pred layer
			
 
				 class SingleLevelPredLayer(nn.Module):
			
 
				-    def __init__(self, cls_dim, reg_dim, stride, num_classes, num_coords=4):
			
 
				+    def __init__(self,
			
 
				+                 cls_dim      :int = 256,
			
 
				+                 reg_dim      :int = 256,
			
 
				+                 stride       :int = 32,
			
 
				+                 num_classes  :int = 80,
			
 
				+                 num_coords   :int = 4):
			
 
				         super().__init__()
			
 
				         # --------- Basic Parameters ----------
			
 
				         self.stride = stride
			
@@ -57,14 +62,13 @@ class MultiLevelPredLayer(nn.Module):
 
				         # ----------- Network Parameters -----------
			
 
				         ## pred layers
			
 
				         self.multi_level_preds = nn.ModuleList(
			
 
				-            [SingleLevelPredLayer(
			
 
				-                cls_dim,
			
 
				-                reg_dim,
			
 
				-                strides[l],
			
 
				-                num_classes,
			
 
				-                num_coords * self.reg_max)
			
 
				-                for l in range(num_levels)
			
 
				-            ])
			
 
				+            [SingleLevelPredLayer(cls_dim     = cls_dim,
			
 
				+                                  reg_dim     = reg_dim,
			
 
				+                                  stride      = strides[level],
			
 
				+                                  num_classes = num_classes,
			
 
				+                                  num_coords  = num_coords * reg_max)
			
 
				+                                  for level in range(num_levels)
			
 
				+                                  ])
			
 
				         ## proj conv
			
 
				         proj_init = torch.arange(reg_max, dtype=torch.float)
			
 
				         self.proj_conv = nn.Conv2d(self.reg_max, 1, kernel_size=1, bias=False).requires_grad_(False)
			
--- a/models/detectors/yolox2/README.md
+++ b/models/detectors/yolox2/README.md
@@ -9,7 +9,7 @@
 
				 | YOLOX2-X | 8xb16 |  640  |                        |                   |                   |                    |  |
			
 
				 
			
 
				 - For training, we train YOLOX2 series with 300 epochs on COCO.
			
 
				-- For data augmentation, we use the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation.
			
 
				+- For data augmentation, we use large-scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following YOLOX.
			
 
				 - For optimizer, we use AdamW with weight decay 0.05 and a base per-image lr of 0.001 / 64.
			
 
				 - For learning rate scheduler, we use Linear decay scheduler.