yjh0410 1 year ago
parent
commit
28a4b98179

+ 3 - 3
yolo/config/__init__.py

@@ -6,7 +6,7 @@ from .yolov5_config     import build_yolov5_config
 from .yolox_config  import build_yolox_config
 from .yolov6_config     import build_yolov6_config
 from .yolov8_config     import build_yolov8_config
-from .yolov9_config     import build_gelan_config
+from .yolov9_config     import build_yolov9_config
 from .rtdetr_config     import build_rtdetr_config
 
 
@@ -28,8 +28,8 @@ def build_config(args):
         cfg = build_yolov6_config(args)
     elif 'yolov8' in args.model:
         cfg = build_yolov8_config(args)
-    elif 'gelan' in args.model:
-        cfg = build_gelan_config(args)
+    elif 'yolov9' in args.model:
+        cfg = build_yolov9_config(args)
         
     # ----------- RT-DETR -----------
     elif 'rtdetr' in args.model:

+ 6 - 6
yolo/config/yolov5_config.py

@@ -24,7 +24,7 @@ class Yolov5BaseConfig(object):
         self.out_stride = [8, 16, 32]
         self.max_stride = 32
         self.num_levels = 3
-        self.scale      = "b"
+        self.model_scale = "l"
         ## Backbone
         self.use_pretrained = True
         ## Head
@@ -111,7 +111,7 @@ class Yolov5NConfig(Yolov5BaseConfig):
         # ---------------- Model config ----------------
         self.width = 0.25
         self.depth = 0.34
-        self.scale = "n"
+        self.model_scale = "n"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -125,7 +125,7 @@ class Yolov5SConfig(Yolov5BaseConfig):
         # ---------------- Model config ----------------
         self.width = 0.50
         self.depth = 0.34
-        self.scale = "s"
+        self.model_scale = "s"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -139,7 +139,7 @@ class Yolov5MConfig(Yolov5BaseConfig):
         # ---------------- Model config ----------------
         self.width = 0.75
         self.depth = 0.67
-        self.scale = "m"
+        self.model_scale = "m"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -153,7 +153,7 @@ class Yolov5LConfig(Yolov5BaseConfig):
         # ---------------- Model config ----------------
         self.width = 1.0
         self.depth = 1.0
-        self.scale = "l"
+        self.model_scale = "l"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -167,7 +167,7 @@ class Yolov5XConfig(Yolov5BaseConfig):
         # ---------------- Model config ----------------
         self.width = 1.25
         self.depth = 1.34
-        self.scale = "x"
+        self.model_scale = "x"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0

+ 15 - 29
yolo/config/yolov8_config.py

@@ -19,35 +19,21 @@ def build_yolov8_config(args):
 class Yolov8BaseConfig(object):
     def __init__(self) -> None:
         # ---------------- Model config ----------------
-        self.width    = 1.0
-        self.depth    = 1.0
-        self.ratio    = 1.0
-        self.reg_max  = 16
+        self.model_scale = "l"
+        self.width   = 1.0
+        self.depth   = 1.0
+        self.ratio   = 1.0
+        self.reg_max = 16
+
         self.out_stride = [8, 16, 32]
         self.max_stride = 32
-        self.num_levels = 3
-        self.scale      = "b"
+
         ## Backbone
-        self.bk_act   = 'silu'
-        self.bk_norm  = 'BN'
-        self.bk_depthwise = False
         self.use_pretrained = True
-        ## Neck
-        self.neck_act       = 'silu'
-        self.neck_norm      = 'BN'
-        self.neck_depthwise = False
-        self.neck_expand_ratio = 0.5
-        self.spp_pooling_size  = 5
-        ## FPN
-        self.fpn_act  = 'silu'
-        self.fpn_norm = 'BN'
-        self.fpn_depthwise = False
+
         ## Head
-        self.head_act  = 'silu'
-        self.head_norm = 'BN'
-        self.head_depthwise = False
-        self.num_cls_head   = 2
-        self.num_reg_head   = 2
+        self.num_cls_head = 2
+        self.num_reg_head = 2
 
         # ---------------- Post-process config ----------------
         ## Post process
@@ -125,10 +111,10 @@ class Yolov8NConfig(Yolov8BaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
+        self.model_scale = "n"
         self.width = 0.25
         self.depth = 0.34
         self.ratio = 2.0
-        self.scale = "n"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -140,10 +126,10 @@ class Yolov8SConfig(Yolov8BaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
+        self.model_scale = "s"
         self.width = 0.50
         self.depth = 0.34
         self.ratio = 2.0
-        self.scale = "s"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -155,10 +141,10 @@ class Yolov8MConfig(Yolov8BaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
+        self.model_scale = "m"
         self.width = 0.75
         self.depth = 0.67
         self.ratio = 1.5
-        self.scale = "m"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -170,10 +156,10 @@ class Yolov8LConfig(Yolov8BaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
+        self.model_scale = "l"
         self.width = 1.0
         self.depth = 1.0
         self.ratio = 1.0
-        self.scale = "l"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -185,10 +171,10 @@ class Yolov8XConfig(Yolov8BaseConfig):
     def __init__(self) -> None:
         super().__init__()
         # ---------------- Model config ----------------
+        self.model_scale = "x"
         self.width = 1.25
         self.depth = 1.0
         self.ratio = 1.0
-        self.scale = "x"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0

+ 12 - 27
yolo/config/yolov9_config.py

@@ -1,10 +1,10 @@
 # Gelan (proposed by yolov9) config
 
 
-def build_gelan_config(args):
-    if   args.model == 'gelan_s':
+def build_yolov9_config(args):
+    if   args.model == 'yolov9_s':
         return GElanSConfig()
-    elif args.model == 'gelan_c':
+    elif args.model == 'yolov9_c':
         return GElanCConfig()
     else:
         raise NotImplementedError("No config for model: {}".format(args.model))
@@ -16,12 +16,8 @@ class GElanBaseConfig(object):
         self.reg_max  = 16
         self.out_stride = [8, 16, 32]
         self.max_stride = 32
-        self.num_levels = 3
+
         ## Backbone
-        self.backbone = 'gelan'
-        self.bk_act   = 'silu'
-        self.bk_norm  = 'BN'
-        self.bk_depthwise = False
         self.use_pretrained = True
         self.backbone_feats = {
             "c1": [64],
@@ -30,20 +26,14 @@ class GElanBaseConfig(object):
             "c4": [512, [512, 256], 512],
             "c5": [512, [512, 256], 512],
         }
-        self.scale = "l"
+        self.model_scale = "l"
         self.backbone_depth = 1
+
         ## Neck
-        self.neck           = 'spp_elan'
-        self.neck_act       = 'silu'
-        self.neck_norm      = 'BN'
-        self.spp_pooling_size  = 5
         self.spp_inter_dim     = 256
         self.spp_out_dim       = 512
+
         ## FPN
-        self.fpn      = 'gelan_pafpn'
-        self.fpn_act  = 'silu'
-        self.fpn_norm = 'BN'
-        self.fpn_depthwise = False
         self.fpn_depth    = 1
         self.fpn_feats_td = {
             "p4": [[512, 256], 512],
@@ -53,13 +43,10 @@ class GElanBaseConfig(object):
             "p4": [[512, 256], 512],
             "p5": [[512, 256], 512],
         }
+
         ## Head
-        self.head      = 'gelan_head'
-        self.head_act  = 'silu'
-        self.head_norm = 'BN'
-        self.head_depthwise = False
-        self.num_cls_head   = 2
-        self.num_reg_head   = 2
+        self.num_cls_head = 2
+        self.num_reg_head = 2
 
         # ---------------- Post-process config ----------------
         ## Post process
@@ -136,9 +123,8 @@ class GElanBaseConfig(object):
 class GElanCConfig(GElanBaseConfig):
     def __init__(self) -> None:
         super().__init__()
-        self.backbone = 'gelan'
         self.use_pretrained = True
-        self.scale = "c"
+        self.model_scale = "c"
      
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -151,7 +137,6 @@ class GElanSConfig(GElanBaseConfig):
         super().__init__()
         # ---------------- Model config ----------------
         ## Backbone
-        self.backbone = 'gelan'
         self.use_pretrained = True
         self.backbone_feats = {
             "c1": [32],
@@ -160,7 +145,7 @@ class GElanSConfig(GElanBaseConfig):
             "c4": [128, [128, 64],  256],
             "c5": [256, [256, 128], 256],
         }
-        self.scale = "s"
+        self.model_scale = "s"
         self.backbone_depth = 3
         ## Neck
         self.spp_inter_dim = 128

+ 6 - 6
yolo/config/yolox_config.py

@@ -24,7 +24,7 @@ class YoloxBaseConfig(object):
         self.out_stride = [8, 16, 32]
         self.max_stride = 32
         self.num_levels = 3
-        self.scale      = "b"
+        self.model_scale = "l"
         ## Backbone
         self.use_pretrained = True
         ## Head
@@ -109,7 +109,7 @@ class YoloxNConfig(YoloxBaseConfig):
         # ---------------- Model config ----------------
         self.width = 0.25
         self.depth = 0.34
-        self.scale = "n"
+        self.model_scale = "n"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -123,7 +123,7 @@ class YoloxSConfig(YoloxBaseConfig):
         # ---------------- Model config ----------------
         self.width = 0.50
         self.depth = 0.34
-        self.scale = "s"
+        self.model_scale = "s"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -137,7 +137,7 @@ class YoloxMConfig(YoloxBaseConfig):
         # ---------------- Model config ----------------
         self.width = 0.75
         self.depth = 0.67
-        self.scale = "m"
+        self.model_scale = "m"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -151,7 +151,7 @@ class YoloxLConfig(YoloxBaseConfig):
         # ---------------- Model config ----------------
         self.width = 1.0
         self.depth = 1.0
-        self.scale = "l"
+        self.model_scale = "l"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0
@@ -165,7 +165,7 @@ class YoloxXConfig(YoloxBaseConfig):
         # ---------------- Model config ----------------
         self.width = 1.25
         self.depth = 1.34
-        self.scale = "x"
+        self.model_scale = "x"
 
         # ---------------- Data process config ----------------
         self.mosaic_prob = 1.0

+ 1 - 1
yolo/models/__init__.py

@@ -38,7 +38,7 @@ def build_model(args, cfg, is_val=False):
     elif 'yolov8' in args.model:
         model, criterion = build_yolov8(cfg, is_val)
     ## GElan
-    elif 'gelan' in args.model:
+    elif 'yolov9' in args.model:
         model, criterion = build_gelan(cfg, is_val)
     ## RT-DETR
     elif 'rtdetr' in args.model:

+ 0 - 2
yolo/models/yolov5/modules.py

@@ -21,8 +21,6 @@ class ConvModule(nn.Module):
     def forward(self, x):
         return self.act(self.norm(self.conv(x)))
 
-
-# ---------------------------- Basic Modules ----------------------------
 class YoloBottleneck(nn.Module):
     def __init__(self,
                  in_dim      :int,

+ 2 - 2
yolo/models/yolov5/yolov5_backbone.py

@@ -22,7 +22,7 @@ class Yolov5Backbone(nn.Module):
     def __init__(self, cfg):
         super(Yolov5Backbone, self).__init__()
         # ------------------ Basic setting ------------------
-        self.model_scale = cfg.scale
+        self.model_scale = cfg.model_scale
         self.feat_dims = [round(64   * cfg.width),
                           round(128  * cfg.width),
                           round(256  * cfg.width),
@@ -129,7 +129,7 @@ if __name__ == '__main__':
         def __init__(self) -> None:
             self.width = 0.5
             self.depth = 0.34
-            self.scale = "s"
+            self.model_scale = "s"
             self.use_pretrained = True
 
     cfg = BaseConfig()

+ 75 - 0
yolo/models/yolov8/modules.py

@@ -0,0 +1,75 @@
+import torch
+import torch.nn as nn
+from typing import List
+
+
+# --------------------- Basic modules ---------------------
+class ConvModule(nn.Module):
+    def __init__(self, 
+                 in_dim,        # in channels
+                 out_dim,       # out channels 
+                 kernel_size=1, # kernel size 
+                 padding=0,     # padding
+                 stride=1,      # padding
+                 dilation=1,    # dilation
+                ):
+        super(ConvModule, self).__init__()
+        self.conv = nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=False)
+        self.norm = nn.BatchNorm2d(out_dim)
+        self.act  = nn.SiLU(inplace=True)
+
+    def forward(self, x):
+        return self.act(self.norm(self.conv(x)))
+
+class YoloBottleneck(nn.Module):
+    def __init__(self,
+                 in_dim      :int,
+                 out_dim     :int,
+                 kernel_size :List  = [1, 3],
+                 expansion   :float = 0.5,
+                 shortcut    :bool  = False,
+                 ):
+        super(YoloBottleneck, self).__init__()
+        inter_dim = int(out_dim * expansion)
+        # ----------------- Network setting -----------------
+        self.conv_layer1 = ConvModule(in_dim, inter_dim, kernel_size=kernel_size[0], padding=kernel_size[0]//2, stride=1)
+        self.conv_layer2 = ConvModule(inter_dim, out_dim, kernel_size=kernel_size[1], padding=kernel_size[1]//2, stride=1)
+        self.shortcut = shortcut and in_dim == out_dim
+
+    def forward(self, x):
+        h = self.conv_layer2(self.conv_layer1(x))
+
+        return x + h if self.shortcut else h
+
+class C2fBlock(nn.Module):
+    def __init__(self,
+                 in_dim: int,
+                 out_dim: int,
+                 expansion : float = 0.5,
+                 num_blocks : int = 1,
+                 shortcut  : bool = False,
+                 ):
+        super(C2fBlock, self).__init__()
+        inter_dim = round(out_dim * expansion)
+        self.input_proj  = ConvModule(in_dim, inter_dim * 2, kernel_size=1)
+        self.output_proj = ConvModule((2 + num_blocks) * inter_dim, out_dim, kernel_size=1)
+        self.module = nn.ModuleList([
+            YoloBottleneck(in_dim = inter_dim,
+                           out_dim = inter_dim,
+                           kernel_size = [3, 3],
+                           expansion = 1.0,
+                           shortcut = shortcut,
+                           ) for _ in range(num_blocks)])
+
+    def forward(self, x):
+        # Input proj
+        x1, x2 = torch.chunk(self.input_proj(x), 2, dim=1)
+        out = list([x1, x2])
+
+        # Bottlenecl
+        out.extend(m(out[-1]) for m in self.module)
+
+        # Output proj
+        out = self.output_proj(torch.cat(out, dim=1))
+
+        return out

+ 4 - 4
yolo/models/yolov8/yolov8.py

@@ -34,14 +34,14 @@ class Yolov8(nn.Module):
         self.backbone = Yolov8Backbone(cfg)
         self.pyramid_feat_dims = self.backbone.feat_dims[-3:]
         ## Neck
-        self.neck     = SPPF(cfg, self.pyramid_feat_dims[-1], self.pyramid_feat_dims[-1])
+        self.neck = SPPF(self.pyramid_feat_dims[-1], self.pyramid_feat_dims[-1])
         self.pyramid_feat_dims[-1] = self.neck.out_dim
         ## Neck: PaFPN
-        self.fpn      = Yolov8PaFPN(cfg, self.backbone.feat_dims)
+        self.fpn = Yolov8PaFPN(cfg, self.backbone.feat_dims)
         ## Head
-        self.head     = Yolov8DetHead(cfg, self.fpn.out_dims)
+        self.head = Yolov8DetHead(cfg, self.fpn.out_dims)
         ## Pred
-        self.pred     = Yolov8DetPredLayer(cfg, self.head.cls_head_dim, self.head.reg_head_dim)
+        self.pred = Yolov8DetPredLayer(cfg, self.head.cls_head_dim, self.head.reg_head_dim)
 
     def post_process(self, cls_preds, box_preds):
         """

+ 34 - 66
yolo/models/yolov8/yolov8_backbone.py

@@ -2,9 +2,9 @@ import torch
 import torch.nn as nn
 
 try:
-    from .yolov8_basic import BasicConv, ELANLayer
+    from .modules import ConvModule, C2fBlock
 except:
-    from  yolov8_basic import BasicConv, ELANLayer
+    from  modules import ConvModule, C2fBlock
 
 # IN1K pretrained weight
 pretrained_urls = {
@@ -20,7 +20,7 @@ class Yolov8Backbone(nn.Module):
     def __init__(self, cfg):
         super(Yolov8Backbone, self).__init__()
         # ------------------ Basic setting ------------------
-        self.model_scale = cfg.scale
+        self.model_scale = cfg.model_scale
         self.feat_dims = [round(64  * cfg.width),
                           round(128 * cfg.width),
                           round(256 * cfg.width),
@@ -29,64 +29,46 @@ class Yolov8Backbone(nn.Module):
         
         # ------------------ Network setting ------------------
         ## P1/2
-        self.layer_1 = BasicConv(3, self.feat_dims[0],
-                                 kernel_size=3, padding=1, stride=2,
-                                 act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise)
+        self.layer_1 = ConvModule(3, self.feat_dims[0], kernel_size=3, padding=1, stride=2)
         # P2/4
         self.layer_2 = nn.Sequential(
-            BasicConv(self.feat_dims[0], self.feat_dims[1],
-                      kernel_size=3, padding=1, stride=2,
-                      act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
-            ELANLayer(in_dim     = self.feat_dims[1],
-                      out_dim    = self.feat_dims[1],
-                      num_blocks = round(3*cfg.depth),
-                      expansion  = 0.5,
-                      shortcut   = True,
-                      act_type   = cfg.bk_act,
-                      norm_type  = cfg.bk_norm,
-                      depthwise  = cfg.bk_depthwise)
+            ConvModule(self.feat_dims[0], self.feat_dims[1], kernel_size=3, padding=1, stride=2),
+            C2fBlock(in_dim     = self.feat_dims[1],
+                     out_dim    = self.feat_dims[1],
+                     num_blocks = round(3*cfg.depth),
+                     expansion  = 0.5,
+                     shortcut   = True,
+                     )
         )
         # P3/8
         self.layer_3 = nn.Sequential(
-            BasicConv(self.feat_dims[1], self.feat_dims[2],
-                      kernel_size=3, padding=1, stride=2,
-                      act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
-            ELANLayer(in_dim     = self.feat_dims[2],
-                      out_dim    = self.feat_dims[2],
-                      num_blocks = round(6*cfg.depth),
-                      expansion  = 0.5,
-                      shortcut   = True,
-                      act_type   = cfg.bk_act,
-                      norm_type  = cfg.bk_norm,
-                      depthwise  = cfg.bk_depthwise)
+            ConvModule(self.feat_dims[1], self.feat_dims[2], kernel_size=3, padding=1, stride=2),
+            C2fBlock(in_dim     = self.feat_dims[2],
+                     out_dim    = self.feat_dims[2],
+                     num_blocks = round(6*cfg.depth),
+                     expansion  = 0.5,
+                     shortcut   = True,
+                     )
         )
         # P4/16
         self.layer_4 = nn.Sequential(
-            BasicConv(self.feat_dims[2], self.feat_dims[3],
-                      kernel_size=3, padding=1, stride=2,
-                      act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
-            ELANLayer(in_dim     = self.feat_dims[3],
-                      out_dim    = self.feat_dims[3],
-                      num_blocks = round(6*cfg.depth),
-                      expansion  = 0.5,
-                      shortcut   = True,
-                      act_type   = cfg.bk_act,
-                      norm_type  = cfg.bk_norm,
-                      depthwise  = cfg.bk_depthwise)
+            ConvModule(self.feat_dims[2], self.feat_dims[3], kernel_size=3, padding=1, stride=2),
+            C2fBlock(in_dim     = self.feat_dims[3],
+                     out_dim    = self.feat_dims[3],
+                     num_blocks = round(6*cfg.depth),
+                     expansion  = 0.5,
+                     shortcut   = True,
+                     )
         )
         # P5/32
         self.layer_5 = nn.Sequential(
-            BasicConv(self.feat_dims[3], self.feat_dims[4],
-                      kernel_size=3, padding=1, stride=2,
-                      act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
-            ELANLayer(in_dim     = self.feat_dims[4],
-                      out_dim    = self.feat_dims[4],
-                      num_blocks = round(3*cfg.depth),
-                      expansion  = 0.5,
-                      shortcut   = True,
-                      act_type   = cfg.bk_act,
-                      norm_type  = cfg.bk_norm,
-                      depthwise  = cfg.bk_depthwise)
+            ConvModule(self.feat_dims[3], self.feat_dims[4], kernel_size=3, padding=1, stride=2),
+            C2fBlock(in_dim     = self.feat_dims[4],
+                     out_dim    = self.feat_dims[4],
+                     num_blocks = round(3*cfg.depth),
+                     expansion  = 0.5,
+                     shortcut   = True,
+                     )
         )
 
         # Initialize all layers
@@ -100,8 +82,6 @@ class Yolov8Backbone(nn.Module):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
                 m.reset_parameters()
 
     def load_pretrained(self):
@@ -140,31 +120,19 @@ class Yolov8Backbone(nn.Module):
         return outputs
 
 
-# ---------------------------- Functions ----------------------------
-## build Yolo's Backbone
-def build_backbone(cfg): 
-    # model
-    backbone = Yolov8Backbone(cfg)
-        
-    return backbone
-
-
 if __name__ == '__main__':
     import time
     from thop import profile
     class BaseConfig(object):
         def __init__(self) -> None:
-            self.bk_act = 'silu'
-            self.bk_norm = 'BN'
-            self.bk_depthwise = False
             self.use_pretrained = True
             self.width = 0.50
             self.depth = 0.34
             self.ratio = 2.0
-            self.scale = "s"
+            self.model_scale = "s"
 
     cfg = BaseConfig()
-    model = build_backbone(cfg)
+    model = Yolov8Backbone(cfg)
     x = torch.randn(1, 3, 640, 640)
     t0 = time.time()
     outputs = model(x)

+ 0 - 172
yolo/models/yolov8/yolov8_basic.py

@@ -1,172 +0,0 @@
-import torch
-import torch.nn as nn
-from typing import List
-
-
-# --------------------- Basic modules ---------------------
-def get_conv2d(c1, c2, k, p, s, d, g, bias=False):
-    conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias)
-
-    return conv
-
-def get_activation(act_type=None):
-    if act_type == 'relu':
-        return nn.ReLU(inplace=True)
-    elif act_type == 'lrelu':
-        return nn.LeakyReLU(0.1, inplace=True)
-    elif act_type == 'mish':
-        return nn.Mish(inplace=True)
-    elif act_type == 'silu':
-        return nn.SiLU(inplace=True)
-    elif act_type is None:
-        return nn.Identity()
-    else:
-        raise NotImplementedError
-        
-def get_norm(norm_type, dim):
-    if norm_type == 'BN':
-        return nn.BatchNorm2d(dim)
-    elif norm_type == 'GN':
-        return nn.GroupNorm(num_groups=32, num_channels=dim)
-    elif norm_type is None:
-        return nn.Identity()
-    else:
-        raise NotImplementedError
-
-class BasicConv(nn.Module):
-    def __init__(self, 
-                 in_dim,                   # in channels
-                 out_dim,                  # out channels 
-                 kernel_size=1,            # kernel size 
-                 padding=0,                # padding
-                 stride=1,                 # padding
-                 dilation=1,               # dilation
-                 act_type  :str = 'lrelu', # activation
-                 norm_type :str = 'BN',    # normalization
-                 depthwise :bool = False
-                ):
-        super(BasicConv, self).__init__()
-        self.depthwise = depthwise
-        use_bias = False if norm_type is not None else True
-        if not depthwise:
-            self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=1, bias=use_bias)
-            self.norm = get_norm(norm_type, out_dim)
-        else:
-            self.conv1 = get_conv2d(in_dim, in_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=in_dim, bias=use_bias)
-            self.norm1 = get_norm(norm_type, in_dim)
-            self.conv2 = get_conv2d(in_dim, out_dim, k=1, p=0, s=1, d=1, g=1)
-            self.norm2 = get_norm(norm_type, out_dim)
-        self.act  = get_activation(act_type)
-
-    def forward(self, x):
-        if not self.depthwise:
-            return self.act(self.norm(self.conv(x)))
-        else:
-            # Depthwise conv
-            x = self.norm1(self.conv1(x))
-            # Pointwise conv
-            x = self.act(self.norm2(self.conv2(x)))
-            return x
-
-
-# --------------------- Yolov8 modules ---------------------
-class YoloBottleneck(nn.Module):
-    def __init__(self,
-                 in_dim      :int,
-                 out_dim     :int,
-                 kernel_size :List  = [1, 3],
-                 expansion   :float = 0.5,
-                 shortcut    :bool  = False,
-                 act_type    :str   = 'silu',
-                 norm_type   :str   = 'BN',
-                 depthwise   :bool  = False,
-                 ) -> None:
-        super(YoloBottleneck, self).__init__()
-        inter_dim = int(out_dim * expansion)
-        # ----------------- Network setting -----------------
-        self.conv_layer1 = BasicConv(in_dim, inter_dim,
-                                     kernel_size=kernel_size[0], padding=kernel_size[0]//2, stride=1,
-                                     act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.conv_layer2 = BasicConv(inter_dim, out_dim,
-                                     kernel_size=kernel_size[1], padding=kernel_size[1]//2, stride=1,
-                                     act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.shortcut = shortcut and in_dim == out_dim
-
-    def forward(self, x):
-        h = self.conv_layer2(self.conv_layer1(x))
-
-        return x + h if self.shortcut else h
-
-class CSPLayer(nn.Module):
-    # CSP Bottleneck with 3 convolutions
-    def __init__(self,
-                 in_dim      :int,
-                 out_dim     :int,
-                 num_blocks  :int   = 1,
-                 kernel_size :List = [3, 3],
-                 expansion   :float = 0.5,
-                 shortcut    :bool  = True,
-                 act_type    :str   = 'silu',
-                 norm_type   :str   = 'BN',
-                 depthwise   :bool  = False,
-                 ) -> None:
-        super().__init__()
-        inter_dim = round(out_dim * expansion)
-        self.input_proj_1 = BasicConv(in_dim, inter_dim, kernel_size=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.input_proj_2 = BasicConv(in_dim, inter_dim, kernel_size=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.output_proj  = BasicConv(2 * inter_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.module       = nn.Sequential(*[YoloBottleneck(inter_dim,
-                                                           inter_dim,
-                                                           kernel_size,
-                                                           expansion   = 1.0,
-                                                           shortcut    = shortcut,
-                                                           act_type    = act_type,
-                                                           norm_type   = norm_type,
-                                                           depthwise   = depthwise,
-                                                           ) for _ in range(num_blocks)])
-
-    def forward(self, x):
-        x1 = self.input_proj_1(x)
-        x2 = self.input_proj_2(x)
-        x2 = self.module(x2)
-        out = self.output_proj(torch.cat([x1, x2], dim=1))
-
-        return out
-
-class ELANLayer(nn.Module):
-    def __init__(self,
-                 in_dim,
-                 out_dim,
-                 expansion  :float = 0.5,
-                 num_blocks :int   = 1,
-                 shortcut   :bool  = False,
-                 act_type   :str   = 'silu',
-                 norm_type  :str   = 'BN',
-                 depthwise  :bool  = False,
-                 ) -> None:
-        super(ELANLayer, self).__init__()
-        inter_dim = round(out_dim * expansion)
-        self.input_proj  = BasicConv(in_dim, inter_dim * 2, kernel_size=1, act_type=act_type, norm_type=norm_type)
-        self.output_proj = BasicConv((2 + num_blocks) * inter_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
-        self.module      = nn.ModuleList([YoloBottleneck(inter_dim,
-                                                         inter_dim,
-                                                         kernel_size = [3, 3],
-                                                         expansion   = 1.0,
-                                                         shortcut    = shortcut,
-                                                         act_type    = act_type,
-                                                         norm_type   = norm_type,
-                                                         depthwise   = depthwise)
-                                                         for _ in range(num_blocks)])
-
-    def forward(self, x):
-        # Input proj
-        x1, x2 = torch.chunk(self.input_proj(x), 2, dim=1)
-        out = list([x1, x2])
-
-        # Bottlenecl
-        out.extend(m(out[-1]) for m in self.module)
-
-        # Output proj
-        out = self.output_proj(torch.cat(out, dim=1))
-
-        return out

+ 17 - 53
yolo/models/yolov8/yolov8_head.py

@@ -1,10 +1,11 @@
 import torch
 import torch.nn as nn
+from typing import List
 
 try:
-    from .yolov8_basic import BasicConv
+    from .modules import ConvModule
 except:
-    from  yolov8_basic import BasicConv
+    from  modules import ConvModule
 
 
 # -------------------- Detection Head --------------------
@@ -16,59 +17,32 @@ class DetHead(nn.Module):
                  reg_head_dim :int  = 256,
                  num_cls_head :int  = 2,
                  num_reg_head :int  = 2,
-                 act_type     :str  = "silu",
-                 norm_type    :str  = "BN",
-                 depthwise    :bool = False):
+                 ):
         super().__init__()
         # --------- Basic Parameters ----------
         self.in_dim = in_dim
         self.num_cls_head = num_cls_head
         self.num_reg_head = num_reg_head
-        self.act_type = act_type
-        self.norm_type = norm_type
-        self.depthwise = depthwise
         
         # --------- Network Parameters ----------
-        ## cls head
+        ## classification head
         cls_feats = []
         self.cls_head_dim = cls_head_dim
         for i in range(num_cls_head):
             if i == 0:
-                cls_feats.append(
-                    BasicConv(in_dim, self.cls_head_dim,
-                              kernel_size=3, padding=1, stride=1, 
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
+                cls_feats.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
             else:
-                cls_feats.append(
-                    BasicConv(self.cls_head_dim, self.cls_head_dim,
-                              kernel_size=3, padding=1, stride=1, 
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
-        ## reg head
+                cls_feats.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
+        
+        ## bbox regression head
         reg_feats = []
         self.reg_head_dim = reg_head_dim
         for i in range(num_reg_head):
             if i == 0:
-                reg_feats.append(
-                    BasicConv(in_dim, self.reg_head_dim,
-                              kernel_size=3, padding=1, stride=1, 
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
+                reg_feats.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
             else:
-                reg_feats.append(
-                    BasicConv(self.reg_head_dim, self.reg_head_dim,
-                              kernel_size=3, padding=1, stride=1, 
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
+                reg_feats.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
+        
         self.cls_feats = nn.Sequential(*cls_feats)
         self.reg_feats = nn.Sequential(*reg_feats)
 
@@ -78,8 +52,6 @@ class DetHead(nn.Module):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
                 m.reset_parameters()
 
     def forward(self, x):
@@ -93,8 +65,9 @@ class DetHead(nn.Module):
     
 ## Multi-level Detection Head
 class Yolov8DetHead(nn.Module):
-    def __init__(self, cfg, in_dims):
+    def __init__(self, cfg, in_dims: List = [256, 512, 1024]):
         super().__init__()
+        self.num_levels = len(cfg.out_stride)
         ## ----------- Network Parameters -----------
         self.multi_level_heads = nn.ModuleList(
             [DetHead(in_dim       = in_dims[level],
@@ -102,17 +75,12 @@ class Yolov8DetHead(nn.Module):
                      reg_head_dim = max(in_dims[0]//4, 16, 4*cfg.reg_max),
                      num_cls_head = cfg.num_cls_head,
                      num_reg_head = cfg.num_reg_head,
-                     act_type     = cfg.head_act,
-                     norm_type    = cfg.head_norm,
-                     depthwise    = cfg.head_depthwise)
-                     for level in range(cfg.num_levels)
-                     ])
+                     ) for level in range(self.num_levels)])
         # --------- Basic Parameters ----------
         self.in_dims = in_dims
         self.cls_head_dim = self.multi_level_heads[0].cls_head_dim
         self.reg_head_dim = self.multi_level_heads[0].reg_head_dim
 
-
     def forward(self, feats):
         """
             feats: List[(Tensor)] [[B, C, H, W], ...]
@@ -132,7 +100,6 @@ class Yolov8DetHead(nn.Module):
 if __name__=='__main__':
     import time
     from thop import profile
-    # Model config
     
     # YOLOv8-Base config
     class Yolov8BaseConfig(object):
@@ -146,11 +113,8 @@ if __name__=='__main__':
             self.max_stride = 32
             self.num_levels = 3
             ## Head
-            self.head_act  = 'lrelu'
-            self.head_norm = 'BN'
-            self.head_depthwise = False
-            self.num_cls_head   = 2
-            self.num_reg_head   = 2
+            self.num_cls_head = 2
+            self.num_reg_head = 2
 
     cfg = Yolov8BaseConfig()
     cfg.num_classes = 20

+ 8 - 31
yolo/models/yolov8/yolov8_neck.py

@@ -2,9 +2,9 @@ import torch
 import torch.nn as nn
 
 try:
-    from .yolov8_basic import BasicConv
+    from .modules import ConvModule
 except:
-    from  yolov8_basic import BasicConv
+    from  modules import ConvModule
     
 
 # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
@@ -12,21 +12,15 @@ class SPPF(nn.Module):
     """
         This code referenced to https://github.com/ultralytics/yolov5
     """
-    def __init__(self, cfg, in_dim, out_dim):
+    def __init__(self, in_dim, out_dim):
         super().__init__()
         ## ----------- Basic Parameters -----------
-        inter_dim = round(in_dim * cfg.neck_expand_ratio)
+        inter_dim = in_dim // 2
         self.out_dim = out_dim
         ## ----------- Network Parameters -----------
-        self.cv1 = BasicConv(in_dim, inter_dim,
-                             kernel_size=1, padding=0, stride=1,
-                             act_type=cfg.neck_act, norm_type=cfg.neck_norm)
-        self.cv2 = BasicConv(inter_dim * 4, out_dim,
-                             kernel_size=1, padding=0, stride=1,
-                             act_type=cfg.neck_act, norm_type=cfg.neck_norm)
-        self.m = nn.MaxPool2d(kernel_size=cfg.spp_pooling_size,
-                              stride=1,
-                              padding=cfg.spp_pooling_size // 2)
+        self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1, padding=0, stride=1)
+        self.cv2 = ConvModule(inter_dim * 4, out_dim, kernel_size=1, padding=0, stride=1)
+        self.m = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
 
         # Initialize all layers
         self.init_weights()
@@ -35,8 +29,6 @@ class SPPF(nn.Module):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
                 m.reset_parameters()
 
     def forward(self, x):
@@ -50,26 +42,11 @@ class SPPF(nn.Module):
 if __name__=='__main__':
     import time
     from thop import profile
-    # Model config
     
-    # YOLOv8-Base config
-    class Yolov8BaseConfig(object):
-        def __init__(self) -> None:
-            # ---------------- Model config ----------------
-            self.out_stride = 32
-            self.max_stride = 32
-            ## Neck
-            self.neck_act       = 'lrelu'
-            self.neck_norm      = 'BN'
-            self.neck_depthwise = False
-            self.neck_expand_ratio = 0.5
-            self.spp_pooling_size  = 5
-
-    cfg = Yolov8BaseConfig()
     # Build a head
     in_dim  = 512
     out_dim = 512
-    neck = SPPF(cfg, in_dim, out_dim)
+    neck = SPPF(in_dim, out_dim)
 
     # Inference
     x = torch.randn(1, in_dim, 20, 20)

+ 24 - 51
yolo/models/yolov8/yolov8_pafpn.py

@@ -4,72 +4,51 @@ import torch.nn.functional as F
 from typing import List
 
 try:
-    from .yolov8_basic import BasicConv, ELANLayer
+    from .modules import ConvModule, C2fBlock
 except:
-    from  yolov8_basic import BasicConv, ELANLayer
+    from  modules import ConvModule, C2fBlock
 
 
 # YOLOv8's PaFPN
 class Yolov8PaFPN(nn.Module):
-    def __init__(self,
-                 cfg,
-                 in_dims :List = [256, 512, 1024],
-                 ) -> None:
+    def __init__(self, cfg, in_dims :List = [256, 512, 1024]) -> None:
         super(Yolov8PaFPN, self).__init__()
-        print('==============================')
-        print('FPN: {}'.format("Yolo PaFPN"))
         # --------------------------- Basic Parameters ---------------------------
         self.in_dims = in_dims[::-1]
         self.out_dims = [round(256*cfg.width), round(512*cfg.width), round(512*cfg.width*cfg.ratio)]
 
         # ----------------------------- Yolov8's Top-down FPN -----------------------------
         ## P5 -> P4
-        self.top_down_layer_1 = ELANLayer(in_dim     = self.in_dims[0] + self.in_dims[1],
+        self.top_down_layer_1 = C2fBlock(in_dim     = self.in_dims[0] + self.in_dims[1],
+                                         out_dim    = round(512*cfg.width),
+                                         expansion  = 0.5,
+                                         num_blocks = round(3 * cfg.depth),
+                                         shortcut   = False,
+                                         )
+        ## P4 -> P3
+        self.top_down_layer_2 = C2fBlock(in_dim     = self.in_dims[2] + round(512*cfg.width),
+                                         out_dim    = round(256*cfg.width),
+                                         expansion  = 0.5,
+                                         num_blocks = round(3 * cfg.depth),
+                                         shortcut   = False,
+                                         )
+        # ----------------------------- Yolov8's Bottom-up PAN -----------------------------
+        ## P3 -> P4
+        self.dowmsample_layer_1 = ConvModule(round(256*cfg.width), round(256*cfg.width), kernel_size=3, padding=1, stride=2)
+        self.bottom_up_layer_1 = C2fBlock(in_dim     = round(256*cfg.width) + round(512*cfg.width),
                                           out_dim    = round(512*cfg.width),
                                           expansion  = 0.5,
                                           num_blocks = round(3 * cfg.depth),
                                           shortcut   = False,
-                                          act_type   = cfg.fpn_act,
-                                          norm_type  = cfg.fpn_norm,
-                                          depthwise  = cfg.fpn_depthwise,
                                           )
-        ## P4 -> P3
-        self.top_down_layer_2 = ELANLayer(in_dim     = self.in_dims[2] + round(512*cfg.width),
-                                          out_dim    = round(256*cfg.width),
+        ## P4 -> P5
+        self.dowmsample_layer_2 = ConvModule(round(512*cfg.width), round(512*cfg.width), kernel_size=3, padding=1, stride=2)
+        self.bottom_up_layer_2 = C2fBlock(in_dim     = round(512*cfg.width) + self.in_dims[0],
+                                          out_dim    = round(512*cfg.width*cfg.ratio),
                                           expansion  = 0.5,
                                           num_blocks = round(3 * cfg.depth),
                                           shortcut   = False,
-                                          act_type   = cfg.fpn_act,
-                                          norm_type  = cfg.fpn_norm,
-                                          depthwise  = cfg.fpn_depthwise,
                                           )
-        # ----------------------------- Yolov8's Bottom-up PAN -----------------------------
-        ## P3 -> P4
-        self.dowmsample_layer_1 = BasicConv(round(256*cfg.width), round(256*cfg.width),
-                                            kernel_size=3, padding=1, stride=2,
-                                            act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
-        self.bottom_up_layer_1 = ELANLayer(in_dim     = round(256*cfg.width) + round(512*cfg.width),
-                                           out_dim    = round(512*cfg.width),
-                                           expansion  = 0.5,
-                                           num_blocks = round(3 * cfg.depth),
-                                           shortcut   = False,
-                                           act_type   = cfg.fpn_act,
-                                           norm_type  = cfg.fpn_norm,
-                                           depthwise  = cfg.fpn_depthwise,
-                                           )
-        ## P4 -> P5
-        self.dowmsample_layer_2 = BasicConv(round(512*cfg.width), round(512*cfg.width),
-                                            kernel_size=3, padding=1, stride=2,
-                                            act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
-        self.bottom_up_layer_2 = ELANLayer(in_dim     = round(512*cfg.width) + self.in_dims[0],
-                                           out_dim    = round(512*cfg.width*cfg.ratio),
-                                           expansion  = 0.5,
-                                           num_blocks = round(3 * cfg.depth),
-                                           shortcut   = False,
-                                           act_type   = cfg.fpn_act,
-                                           norm_type  = cfg.fpn_norm,
-                                           depthwise  = cfg.fpn_depthwise,
-                                           )
 
         self.init_weights()
         
@@ -77,8 +56,6 @@ class Yolov8PaFPN(nn.Module):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
                 m.reset_parameters()
 
     def forward(self, features):
@@ -122,10 +99,6 @@ if __name__=='__main__':
             self.out_stride = [8, 16, 32]
             self.max_stride = 32
             self.num_levels = 3
-            ## FPN
-            self.fpn_act  = 'silu'
-            self.fpn_norm = 'BN'
-            self.fpn_depthwise = False
             ## Head
             self.head_dim = 256
 

+ 4 - 7
yolo/models/yolov8/yolov8_pred.py

@@ -85,16 +85,13 @@ class DetPredLayer(nn.Module):
 
 ## Multi-level pred layer
 class Yolov8DetPredLayer(nn.Module):
-    def __init__(self,
-                 cfg,
-                 cls_dim,
-                 reg_dim,
-                 ):
+    def __init__(self, cfg, cls_dim: int, reg_dim: int):
         super().__init__()
         # --------- Basic Parameters ----------
         self.cfg = cfg
         self.cls_dim = cls_dim
         self.reg_dim = reg_dim
+        self.num_levels = len(cfg.out_stride)
 
         # ----------- Network Parameters -----------
         ## pred layers
@@ -105,7 +102,7 @@ class Yolov8DetPredLayer(nn.Module):
                           reg_max     = cfg.reg_max,
                           num_classes = cfg.num_classes,
                           num_coords  = 4 * cfg.reg_max)
-                          for level in range(cfg.num_levels)
+                          for level in range(self.num_levels)
                           ])
         ## proj conv
         proj_init = torch.arange(cfg.reg_max, dtype=torch.float)
@@ -118,7 +115,7 @@ class Yolov8DetPredLayer(nn.Module):
         all_cls_preds = []
         all_reg_preds = []
         all_box_preds = []
-        for level in range(self.cfg.num_levels):
+        for level in range(self.num_levels):
             # -------------- Single-level prediction --------------
             outputs = self.multi_level_preds[level](cls_feats[level], reg_feats[level])
 

+ 6 - 6
yolo/models/yolov9/gelan.py

@@ -3,7 +3,7 @@ import torch
 import torch.nn as nn
 
 # --------------- Model components ---------------
-from .gelan_backbone import build_backbone
+from .gelan_backbone import GElanBackbone
 from .gelan_neck     import SPPElan
 from .gelan_pafpn    import GElanPaFPN
 from .gelan_head     import GElanDetHead
@@ -33,14 +33,14 @@ class GElan(nn.Module):
         
         # ---------------------- Network Parameters ----------------------
         ## Backbone
-        self.backbone = build_backbone(cfg)
-        self.neck     = SPPElan(cfg, self.backbone.feat_dims[-1])
+        self.backbone = GElanBackbone(cfg)
+        self.neck = SPPElan(cfg, self.backbone.feat_dims[-1])
         self.backbone.feat_dims[-1] = self.neck.out_dim
         ## PaFPN
-        self.fpn      = GElanPaFPN(cfg, self.backbone.feat_dims)
+        self.fpn = GElanPaFPN(cfg, self.backbone.feat_dims)
         ## Detection head
-        self.head     = GElanDetHead(cfg, self.fpn.out_dims)
-        self.pred     = GElanPredLayer(cfg, self.head.cls_head_dim, self.head.reg_head_dim)
+        self.head = GElanDetHead(cfg, self.fpn.out_dims)
+        self.pred = GElanPredLayer(cfg, self.head.cls_head_dim, self.head.reg_head_dim)
 
     def switch_to_deploy(self,):
         for m in self.modules():

+ 15 - 45
yolo/models/yolov9/gelan_backbone.py

@@ -2,9 +2,9 @@ import torch
 import torch.nn as nn
 
 try:
-    from .gelan_basic import BasicConv, RepGElanLayer, ADown
+    from .modules import ConvModule, RepGElanLayer, ADown
 except:
-    from  gelan_basic import BasicConv, RepGElanLayer, ADown
+    from  modules import ConvModule, RepGElanLayer, ADown
 
 # IN1K pretrained weight
 pretrained_urls = {
@@ -17,7 +17,7 @@ class GElanBackbone(nn.Module):
     def __init__(self, cfg):
         super(GElanBackbone, self).__init__()
         # ---------- Basic setting ----------
-        self.model_scale = cfg.scale
+        self.model_scale = cfg.model_scale
         self.feat_dims = [cfg.backbone_feats["c1"][-1],  # 64
                           cfg.backbone_feats["c2"][-1],  # 128
                           cfg.backbone_feats["c3"][-1],  # 256
@@ -27,61 +27,46 @@ class GElanBackbone(nn.Module):
         
         # ---------- Network setting ----------
         ## P1/2
-        self.layer_1 = BasicConv(3, cfg.backbone_feats["c1"][0],
-                                 kernel_size=3, padding=1, stride=2,
-                                 act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise)
+        self.layer_1 = ConvModule(3, cfg.backbone_feats["c1"][0], kernel_size=3, padding=1, stride=2)
         # P2/4
         self.layer_2 = nn.Sequential(
-            BasicConv(cfg.backbone_feats["c1"][0], cfg.backbone_feats["c2"][0],
-                      kernel_size=3, padding=1, stride=2,
-                      act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
+            ConvModule(cfg.backbone_feats["c1"][0], cfg.backbone_feats["c2"][0], kernel_size=3, padding=1, stride=2),
             RepGElanLayer(in_dim     = cfg.backbone_feats["c2"][0],
                           inter_dims = cfg.backbone_feats["c2"][1],
                           out_dim    = cfg.backbone_feats["c2"][2],
                           num_blocks = cfg.backbone_depth,
                           shortcut   = True,
-                          act_type   = cfg.bk_act,
-                          norm_type  = cfg.bk_norm,
-                          depthwise  = cfg.bk_depthwise)
+                          )
         )
         # P3/8
         self.layer_3 = nn.Sequential(
-            ADown(cfg.backbone_feats["c2"][2], cfg.backbone_feats["c3"][0],
-                  act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
+            ADown(cfg.backbone_feats["c2"][2], cfg.backbone_feats["c3"][0]),
             RepGElanLayer(in_dim     = cfg.backbone_feats["c3"][0],
                           inter_dims = cfg.backbone_feats["c3"][1],
                           out_dim    = cfg.backbone_feats["c3"][2],
                           num_blocks = cfg.backbone_depth,
                           shortcut   = True,
-                          act_type   = cfg.bk_act,
-                          norm_type  = cfg.bk_norm,
-                          depthwise  = cfg.bk_depthwise)
+                          )
         )
         # P4/16
         self.layer_4 = nn.Sequential(
-            ADown(cfg.backbone_feats["c3"][2], cfg.backbone_feats["c4"][0],
-                  act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
+            ADown(cfg.backbone_feats["c3"][2], cfg.backbone_feats["c4"][0]),
             RepGElanLayer(in_dim     = cfg.backbone_feats["c4"][0],
                           inter_dims = cfg.backbone_feats["c4"][1],
                           out_dim    = cfg.backbone_feats["c4"][2],
                           num_blocks = cfg.backbone_depth,
                           shortcut   = True,
-                          act_type   = cfg.bk_act,
-                          norm_type  = cfg.bk_norm,
-                          depthwise  = cfg.bk_depthwise)
+                          )
         )
         # P5/32
         self.layer_5 = nn.Sequential(
-            ADown(cfg.backbone_feats["c4"][2], cfg.backbone_feats["c5"][0],
-                  act_type=cfg.bk_act, norm_type=cfg.bk_norm, depthwise=cfg.bk_depthwise),
+            ADown(cfg.backbone_feats["c4"][2], cfg.backbone_feats["c5"][0]),
             RepGElanLayer(in_dim     = cfg.backbone_feats["c5"][0],
                           inter_dims = cfg.backbone_feats["c5"][1],
                           out_dim    = cfg.backbone_feats["c5"][2],
                           num_blocks = cfg.backbone_depth,
                           shortcut   = True,
-                          act_type   = cfg.bk_act,
-                          norm_type  = cfg.bk_norm,
-                          depthwise  = cfg.bk_depthwise)
+                          )
         )
 
         # Initialize all layers
@@ -133,27 +118,12 @@ class GElanBackbone(nn.Module):
         return outputs
 
 
-# ------------ Functions ------------
-def build_backbone(cfg): 
-    # model
-    if   cfg.backbone == "gelan":
-        backbone = GElanBackbone(cfg)
-    else:
-        raise NotImplementedError("Unknown gelan backbone: {}".format(cfg.backbone))
-        
-    return backbone
-
-
 if __name__ == '__main__':
     import time
     from thop import profile
     class BaseConfig(object):
         def __init__(self) -> None:
-            self.backbone = 'gelan'
             self.use_pretrained = True
-            self.bk_act = 'silu'
-            self.bk_norm = 'BN'
-            self.bk_depthwise = False
             # # Gelan-C scale
             # self.backbone_feats = {
             #     "c1": [64],
@@ -162,7 +132,7 @@ if __name__ == '__main__':
             #     "c4": [512, [512, 256], 512],
             #     "c5": [512, [512, 256], 512],
             # }
-            # self.scale = "l"
+            # self.model_scale = "c"
             # self.backbone_depth = 1
             # Gelan-S scale
             self.backbone_feats = {
@@ -172,13 +142,13 @@ if __name__ == '__main__':
                 "c4": [128, [128, 64],  256],
                 "c5": [256, [256, 128], 256],
             }
-            self.scale = "s"
+            self.model_scale = "s"
             self.backbone_depth = 3
     # 定义模型配置文件
     cfg = BaseConfig()
 
     # 构建GELAN主干网络
-    model = build_backbone(cfg)
+    model = GElanBackbone(cfg)
 
     # 随机生成输入数据
     x = torch.randn(1, 3, 640, 640)

+ 0 - 312
yolo/models/yolov9/gelan_basic.py

@@ -1,312 +0,0 @@
-import numpy as np
-import torch
-import torch.nn as nn
-from typing import List
-
-
-# --------------------- Basic modules ---------------------
-def get_conv2d(c1, c2, k, p, s, d, g, bias=False):
-    conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias)
-
-    return conv
-
-def get_activation(act_type=None):
-    if act_type == 'relu':
-        return nn.ReLU(inplace=True)
-    elif act_type == 'lrelu':
-        return nn.LeakyReLU(0.1, inplace=True)
-    elif act_type == 'mish':
-        return nn.Mish(inplace=True)
-    elif act_type == 'silu':
-        return nn.SiLU(inplace=True)
-    elif act_type is None:
-        return nn.Identity()
-    else:
-        raise NotImplementedError
-        
-def get_norm(norm_type, dim):
-    if norm_type == 'BN':
-        return nn.BatchNorm2d(dim)
-    elif norm_type == 'GN':
-        return nn.GroupNorm(num_groups=32, num_channels=dim)
-    elif norm_type is None:
-        return nn.Identity()
-    else:
-        raise NotImplementedError
-
-class BasicConv(nn.Module):
-    def __init__(self, 
-                 in_dim,                   # in channels
-                 out_dim,                  # out channels 
-                 kernel_size=1,            # kernel size 
-                 padding=0,                # padding
-                 stride=1,                 # padding
-                 dilation=1,               # dilation
-                 group=1,                  # group
-                 act_type  :str = 'lrelu', # activation
-                 norm_type :str = 'BN',    # normalization
-                 depthwise :bool = False
-                ):
-        super(BasicConv, self).__init__()
-        self.depthwise = depthwise
-        if not depthwise:
-            self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=group)
-            self.norm = get_norm(norm_type, out_dim)
-        else:
-            self.conv1 = get_conv2d(in_dim, in_dim, k=kernel_size, p=padding, s=stride, d=dilation, g=in_dim)
-            self.norm1 = get_norm(norm_type, in_dim)
-            self.conv2 = get_conv2d(in_dim, out_dim, k=1, p=0, s=1, d=1, g=1)
-            self.norm2 = get_norm(norm_type, out_dim)
-        self.act  = get_activation(act_type)
-
-    def forward(self, x):
-        if not self.depthwise:
-            return self.act(self.norm(self.conv(x)))
-        else:
-            # Depthwise conv
-            x = self.norm1(self.conv1(x))
-            # Pointwise conv
-            x = self.act(self.norm2(self.conv2(x)))
-            return x
-
-
-# --------------------- GELAN modules (from yolov9) ---------------------
-class ADown(nn.Module):
-    def __init__(self, in_dim, out_dim, act_type="silu", norm_type="BN", depthwise=False):
-        super().__init__()
-        inter_dim = out_dim // 2
-        self.conv_layer_1 = BasicConv(in_dim // 2, inter_dim,
-                                    kernel_size=3, padding=1, stride=2,
-                                    act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        self.conv_layer_2 = BasicConv(in_dim // 2, inter_dim, kernel_size=1,
-                                    act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-    def forward(self, x):
-        x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
-        x1,x2 = x.chunk(2, 1)
-        x1 = self.conv_layer_1(x1)
-        x2 = torch.nn.functional.max_pool2d(x2, 3, 2, 1)
-        x2 = self.conv_layer_2(x2)
-
-        return torch.cat((x1, x2), 1)
-
-class RepConvN(nn.Module):
-    """RepConv is a basic rep-style block, including training and deploy status
-    This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
-    """
-    def __init__(self, in_dim, out_dim, k=3, s=1, p=1, g=1, act_type='silu', norm_type='BN', depthwise=False):
-        super().__init__()
-        assert k == 3 and p == 1
-        self.g = g
-        self.in_dim = in_dim
-        self.out_dim = out_dim
-        self.act = get_activation(act_type)
-
-        self.bn = None
-        self.conv1 = BasicConv(in_dim, out_dim,
-                               kernel_size=k, padding=p, stride=s, group=g,
-                               act_type=None, norm_type=norm_type, depthwise=depthwise)
-        self.conv2 = BasicConv(in_dim, out_dim,
-                               kernel_size=1, padding=(p - k // 2), stride=s, group=g,
-                               act_type=None, norm_type=norm_type, depthwise=depthwise)
-
-    def forward(self, x):
-        """Forward process"""
-        if hasattr(self, 'conv'):
-            return self.forward_fuse(x)
-        else:
-            id_out = 0 if self.bn is None else self.bn(x)
-            return self.act(self.conv1(x) + self.conv2(x) + id_out)
-
-    def forward_fuse(self, x):
-        """Forward process"""
-        return self.act(self.conv(x))
-
-    def get_equivalent_kernel_bias(self):
-        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
-        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
-        kernelid, biasid = self._fuse_bn_tensor(self.bn)
-        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
-
-    def _avg_to_3x3_tensor(self, avgp):
-        channels = self.in_dim
-        groups = self.g
-        kernel_size = avgp.kernel_size
-        input_dim = channels // groups
-        k = torch.zeros((channels, input_dim, kernel_size, kernel_size))
-        k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2
-        return k
-
-    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
-        if kernel1x1 is None:
-            return 0
-        else:
-            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
-
-    def _fuse_bn_tensor(self, branch):
-        if branch is None:
-            return 0, 0
-        if isinstance(branch, BasicConv):
-            kernel       = branch.conv.weight
-            running_mean = branch.norm.running_mean
-            running_var  = branch.norm.running_var
-            gamma        = branch.norm.weight
-            beta         = branch.norm.bias
-            eps          = branch.norm.eps
-        elif isinstance(branch, nn.BatchNorm2d):
-            if not hasattr(self, 'id_tensor'):
-                input_dim = self.in_dim // self.g
-                kernel_value = np.zeros((self.in_dim, input_dim, 3, 3), dtype=np.float32)
-                for i in range(self.in_dim):
-                    kernel_value[i, i % input_dim, 1, 1] = 1
-                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
-            kernel       = self.id_tensor
-            running_mean = branch.running_mean
-            running_var  = branch.running_var
-            gamma        = branch.weight
-            beta         = branch.bias
-            eps          = branch.eps
-        std = (running_var + eps).sqrt()
-        t = (gamma / std).reshape(-1, 1, 1, 1)
-        return kernel * t, beta - running_mean * gamma / std
-
-    def fuse_convs(self):
-        if hasattr(self, 'conv'):
-            return
-        kernel, bias = self.get_equivalent_kernel_bias()
-        self.conv = nn.Conv2d(in_channels  = self.conv1.conv.in_channels,
-                              out_channels = self.conv1.conv.out_channels,
-                              kernel_size  = self.conv1.conv.kernel_size,
-                              stride       = self.conv1.conv.stride,
-                              padding      = self.conv1.conv.padding,
-                              dilation     = self.conv1.conv.dilation,
-                              groups       = self.conv1.conv.groups,
-                              bias         = True).requires_grad_(False)
-        self.conv.weight.data = kernel
-        self.conv.bias.data = bias
-        for para in self.parameters():
-            para.detach_()
-        self.__delattr__('conv1')
-        self.__delattr__('conv2')
-        if hasattr(self, 'nm'):
-            self.__delattr__('nm')
-        if hasattr(self, 'bn'):
-            self.__delattr__('bn')
-        if hasattr(self, 'id_tensor'):
-            self.__delattr__('id_tensor')
-
-class RepNBottleneck(nn.Module):
-    def __init__(self,
-                 in_dim,
-                 out_dim,
-                 shortcut=True,
-                 kernel_size=(3, 3),
-                 expansion=0.5,
-                 act_type='silu',
-                 norm_type='BN',
-                 depthwise=False
-                 ):
-        super().__init__()
-        inter_dim = round(out_dim * expansion)
-        self.conv_layer_1 = RepConvN(in_dim, inter_dim, kernel_size[0], p=kernel_size[0]//2, s=1, act_type=act_type, norm_type=norm_type)
-        self.conv_layer_2 = BasicConv(inter_dim, out_dim, kernel_size[1], padding=kernel_size[1]//2, stride=1, act_type=act_type, norm_type=norm_type)
-        self.add = shortcut and in_dim == out_dim
-
-    def forward(self, x):
-        h = self.conv_layer_2(self.conv_layer_1(x))
-        return x + h if self.add else h
-
-class RepNCSP(nn.Module):
-    def __init__(self,
-                 in_dim,
-                 out_dim,
-                 num_blocks=1,
-                 shortcut=True,
-                 expansion=0.5,
-                 act_type='silu',
-                 norm_type='BN',
-                 depthwise=False
-                 ):
-        super().__init__()
-        inter_dim = int(out_dim * expansion)
-        self.conv_layer_1 = BasicConv(in_dim, inter_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
-        self.conv_layer_2 = BasicConv(in_dim, inter_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
-        self.conv_layer_3 = BasicConv(2 * inter_dim, out_dim, kernel_size=1)
-        self.module       = nn.Sequential(*(RepNBottleneck(inter_dim,
-                                                           inter_dim,
-                                                           kernel_size = [3, 3],
-                                                           shortcut    = shortcut,
-                                                           expansion   = 1.0,
-                                                           act_type    = act_type,
-                                                           norm_type   = norm_type,
-                                                           depthwise   = depthwise)
-                                                           for _ in range(num_blocks)))
-
-    def forward(self, x):
-        x1 = self.conv_layer_1(x)
-        x2 = self.module(self.conv_layer_2(x))
-
-        return self.conv_layer_3(torch.cat([x1, x2], dim=1))
-
-class RepGElanLayer(nn.Module):
-    """YOLOv9's GELAN module"""
-    def __init__(self,
-                 in_dim     :int,
-                 inter_dims :List,
-                 out_dim    :int,
-                 num_blocks :int   = 1,
-                 shortcut   :bool  = False,
-                 act_type   :str   = 'silu',
-                 norm_type  :str   = 'BN',
-                 depthwise  :bool  = False,
-                 ) -> None:
-        super(RepGElanLayer, self).__init__()
-        # ----------- Basic parameters -----------
-        self.in_dim = in_dim
-        self.inter_dims = inter_dims
-        self.out_dim = out_dim
-
-        # ----------- Network parameters -----------
-        self.conv_layer_1  = BasicConv(in_dim, inter_dims[0], kernel_size=1, act_type=act_type, norm_type=norm_type)
-        self.elan_module_1 = nn.Sequential(
-             RepNCSP(inter_dims[0]//2,
-                     inter_dims[1],
-                     num_blocks  = num_blocks,
-                     shortcut    = shortcut,
-                     expansion   = 0.5,
-                     act_type    = act_type,
-                     norm_type   = norm_type,
-                     depthwise   = depthwise),
-            BasicConv(inter_dims[1], inter_dims[1],
-                      kernel_size=3, padding=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        self.elan_module_2 = nn.Sequential(
-             RepNCSP(inter_dims[1],
-                     inter_dims[1],
-                     num_blocks  = num_blocks,
-                     shortcut    = shortcut,
-                     expansion   = 0.5,
-                     act_type    = act_type,
-                     norm_type   = norm_type,
-                     depthwise   = depthwise),
-            BasicConv(inter_dims[1], inter_dims[1],
-                      kernel_size=3, padding=1,
-                      act_type=act_type, norm_type=norm_type, depthwise=depthwise)
-        )
-        self.conv_layer_2 = BasicConv(inter_dims[0] + 2*self.inter_dims[1], out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
-
-
-    def forward(self, x):
-        # Input proj
-        x1, x2 = torch.chunk(self.conv_layer_1(x), 2, dim=1)
-        out = list([x1, x2])
-
-        # ELAN module
-        out.append(self.elan_module_1(out[-1]))
-        out.append(self.elan_module_2(out[-1]))
-
-        # Output proj
-        out = self.conv_layer_2(torch.cat(out, dim=1))
-
-        return out
-    

+ 22 - 58
yolo/models/yolov9/gelan_head.py

@@ -2,72 +2,45 @@ import torch
 import torch.nn as nn
 
 try:
-    from .gelan_basic import BasicConv
+    from .modules import ConvModule
 except:
-    from  gelan_basic import BasicConv
+    from  modules import ConvModule
     
 
 # Single-level Head
-class SingleLevelHead(nn.Module):
+class DetHead(nn.Module):
     def __init__(self,
                  in_dim       :int  = 256,
                  cls_head_dim :int  = 256,
                  reg_head_dim :int  = 256,
                  num_cls_head :int  = 2,
                  num_reg_head :int  = 2,
-                 act_type     :str  = "silu",
-                 norm_type    :str  = "BN",
-                 depthwise    :bool = False):
+                 ):
         super().__init__()
         # --------- Basic Parameters ----------
         self.in_dim = in_dim
         self.num_cls_head = num_cls_head
         self.num_reg_head = num_reg_head
-        self.act_type = act_type
-        self.norm_type = norm_type
-        self.depthwise = depthwise
         
         # --------- Network Parameters ----------
-        ## cls head
+        ## classification head
         cls_feats = []
         self.cls_head_dim = cls_head_dim
         for i in range(num_cls_head):
             if i == 0:
-                cls_feats.append(
-                    BasicConv(in_dim, self.cls_head_dim,
-                              kernel_size=3, padding=1, stride=1, 
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
+                cls_feats.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
             else:
-                cls_feats.append(
-                    BasicConv(self.cls_head_dim, self.cls_head_dim,
-                              kernel_size=3, padding=1, stride=1, 
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
-        ## reg head
+                cls_feats.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
+        
+        ## bbox regression head
         reg_feats = []
         self.reg_head_dim = reg_head_dim
         for i in range(num_reg_head):
             if i == 0:
-                reg_feats.append(
-                    BasicConv(in_dim, self.reg_head_dim,
-                              kernel_size=3, padding=1, stride=1, 
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
+                reg_feats.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
             else:
-                reg_feats.append(
-                    BasicConv(self.reg_head_dim, self.reg_head_dim,
-                              kernel_size=3, padding=1, stride=1, group=4,
-                              act_type=act_type,
-                              norm_type=norm_type,
-                              depthwise=depthwise)
-                              )
+                reg_feats.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1, groups=4))
+        
         self.cls_feats = nn.Sequential(*cls_feats)
         self.reg_feats = nn.Sequential(*reg_feats)
 
@@ -77,8 +50,6 @@ class SingleLevelHead(nn.Module):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
                 m.reset_parameters()
 
     def forward(self, x):
@@ -94,24 +65,21 @@ class SingleLevelHead(nn.Module):
 class GElanDetHead(nn.Module):
     def __init__(self, cfg, in_dims):
         super().__init__()
+        self.num_levels = len(cfg.out_stride)
         ## ----------- Network Parameters -----------
         self.multi_level_heads = nn.ModuleList(
-            [SingleLevelHead(in_dim       = in_dims[level],
-                             cls_head_dim = max(in_dims[0], min(cfg.num_classes * 2, 128)),
-                             reg_head_dim = max(in_dims[0]//4, 16, 4*cfg.reg_max),
-                             num_cls_head = cfg.num_cls_head,
-                             num_reg_head = cfg.num_reg_head,
-                             act_type     = cfg.head_act,
-                             norm_type    = cfg.head_norm,
-                             depthwise    = cfg.head_depthwise)
-                             for level in range(cfg.num_levels)
-                             ])
+            [DetHead(in_dim = in_dims[level],
+                     cls_head_dim = max(in_dims[0], min(cfg.num_classes * 2, 128)),
+                     reg_head_dim = max(in_dims[0]//4, 16, 4*cfg.reg_max),
+                     num_cls_head = cfg.num_cls_head,
+                     num_reg_head = cfg.num_reg_head,
+                     ) for level in range(self.num_levels)])
+        
         # --------- Basic Parameters ----------
         self.in_dims = in_dims
         self.cls_head_dim = self.multi_level_heads[0].cls_head_dim
         self.reg_head_dim = self.multi_level_heads[0].reg_head_dim
 
-
     def forward(self, feats):
         """
             feats: List[(Tensor)] [[B, C, H, W], ...]
@@ -141,13 +109,9 @@ if __name__=='__main__':
             self.reg_max  = 16
             self.out_stride = [8, 16, 32]
             self.max_stride = 32
-            self.num_levels = 3
             ## Head
-            self.head_act  = 'lrelu'
-            self.head_norm = 'BN'
-            self.head_depthwise = False
-            self.num_cls_head   = 2
-            self.num_reg_head   = 2
+            self.num_cls_head = 2
+            self.num_reg_head = 2
 
     cfg = GElanBaseConfig()
     cfg.num_classes = 20

+ 7 - 46
yolo/models/yolov9/gelan_neck.py

@@ -1,47 +1,10 @@
 import torch
 import torch.nn as nn
 
-from .gelan_basic import BasicConv
-
-
-# SPPF (from yolov5)
-class SPPF(nn.Module):
-    """
-        This code referenced to https://github.com/ultralytics/yolov5
-    """
-    def __init__(self, cfg, in_dim, out_dim):
-        super().__init__()
-        ## ----------- Basic Parameters -----------
-        inter_dim = round(in_dim * cfg.neck_expand_ratio)
-        self.out_dim = out_dim
-        ## ----------- Network Parameters -----------
-        self.cv1 = BasicConv(in_dim, inter_dim,
-                             kernel_size=1, padding=0, stride=1,
-                             act_type=cfg.neck_act, norm_type=cfg.neck_norm)
-        self.cv2 = BasicConv(inter_dim * 4, out_dim,
-                             kernel_size=1, padding=0, stride=1,
-                             act_type=cfg.neck_act, norm_type=cfg.neck_norm)
-        self.m = nn.MaxPool2d(kernel_size=cfg.spp_pooling_size,
-                              stride=1,
-                              padding=cfg.spp_pooling_size // 2)
-
-        # Initialize all layers
-        self.init_weights()
-
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
-                m.reset_parameters()
-
-    def forward(self, x):
-        x = self.cv1(x)
-        y1 = self.m(x)
-        y2 = self.m(y1)
-
-        return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
+try:
+    from .modules import ConvModule
+except:
+    from  modules import ConvModule
 
 # SPP-ELAN (from yolov9)
 class SPPElan(nn.Module):
@@ -53,9 +16,9 @@ class SPPElan(nn.Module):
         self.inter_dim = cfg.spp_inter_dim
         self.out_dim   = cfg.spp_out_dim
         ## ----------- Network Parameters -----------
-        self.conv_layer_1 = BasicConv(in_dim, self.inter_dim, kernel_size=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm)
-        self.conv_layer_2 = BasicConv(self.inter_dim * 4, self.out_dim, kernel_size=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm)
-        self.pool_layer   = nn.MaxPool2d(kernel_size=cfg.spp_pooling_size, stride=1, padding=cfg.spp_pooling_size // 2)
+        self.conv_layer_1 = ConvModule(in_dim, self.inter_dim, kernel_size=1)
+        self.conv_layer_2 = ConvModule(self.inter_dim * 4, self.out_dim, kernel_size=1)
+        self.pool_layer   = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
 
         # Initialize all layers
         self.init_weights()
@@ -64,8 +27,6 @@ class SPPElan(nn.Module):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
                 m.reset_parameters()
 
     def forward(self, x):

+ 6 - 31
yolo/models/yolov9/gelan_pafpn.py

@@ -4,20 +4,15 @@ import torch.nn.functional as F
 from typing import List
 
 try:
-    from .gelan_basic import RepGElanLayer, ADown
+    from .modules import RepGElanLayer, ADown
 except:
-    from  gelan_basic import RepGElanLayer, ADown
+    from  modules import RepGElanLayer, ADown
 
 
 # PaFPN-ELAN
 class GElanPaFPN(nn.Module):
-    def __init__(self,
-                 cfg,
-                 in_dims :List = [256, 512, 256],
-                 ) -> None:
+    def __init__(self, cfg, in_dims :List = [256, 512, 256]):
         super(GElanPaFPN, self).__init__()
-        print('==============================')
-        print('FPN: {}'.format("GELAN PaFPN"))
         # --------------------------- Basic Parameters ---------------------------
         self.in_dims = in_dims[::-1]
         self.out_dims = [cfg.fpn_feats_td["p3"][1], cfg.fpn_feats_bu["p4"][1], cfg.fpn_feats_bu["p5"][1]]
@@ -29,9 +24,6 @@ class GElanPaFPN(nn.Module):
                                               out_dim    = cfg.fpn_feats_td["p4"][1],
                                               num_blocks = cfg.fpn_depth,
                                               shortcut   = False,
-                                              act_type   = cfg.fpn_act,
-                                              norm_type  = cfg.fpn_norm,
-                                              depthwise  = cfg.fpn_depthwise,
                                               )
         ## P4 -> P3
         self.top_down_layer_2 = RepGElanLayer(in_dim     = cfg.fpn_feats_td["p4"][1] + self.in_dims[2],
@@ -39,34 +31,23 @@ class GElanPaFPN(nn.Module):
                                               out_dim    = cfg.fpn_feats_td["p3"][1],
                                               num_blocks = cfg.fpn_depth,
                                               shortcut   = False,
-                                              act_type   = cfg.fpn_act,
-                                              norm_type  = cfg.fpn_norm,
-                                              depthwise  = cfg.fpn_depthwise,
                                               )
         # ---------------- Bottom up ----------------
         ## P3 -> P4
-        self.dowmsample_layer_1 = ADown(cfg.fpn_feats_td["p3"][1], cfg.fpn_feats_td["p3"][1],
-                                        act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
+        self.dowmsample_layer_1 = ADown(cfg.fpn_feats_td["p3"][1], cfg.fpn_feats_td["p3"][1])
         self.bottom_up_layer_1  = RepGElanLayer(in_dim     = cfg.fpn_feats_td["p3"][1] + cfg.fpn_feats_td["p4"][1],
                                                 inter_dims = cfg.fpn_feats_bu["p4"][0],
                                                 out_dim    = cfg.fpn_feats_bu["p4"][1],
                                                 num_blocks = cfg.fpn_depth,
                                                 shortcut   = False,
-                                                act_type   = cfg.fpn_act,
-                                                norm_type  = cfg.fpn_norm,
-                                                depthwise  = cfg.fpn_depthwise,
                                                 )
         ## P4 -> P5
-        self.dowmsample_layer_2 = ADown(cfg.fpn_feats_bu["p4"][1], cfg.fpn_feats_bu["p4"][1],
-                                        act_type=cfg.fpn_act, norm_type=cfg.fpn_norm, depthwise=cfg.fpn_depthwise)
+        self.dowmsample_layer_2 = ADown(cfg.fpn_feats_bu["p4"][1], cfg.fpn_feats_bu["p4"][1])
         self.bottom_up_layer_2  = RepGElanLayer(in_dim     = cfg.fpn_feats_td["p4"][1] + self.in_dims[0],
                                                 inter_dims = cfg.fpn_feats_bu["p5"][0],
                                                 out_dim    = cfg.fpn_feats_bu["p5"][1],
                                                 num_blocks = cfg.fpn_depth,
                                                 shortcut   = False,
-                                                act_type   = cfg.fpn_act,
-                                                norm_type  = cfg.fpn_norm,
-                                                depthwise  = cfg.fpn_depthwise,
                                                 )
         
         self.init_weights()
@@ -75,8 +56,6 @@ class GElanPaFPN(nn.Module):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
-                # In order to be consistent with the source code,
-                # reset the Conv2d initialization parameters
                 m.reset_parameters()
 
     def forward(self, features):
@@ -117,14 +96,10 @@ if __name__=='__main__':
             self.width    = 0.50
             self.depth    = 0.34
             self.ratio    = 2.0
+
             self.out_stride = [8, 16, 32]
             self.max_stride = 32
-            self.num_levels = 3
             ## FPN
-            self.fpn      = 'gelan_pafpn'
-            self.fpn_act  = 'silu'
-            self.fpn_norm = 'BN'
-            self.fpn_depthwise = False
             self.fpn_depth    = 3
             self.fpn_feats_td = {
                 "p4": [[256, 128], 256],

+ 12 - 15
yolo/models/yolov9/gelan_pred.py

@@ -5,7 +5,7 @@ import torch.nn.functional as F
 
 
 # Single-level pred layer
-class SingleLevelPredLayer(nn.Module):
+class PredLayer(nn.Module):
     def __init__(self,
                  cls_dim     :int = 256,
                  reg_dim     :int = 256,
@@ -84,28 +84,25 @@ class SingleLevelPredLayer(nn.Module):
 
 # Multi-level pred layer
 class GElanPredLayer(nn.Module):
-    def __init__(self,
-                 cfg,
-                 cls_dim,
-                 reg_dim,
-                 ):
+    def __init__(self, cfg, cls_dim: int, reg_dim: int):
         super().__init__()
         # --------- Basic Parameters ----------
         self.cfg = cfg
         self.cls_dim = cls_dim
         self.reg_dim = reg_dim
+        self.num_levels = len(cfg.out_stride)
 
         # ----------- Network Parameters -----------
         ## pred layers
         self.multi_level_preds = nn.ModuleList(
-            [SingleLevelPredLayer(cls_dim     = cls_dim,
-                                  reg_dim     = reg_dim,
-                                  stride      = cfg.out_stride[level],
-                                  reg_max     = cfg.reg_max,
-                                  num_classes = cfg.num_classes,
-                                  num_coords  = 4 * cfg.reg_max)
-                                  for level in range(cfg.num_levels)
-                                  ])
+            [PredLayer(cls_dim     = cls_dim,
+                       reg_dim     = reg_dim,
+                       stride      = cfg.out_stride[level],
+                       reg_max     = cfg.reg_max,
+                       num_classes = cfg.num_classes,
+                       num_coords  = 4 * cfg.reg_max)
+                       for level in range(self.num_levels)
+                       ])
         ## proj conv
         proj_init = torch.arange(cfg.reg_max, dtype=torch.float)
         self.proj_conv = nn.Conv2d(cfg.reg_max, 1, kernel_size=1, bias=False).requires_grad_(False)
@@ -117,7 +114,7 @@ class GElanPredLayer(nn.Module):
         all_cls_preds = []
         all_reg_preds = []
         all_box_preds = []
-        for level in range(self.cfg.num_levels):
+        for level in range(self.num_levels):
             # -------------- Single-level prediction --------------
             outputs = self.multi_level_preds[level](cls_feats[level], reg_feats[level])
 

+ 238 - 0
yolo/models/yolov9/modules.py

@@ -0,0 +1,238 @@
+import numpy as np
+import torch
+import torch.nn as nn
+from typing import List
+
+
+# --------------------- Basic modules ---------------------
+class ConvModule(nn.Module):
+    def __init__(self, 
+                 in_dim,        # in channels
+                 out_dim,       # out channels 
+                 kernel_size=1, # kernel size 
+                 padding=0,     # padding
+                 stride=1,      # stride
+                 groups=1,      # groups
+                ):
+        super(ConvModule, self).__init__()
+        self.conv = nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, stride=stride, padding=padding, groups=groups, bias=False)
+        self.norm = nn.BatchNorm2d(out_dim)
+        self.act  = nn.SiLU(inplace=True)
+
+    def forward(self, x):
+        return self.act(self.norm(self.conv(x)))
+
+
+# --------------------- GELAN modules (from yolov9) ---------------------
+class ADown(nn.Module):
+    def __init__(self, in_dim, out_dim,):
+        super().__init__()
+        inter_dim = out_dim // 2
+        self.conv_layer_1 = ConvModule(in_dim // 2, inter_dim, kernel_size=3, padding=1, stride=2)
+        self.conv_layer_2 = ConvModule(in_dim // 2, inter_dim, kernel_size=1)
+    
+    def forward(self, x):
+        x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
+        x1,x2 = x.chunk(2, 1)
+        x1 = self.conv_layer_1(x1)
+        x2 = torch.nn.functional.max_pool2d(x2, 3, 2, 1)
+        x2 = self.conv_layer_2(x2)
+
+        return torch.cat((x1, x2), 1)
+
+class RepConvN(nn.Module):
+    """RepConv is a basic rep-style block, including training and deploy status
+    This code is based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
+    """
+    def __init__(self, in_dim, out_dim, k=3, s=1, p=1,):
+        super().__init__()
+        assert k == 3 and p == 1
+        self.in_dim = in_dim
+        self.out_dim = out_dim
+        self.act = nn.SiLU(inplace=True)
+
+        self.bn = None
+        self.conv1 = ConvModule(in_dim, out_dim, kernel_size=k, padding=p, stride=s)
+        self.conv2 = ConvModule(in_dim, out_dim, kernel_size=1, padding=(p - k // 2), stride=s)
+
+    def forward(self, x):
+        """Forward process"""
+        if hasattr(self, 'conv'):
+            return self.forward_fuse(x)
+        else:
+            id_out = 0 if self.bn is None else self.bn(x)
+            return self.act(self.conv1(x) + self.conv2(x) + id_out)
+
+    def forward_fuse(self, x):
+        """Forward process"""
+        return self.act(self.conv(x))
+
+    def get_equivalent_kernel_bias(self):
+        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
+        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
+        kernelid, biasid = self._fuse_bn_tensor(self.bn)
+        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
+
+    def _avg_to_3x3_tensor(self, avgp):
+        channels = self.in_dim
+        groups = self.g
+        kernel_size = avgp.kernel_size
+        input_dim = channels // groups
+        k = torch.zeros((channels, input_dim, kernel_size, kernel_size))
+        k[np.arange(channels), np.tile(np.arange(input_dim), groups), :, :] = 1.0 / kernel_size ** 2
+        return k
+
+    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
+        if kernel1x1 is None:
+            return 0
+        else:
+            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
+
+    def _fuse_bn_tensor(self, branch):
+        if branch is None:
+            return 0, 0
+        if isinstance(branch, ConvModule):
+            kernel       = branch.conv.weight
+            running_mean = branch.norm.running_mean
+            running_var  = branch.norm.running_var
+            gamma        = branch.norm.weight
+            beta         = branch.norm.bias
+            eps          = branch.norm.eps
+        elif isinstance(branch, nn.BatchNorm2d):
+            if not hasattr(self, 'id_tensor'):
+                input_dim = self.in_dim // self.g
+                kernel_value = np.zeros((self.in_dim, input_dim, 3, 3), dtype=np.float32)
+                for i in range(self.in_dim):
+                    kernel_value[i, i % input_dim, 1, 1] = 1
+                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
+            kernel       = self.id_tensor
+            running_mean = branch.running_mean
+            running_var  = branch.running_var
+            gamma        = branch.weight
+            beta         = branch.bias
+            eps          = branch.eps
+        std = (running_var + eps).sqrt()
+        t = (gamma / std).reshape(-1, 1, 1, 1)
+        return kernel * t, beta - running_mean * gamma / std
+
+    def fuse_convs(self):
+        if hasattr(self, 'conv'):
+            return
+        kernel, bias = self.get_equivalent_kernel_bias()
+        self.conv = nn.Conv2d(in_channels  = self.conv1.conv.in_channels,
+                              out_channels = self.conv1.conv.out_channels,
+                              kernel_size  = self.conv1.conv.kernel_size,
+                              stride       = self.conv1.conv.stride,
+                              padding      = self.conv1.conv.padding,
+                              dilation     = self.conv1.conv.dilation,
+                              groups       = self.conv1.conv.groups,
+                              bias         = True).requires_grad_(False)
+        self.conv.weight.data = kernel
+        self.conv.bias.data = bias
+        for para in self.parameters():
+            para.detach_()
+        self.__delattr__('conv1')
+        self.__delattr__('conv2')
+        if hasattr(self, 'nm'):
+            self.__delattr__('nm')
+        if hasattr(self, 'bn'):
+            self.__delattr__('bn')
+        if hasattr(self, 'id_tensor'):
+            self.__delattr__('id_tensor')
+
+class RepNBottleneck(nn.Module):
+    def __init__(self,
+                 in_dim: int,
+                 out_dim: int,
+                 shortcut: bool = True,
+                 kernel_size: List = (3, 3),
+                 expansion: float = 0.5,
+                 ):
+        super().__init__()
+        inter_dim = round(out_dim * expansion)
+        self.conv_layer_1 = RepConvN(in_dim, inter_dim, kernel_size[0], p=kernel_size[0]//2, s=1)
+        self.conv_layer_2 = ConvModule(inter_dim, out_dim, kernel_size[1], padding=kernel_size[1]//2, stride=1)
+        self.add = shortcut and in_dim == out_dim
+
+    def forward(self, x):
+        h = self.conv_layer_2(self.conv_layer_1(x))
+        return x + h if self.add else h
+
+class RepNCSP(nn.Module):
+    def __init__(self,
+                 in_dim: int,
+                 out_dim: int,
+                 num_blocks: int = 1,
+                 shortcut: bool = True,
+                 expansion:float = 0.5,
+                 ):
+        super().__init__()
+        inter_dim = int(out_dim * expansion)
+        self.conv_layer_1 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.conv_layer_2 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.conv_layer_3 = ConvModule(2 * inter_dim, out_dim, kernel_size=1)
+        self.module = nn.Sequential(*[
+            RepNBottleneck(in_dim = inter_dim,
+                           out_dim = inter_dim,
+                           kernel_size = [3, 3],
+                           shortcut    = shortcut,
+                           expansion   = 1.0,
+                           ) for _ in range(num_blocks)])
+
+    def forward(self, x):
+        x1 = self.conv_layer_1(x)
+        x2 = self.module(self.conv_layer_2(x))
+
+        return self.conv_layer_3(torch.cat([x1, x2], dim=1))
+
+class RepGElanLayer(nn.Module):
+    """YOLOv9's GELAN module"""
+    def __init__(self,
+                 in_dim     :int,
+                 inter_dims :List,
+                 out_dim    :int,
+                 num_blocks :int   = 1,
+                 shortcut   :bool  = False,
+                 ):
+        super(RepGElanLayer, self).__init__()
+        # ----------- Basic parameters -----------
+        self.in_dim = in_dim
+        self.inter_dims = inter_dims
+        self.out_dim = out_dim
+
+        # ----------- Network parameters -----------
+        self.conv_layer_1  = ConvModule(in_dim, inter_dims[0], kernel_size=1)
+        self.elan_module_1 = nn.Sequential(
+            RepNCSP(inter_dims[0]//2,
+                    inter_dims[1],
+                    num_blocks  = num_blocks,
+                    shortcut    = shortcut,
+                    expansion   = 0.5,
+                    ),
+            ConvModule(inter_dims[1], inter_dims[1], kernel_size=3, padding=1)
+        )
+        self.elan_module_2 = nn.Sequential(
+            RepNCSP(inter_dims[1],
+                    inter_dims[1],
+                    num_blocks  = num_blocks,
+                    shortcut    = shortcut,
+                    expansion   = 0.5,
+                    ),
+            ConvModule(inter_dims[1], inter_dims[1],kernel_size=3, padding=1)
+        )
+        self.conv_layer_2 = ConvModule(inter_dims[0] + 2*self.inter_dims[1], out_dim, kernel_size=1)
+
+    def forward(self, x):
+        # Input proj
+        x1, x2 = torch.chunk(self.conv_layer_1(x), 2, dim=1)
+        out = list([x1, x2])
+
+        # ELAN module
+        out.append(self.elan_module_1(out[-1]))
+        out.append(self.elan_module_2(out[-1]))
+
+        # Output proj
+        out = self.conv_layer_2(torch.cat(out, dim=1))
+
+        return out
+    

+ 2 - 2
yolo/models/yolox/yolox_backbone.py

@@ -22,7 +22,7 @@ class YoloxBackbone(nn.Module):
     def __init__(self, cfg):
         super(YoloxBackbone, self).__init__()
         # ------------------ Basic setting ------------------
-        self.model_scale = cfg.scale
+        self.model_scale = cfg.model_scale
         self.feat_dims = [round(64   * cfg.width),
                           round(128  * cfg.width),
                           round(256  * cfg.width),
@@ -129,7 +129,7 @@ if __name__ == '__main__':
         def __init__(self) -> None:
             self.width = 0.5
             self.depth = 0.34
-            self.scale = "s"
+            self.model_scale = "s"
             self.use_pretrained = True
 
     cfg = BaseConfig()

+ 1 - 0
yolo/test.py

@@ -123,6 +123,7 @@ if __name__ == '__main__':
 
     # Build model
     model = build_model(args, cfg, is_val=False)
+    print(model)
 
     # Load trained weight
     model = load_weight(model, args.weight, args.fuse_conv_bn)