yjh0410 преди 1 година
родител
ревизия
9d57404b71
променени са 3 файла, в които са добавени 104 реда и са изтрити 60 реда
  1. 3 2
      yolo/models/yolov7_af/yolov7_af.py
  2. 84 41
      yolo/models/yolov7_af/yolov7_af_backbone.py
  3. 17 17
      yolo/models/yolov7_af/yolov7_af_pafpn.py

+ 3 - 2
yolo/models/yolov7_af/yolov7_af.py

@@ -3,7 +3,7 @@ import torch
 import torch.nn as nn
 
 # --------------- Model components ---------------
-from .yolov7_af_backbone import Yolov7Backbone
+from .yolov7_af_backbone import Yolov7TBackbone, Yolov7LBackbone
 from .yolov7_af_neck     import SPPF
 from .yolov7_af_pafpn    import Yolov7PaFPN
 from .yolov7_af_head     import Yolov7DetHead
@@ -21,6 +21,7 @@ class Yolov7AF(nn.Module):
                  ) -> None:
         super(Yolov7AF, self).__init__()
         # ---------------------- Basic setting ----------------------
+        assert cfg.scale in ["t", "l", "x"]
         self.cfg = cfg
         self.num_classes = cfg.num_classes
         ## Post-process parameters
@@ -31,7 +32,7 @@ class Yolov7AF(nn.Module):
         
         # ---------------------- Network Parameters ----------------------
         ## Backbone
-        self.backbone = Yolov7Backbone(cfg)
+        self.backbone = Yolov7TBackbone(cfg) if cfg.scale == "t" else Yolov7LBackbone(cfg)
         self.pyramid_feat_dims = self.backbone.feat_dims[-3:]
         ## Neck: SPP
         self.neck     = SPPF(cfg, self.pyramid_feat_dims[-1], self.pyramid_feat_dims[-1]//2)

+ 84 - 41
yolo/models/yolov7_af/yolov7_af_backbone.py

@@ -8,31 +8,24 @@ except:
 
 
 # ELANNet
-class Yolov7Backbone(nn.Module):
+class Yolov7TBackbone(nn.Module):
     def __init__(self, cfg):
-        super(Yolov7Backbone, self).__init__()
+        super(Yolov7TBackbone, self).__init__()
         # ---------------- Basic parameters ----------------
         self.model_scale = cfg.scale
         self.bk_act = cfg.bk_act
         self.bk_norm = cfg.bk_norm
         self.bk_depthwise = cfg.bk_depthwise
-        if self.model_scale in ["l", "x"]:
-            self.elan_depth = 2
-            self.feat_dims = [round(64   * cfg.width), round(128  * cfg.width), round(256  * cfg.width),
-                              round(512  * cfg.width), round(1024 * cfg.width), round(1024 * cfg.width)]
-            self.last_stage_eratio = 0.25
-        if self.model_scale in ["t"]:
-            self.elan_depth = 1
-            self.feat_dims = [round(64  * cfg.width), round(128  * cfg.width),
-                              round(256  * cfg.width), round(512 * cfg.width), round(1024 * cfg.width)]
-            self.last_stage_eratio = 0.5
+        self.elan_depth = 1
+        self.feat_dims = [round(64  * cfg.width), round(128  * cfg.width),
+                          round(256  * cfg.width), round(512 * cfg.width), round(1024 * cfg.width)]
 
         # ---------------- Model parameters ----------------
         self.layer_1 = self.make_stem(3, self.feat_dims[0])
         self.layer_2 = self.make_block(self.feat_dims[0], self.feat_dims[1], expansion=0.5)
         self.layer_3 = self.make_block(self.feat_dims[1], self.feat_dims[2], expansion=0.5)
         self.layer_4 = self.make_block(self.feat_dims[2], self.feat_dims[3], expansion=0.5)
-        self.layer_5 = self.make_block(self.feat_dims[3], self.feat_dims[4], expansion=self.last_stage_eratio)
+        self.layer_5 = self.make_block(self.feat_dims[3], self.feat_dims[4], expansion=0.5)
 
         # Initialize all layers
         self.init_weights()
@@ -46,42 +39,92 @@ class Yolov7Backbone(nn.Module):
                 m.reset_parameters()
 
     def make_stem(self, in_dim, out_dim):
-        if self.model_scale in ["l", "x"]:
-            stem = nn.Sequential(
-                BasicConv(in_dim, out_dim//2, kernel_size=3, padding=1, stride=1,
-                          act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),
-                BasicConv(out_dim//2, out_dim, kernel_size=3, padding=1, stride=2,
-                          act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),
-                BasicConv(out_dim, out_dim, kernel_size=3, padding=1, stride=1,
-                          act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise)
-
-            )
-        elif self.model_scale in ["t"]:
-            stem = BasicConv(in_dim, out_dim, kernel_size=6, padding=2, stride=2,
-                              act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise)
-        else:
-            raise NotImplementedError("Unknown model scale: {}".format(self.model_scale))
+        stem = BasicConv(in_dim, out_dim, kernel_size=6, padding=2, stride=2,
+                         act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise)
         
         return stem
 
     def make_block(self, in_dim, out_dim, expansion=0.5):
-        if self.model_scale in ["l", "x"]:
+        block = nn.Sequential(
+            nn.MaxPool2d((2, 2), stride=2),             
+            ELANLayer(in_dim, out_dim,
+                    expansion=expansion, num_blocks=self.elan_depth,
+                    act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),
+                    )
+
+        return block
+    
+    def forward(self, x):
+        c1 = self.layer_1(x)
+        c2 = self.layer_2(c1)
+        c3 = self.layer_3(c2)
+        c4 = self.layer_4(c3)
+        c5 = self.layer_5(c4)
+        outputs = [c3, c4, c5]
+
+        return outputs
+
+
+class Yolov7LBackbone(nn.Module):
+    def __init__(self, cfg):
+        super(Yolov7LBackbone, self).__init__()
+        # ---------------- Basic parameters ----------------
+        self.model_scale = cfg.scale
+        self.bk_act = cfg.bk_act
+        self.bk_norm = cfg.bk_norm
+        self.bk_depthwise = cfg.bk_depthwise
+        self.elan_depth = 2
+        self.feat_dims = [round(64  * cfg.width), round(128  * cfg.width), round(256  * cfg.width),
+                          round(512  * cfg.width), round(1024 * cfg.width), round(1024 * cfg.width)]
+
+        # ---------------- Model parameters ----------------
+        self.layer_1 = self.make_stem(3, self.feat_dims[0])
+        self.layer_2 = self.make_block(self.feat_dims[0], self.feat_dims[1], self.feat_dims[2], expansion=0.5, conv_downsample=True)
+        self.layer_3 = self.make_block(self.feat_dims[2], self.feat_dims[2], self.feat_dims[3], expansion=0.5)
+        self.layer_4 = self.make_block(self.feat_dims[3], self.feat_dims[3], self.feat_dims[4], expansion=0.5)
+        self.layer_5 = self.make_block(self.feat_dims[4], self.feat_dims[4], self.feat_dims[5], expansion=0.25)
+
+        # Initialize all layers
+        self.init_weights()
+        
+    def init_weights(self):
+        """Initialize the parameters."""
+        for m in self.modules():
+            if isinstance(m, torch.nn.Conv2d):
+                # In order to be consistent with the source code,
+                # reset the Conv2d initialization parameters
+                m.reset_parameters()
+
+    def make_stem(self, in_dim, out_dim):
+        stem = nn.Sequential(
+            BasicConv(in_dim, out_dim//2, kernel_size=3, padding=1, stride=1,
+                        act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),
+            BasicConv(out_dim//2, out_dim, kernel_size=3, padding=1, stride=2,
+                        act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),
+            BasicConv(out_dim, out_dim, kernel_size=3, padding=1, stride=1,
+                        act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise)
+
+        )
+
+        return stem
+
+    def make_block(self, in_dim, out_dim_1, out_dim_2, expansion=0.5, conv_downsample=False):
+        if conv_downsample:
             block = nn.Sequential(
-                MDown(in_dim, out_dim,
-                    act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),             
-                ELANLayer(out_dim, out_dim,
+                BasicConv(in_dim, out_dim_1, kernel_size=3, padding=1, stride=2,
+                         act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),             
+                ELANLayer(out_dim_1, out_dim_2,
                         expansion=expansion, num_blocks=self.elan_depth,
                         act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),
-            )
-        elif self.model_scale in ["t"]:
+        )
+        else:
             block = nn.Sequential(
-                nn.MaxPool2d((2, 2), stride=2),             
-                ELANLayer(in_dim, out_dim,
+                MDown(in_dim, out_dim_1,
+                    act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),             
+                ELANLayer(out_dim_1, out_dim_2,
                         expansion=expansion, num_blocks=self.elan_depth,
                         act_type=self.bk_act, norm_type=self.bk_norm, depthwise=self.bk_depthwise),
             )
-        else:
-            raise NotImplementedError("Unknown model scale: {}".format(self.model_scale))
         
         return block
     
@@ -104,12 +147,12 @@ if __name__ == '__main__':
             self.bk_act = 'silu'
             self.bk_norm = 'BN'
             self.bk_depthwise = False
-            self.width = 0.5
+            self.width = 1.0
             self.depth = 0.34
-            self.scale = "t"
+            self.scale = "l"
 
     cfg = BaseConfig()
-    model = Yolov7Backbone(cfg)
+    model = Yolov7LBackbone(cfg)
     x = torch.randn(1, 3, 640, 640)
     t0 = time.time()
     outputs = model(x)

+ 17 - 17
yolo/models/yolov7_af/yolov7_af_pafpn.py

@@ -21,35 +21,35 @@ class Yolov7PaFPN(nn.Module):
                                         kernel_size=1, act_type=cfg.fpn_act, norm_type=cfg.fpn_norm)
         self.reduce_layer_2 = BasicConv(c4, round(256*cfg.width),
                                         kernel_size=1, act_type=cfg.fpn_act, norm_type=cfg.fpn_norm)
-        self.top_down_layer_1 = ELANLayerFPN(in_dim     = round(256*cfg.width) + round(256*cfg.width),
-                                             out_dim    = round(256*cfg.width),
+        self.top_down_layer_1 = ELANLayerFPN(in_dim       = round(256*cfg.width) + round(256*cfg.width),
+                                             out_dim      = round(256*cfg.width),
                                              expansions   = cfg.fpn_expansions,
                                              branch_width = cfg.fpn_block_bw,
                                              branch_depth = cfg.fpn_block_dw,
-                                             act_type   = cfg.fpn_act,
-                                             norm_type  = cfg.fpn_norm,
-                                             depthwise  = cfg.fpn_depthwise,
+                                             act_type     = cfg.fpn_act,
+                                             norm_type    = cfg.fpn_norm,
+                                             depthwise    = cfg.fpn_depthwise,
                                              )
         ## P4 -> P3
         self.reduce_layer_3 = BasicConv(round(256*cfg.width), round(128*cfg.width),
                                         kernel_size=1, act_type=cfg.fpn_act, norm_type=cfg.fpn_norm)
         self.reduce_layer_4 = BasicConv(c3, round(128*cfg.width),
                                         kernel_size=1, act_type=cfg.fpn_act, norm_type=cfg.fpn_norm)
-        self.top_down_layer_2 = ELANLayerFPN(in_dim     = round(128*cfg.width) + round(128*cfg.width),
-                                             out_dim    = round(128*cfg.width),
+        self.top_down_layer_2 = ELANLayerFPN(in_dim       = round(128*cfg.width) + round(128*cfg.width),
+                                             out_dim      = round(128*cfg.width),
                                              expansions   = cfg.fpn_expansions,
                                              branch_width = cfg.fpn_block_bw,
                                              branch_depth = cfg.fpn_block_dw,
-                                             act_type   = cfg.fpn_act,
-                                             norm_type  = cfg.fpn_norm,
-                                             depthwise  = cfg.fpn_depthwise,
+                                             act_type     = cfg.fpn_act,
+                                             norm_type    = cfg.fpn_norm,
+                                             depthwise    = cfg.fpn_depthwise,
                                              )
         # ----------------------------- Yolov7's Bottom-up PAN -----------------------------
         ## P3 -> P4
         self.downsample_layer_1 = MDown(round(128*cfg.width), round(256*cfg.width),
                                         act_type=cfg.fpn_act, norm_type=cfg.fpn_norm)
-        self.bottom_up_layer_1 = ELANLayerFPN(in_dim     = round(256*cfg.width) + round(256*cfg.width),
-                                              out_dim    = round(256*cfg.width),
+        self.bottom_up_layer_1 = ELANLayerFPN(in_dim       = round(256*cfg.width) + round(256*cfg.width),
+                                              out_dim      = round(256*cfg.width),
                                               expansions   = cfg.fpn_expansions,
                                               branch_width = cfg.fpn_block_bw,
                                               branch_depth = cfg.fpn_block_dw,
@@ -60,14 +60,14 @@ class Yolov7PaFPN(nn.Module):
         ## P4 -> P5
         self.downsample_layer_2 = MDown(round(256*cfg.width), round(512*cfg.width),
                                         act_type=cfg.fpn_act, norm_type=cfg.fpn_norm)
-        self.bottom_up_layer_2 = ELANLayerFPN(in_dim     = round(512*cfg.width) + c5,
-                                              out_dim    = round(512*cfg.width),
+        self.bottom_up_layer_2 = ELANLayerFPN(in_dim       = round(512*cfg.width) + c5,
+                                              out_dim      = round(512*cfg.width),
                                               expansions   = cfg.fpn_expansions,
                                               branch_width = cfg.fpn_block_bw,
                                               branch_depth = cfg.fpn_block_dw,
-                                              act_type   = cfg.fpn_act,
-                                              norm_type  = cfg.fpn_norm,
-                                              depthwise  = cfg.fpn_depthwise,
+                                              act_type     = cfg.fpn_act,
+                                              norm_type    = cfg.fpn_norm,
+                                              depthwise    = cfg.fpn_depthwise,
                                               )
 
         # ----------------------------- Head conv layers -----------------------------