yjh0410 11 months ago
parent
commit
1a10122af0

+ 1 - 1
yolo/config/yolov4_config.py

@@ -26,7 +26,7 @@ class Yolov4Config(object):
         self.val_conf_thresh = 0.001
         self.val_nms_thresh  = 0.7
         self.test_topk = 100
-        self.test_conf_thresh = 0.3
+        self.test_conf_thresh = 0.45
         self.test_nms_thresh  = 0.5

         # ---------------- Assignment config ----------------

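Note: raising `test_conf_thresh` from 0.3 to 0.45 only tightens demo/test-time filtering; `val_conf_thresh` stays at 0.001 so COCO mAP evaluation still sees low-confidence boxes. A minimal sketch of the kind of score filtering this threshold drives (standalone illustration, not this repo's exact `post_process`):

```Python
import torch

conf_thresh = 0.45                          # new test-time value (was 0.3)
scores = torch.tensor([0.95, 0.44, 0.46, 0.30])

keep = scores > conf_thresh                 # low-confidence boxes are dropped
print(keep)                                 # tensor([ True, False,  True, False])
```
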
+ 5 - 49
yolo/config/yolov7_config.py

@@ -2,27 +2,16 @@


 def build_yolov7_config(args):
-    if   args.model == 'yolov7_t':
-        return Yolov7AFTConfig()
-    elif args.model == 'yolov7_l':
-        return Yolov7AFLConfig()
-    else:
-        raise NotImplementedError("No config for model: {}".format(args.model))
+    return Yolov7AFConfig()
     
-# YOLOv7AF-Base config
-class Yolov7AFBaseConfig(object):
+# Anchor-free YOLOv7 config
+class Yolov7AFConfig(object):
     def __init__(self) -> None:
         # ---------------- Model config ----------------
-        self.width    = 1.0
         self.out_stride = [8, 16, 32]
         self.max_stride = 32
-        self.model_scale = "b"
         ## Backbone
         self.use_pretrained = True
-        ## FPN
-        self.fpn_expansions = [0.5, 0.5]
-        self.fpn_block_bw = 4
-        self.fpn_block_dw = 1
         ## Head
         self.head_dim     = 256
         self.num_cls_head = 2
@@ -34,7 +23,7 @@ class Yolov7AFBaseConfig(object):
         self.val_conf_thresh = 0.001
         self.val_nms_thresh  = 0.7
         self.test_topk = 100
-        self.test_conf_thresh = 0.4
+        self.test_conf_thresh = 0.45
         self.test_nms_thresh  = 0.5

         # ---------------- Assignment config ----------------
@@ -73,7 +62,7 @@ class Yolov7AFBaseConfig(object):
         # ---------------- Data process config ----------------
         self.aug_type = 'yolo'
         self.mosaic_prob = 1.0
-        self.mixup_prob  = 0.15
+        self.mixup_prob  = 0.1
         self.copy_paste  = 0.0           # approximated by the YOLOX's mixup
         self.multi_scale = [0.5, 1.25]   # multi scale: [img_size * 0.5, img_size * 1.25]
         ## Pixel mean & std
@@ -97,36 +86,3 @@ class Yolov7AFBaseConfig(object):
         config_dict = {key: value for key, value in self.__dict__.items() if not key.startswith('__')}
         for k, v in config_dict.items():
             print("{} : {}".format(k, v))
-
-# YOLOv7-S
-class Yolov7AFTConfig(Yolov7AFBaseConfig):
-    def __init__(self) -> None:
-        super().__init__()
-        # ---------------- Model config ----------------
-        self.width = 0.50
-        self.model_scale = "t"
-        self.use_pretrained = True
-        self.fpn_expansions = [0.5, 0.5]
-        self.fpn_block_bw = 2
-        self.fpn_block_dw = 1
-
-        # ---------------- Data process config ----------------
-        self.mosaic_prob = 1.0
-        self.mixup_prob  = 0.0
-        self.copy_paste  = 0.5
-
-# YOLOv7-L
-class Yolov7AFLConfig(Yolov7AFBaseConfig):
-    def __init__(self) -> None:
-        super().__init__()
-        # ---------------- Model config ----------------
-        self.width = 1.0
-        self.model_scale = "l"
-        self.fpn_expansions = [0.5, 0.5]
-        self.fpn_block_bw = 4
-        self.fpn_block_dw = 1
-
-        # ---------------- Data process config ----------------
-        self.mosaic_prob = 1.0
-        self.mixup_prob  = 0.1
-        self.copy_paste  = 0.5

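With the tiny/large variants deleted, `build_yolov7_config` now ignores `args.model` and always returns the single `Yolov7AFConfig`. A quick usage sketch (the `Args` class below is a hypothetical stand-in for the repo's parsed CLI arguments):

```Python
from yolo.config.yolov7_config import build_yolov7_config

class Args:              # hypothetical stand-in for the parsed CLI arguments
    model = "yolov7"     # no longer selects between config classes

cfg = build_yolov7_config(Args())
print(cfg.test_conf_thresh)   # 0.45 after this commit
print(cfg.mixup_prob)         # 0.1 after this commit
```
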
+ 1 - 1
yolo/eval.py

@@ -1,7 +1,7 @@
 import argparse
 import torch

-from evaluator.map_evaluator import MapEvaluator
+from map_evaluator import MapEvaluator
 from dataset.build import build_dataset, build_transform
 from utils.misc import load_weight


+ 0 - 0
yolo/evaluator/map_evaluator.py → yolo/map_evaluator.py


+ 26 - 0
yolo/models/yolov3/yolov3.py

@@ -54,7 +54,33 @@ class Yolov3(nn.Module):
                             [nn.Conv2d(head.reg_head_dim, 4 * self.num_anchors, kernel_size=1) 
                              for head in self.non_shared_heads
                              ])                 
+
+        # init pred layers
+        self.init_weight()
     
+    def init_weight(self):
+        # Init bias
+        init_prob = 0.01
+        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
+        # obj pred
+        for obj_pred in self.obj_preds:
+            b = obj_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # cls pred
+        for cls_pred in self.cls_preds:
+            b = cls_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # reg pred
+        for reg_pred in self.reg_preds:
+            b = reg_pred.bias.view(-1, )
+            b.data.fill_(1.0)
+            reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+            w = reg_pred.weight
+            w.data.fill_(0.)
+            reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+
     def generate_anchors(self, level, fmp_size):
         """
             fmp_size: (List) [H, W]

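The new `init_weight` uses the focal-loss prior trick: filling the objectness/class biases with -log((1 - p) / p) for p = 0.01 makes every anchor start out predicting background with probability ~0.99, which keeps early training from being dominated by easy negatives. A standalone sketch of the same idea (not tied to this repo's modules):

```Python
import math
import torch
import torch.nn as nn

cls_pred = nn.Conv2d(256, 80, kernel_size=1)   # e.g. an 80-class score layer

# bias = -log((1 - p) / p) puts the initial sigmoid output at p
init_prob = 0.01
nn.init.constant_(cls_pred.bias, -math.log((1.0 - init_prob) / init_prob))

print(torch.sigmoid(cls_pred.bias[0]).item())  # ~0.01
```
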
+ 3 - 3
yolo/models/yolov4/loss.py

@@ -10,9 +10,9 @@ class SetCriterion(object):
         self.cfg = cfg
         self.num_classes = cfg.num_classes
         # loss weight
-        self.loss_obj_weight = cfg.loss_obj_weight
-        self.loss_cls_weight = cfg.loss_cls_weight
-        self.loss_box_weight = cfg.loss_box_weight
+        self.loss_obj_weight = cfg.loss_obj
+        self.loss_cls_weight = cfg.loss_cls
+        self.loss_box_weight = cfg.loss_box

         # matcher
         self.matcher = Yolov4Matcher(self.num_classes, 3, cfg.anchor_size, cfg.iou_thresh)

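This rename implies `Yolov4Config` now exposes the weights as `loss_obj` / `loss_cls` / `loss_box` rather than the old `*_weight` names. A hypothetical excerpt of the matching config fields (the numeric values are placeholders, not taken from this commit):

```Python
class Yolov4Config:
    def __init__(self):
        # loss weights, under the names SetCriterion now reads
        self.loss_obj = 1.0   # placeholder value
        self.loss_cls = 1.0   # placeholder value
        self.loss_box = 5.0   # placeholder value
```
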
+ 26 - 0
yolo/models/yolov4/yolov4.py

@@ -54,7 +54,33 @@ class Yolov4(nn.Module):
                             [nn.Conv2d(head.reg_head_dim, 4 * self.num_anchors, kernel_size=1) 
                              for head in self.non_shared_heads
                              ])                 
+
+        # init pred layers
+        self.init_weight()
     
+    def init_weight(self):
+        # Init bias
+        init_prob = 0.01
+        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
+        # obj pred
+        for obj_pred in self.obj_preds:
+            b = obj_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # cls pred
+        for cls_pred in self.cls_preds:
+            b = cls_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # reg pred
+        for reg_pred in self.reg_preds:
+            b = reg_pred.bias.view(-1, )
+            b.data.fill_(1.0)
+            reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+            w = reg_pred.weight
+            w.data.fill_(0.)
+            reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+
     def generate_anchors(self, level, fmp_size):
         """
             fmp_size: (List) [H, W]

+ 0 - 56
yolo/models/yolov7/README.md

@@ -1,56 +0,0 @@
-# YOLOv7:
-
-|    Model    |   Backbone    | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
-|-------------|---------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
-| YOLOv7-Tiny | ELANNet-Tiny  | 8xb16 |  640  |         39.5           |       58.5        |   22.6            |   7.9              | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_tiny_coco.pth) |
-| YOLOv7      | ELANNet-Large | 8xb16 |  640  |         49.5           |       68.8        |   144.6           |   44.0             | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_coco.pth) |
-| YOLOv7-X    | ELANNet-Huge  |       |  640  |                        |                   |                   |                    |  |
-
-- For training, we train `YOLOv7` and `YOLOv7-Tiny` with 300 epochs on 8 GPUs.
-- For data augmentation, we use the [YOLOX-style](https://github.com/Megvii-BaseDetection/YOLOX) augmentation including the large scale jitter (LSJ), Mosaic augmentation and Mixup augmentation.
-- For optimizer, we use `AdamW` with weight decay 0.05 and per image learning rate 0.001 / 64.
-- For learning rate scheduler, we use Cosine decay scheduler.
-- For YOLOv7's structure, we replace the coupled head with the YOLOX-style decoupled head.
-- I think YOLOv7 uses too many training tricks, such as `anchor box`, `AuxiliaryHead`, `RepConv`, `Mosaic9x` and so on, making the picture of YOLO too complicated, which is against the development concept of the YOLO series. Otherwise, why don't we use the DETR series? It's nothing more than doing some acceleration optimization on DETR. Therefore, I was faithful to my own technical aesthetics and realized a cleaner and simpler YOLOv7, but without the blessing of so many tricks, I did not reproduce all the performance, which is a pity.
-- I have no more GPUs to train my `YOLOv7-X`.
-
-## Train YOLOv7
-### Single GPU
-Taking training YOLOv7-Tiny on COCO as the example,
-```Shell
-python train.py --cuda -d coco --root path/to/coco -m yolov7_tiny -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 
-```
-
-### Multi GPU
-Taking training YOLOv7-Tiny on COCO as the example,
-```Shell
-python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov7_tiny -bs 128 -size 640 --wp_epoch 3 --max_epoch 300  --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 
-```
-
-## Test YOLOv7
-Taking testing YOLOv7-Tiny on COCO-val as the example,
-```Shell
-python test.py --cuda -d coco --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth -size 640 -vt 0.4 --show 
-```
-
-## Evaluate YOLOv7
-Taking evaluating YOLOv7-Tiny on COCO-val as the example,
-```Shell
-python eval.py --cuda -d coco-val --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth 
-```
-
-## Demo
-### Detect with Image
-```Shell
-python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show
-```
-
-### Detect with Video
-```Shell
-python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif
-```
-
-### Detect with Camera
-```Shell
-python demo.py --mode camera --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif
-```

+ 2 - 10
yolo/models/yolov7/build.py

@@ -1,5 +1,3 @@
-import torch.nn as nn
-
 from .loss import SetCriterion
 from .yolov7 import Yolov7

@@ -8,17 +6,11 @@ from .yolov7 import Yolov7
 def build_yolov7(cfg, is_val=False):
     # -------------- Build YOLO --------------
     model = Yolov7(cfg, is_val)
-
-    # -------------- Initialize YOLO --------------
-    for m in model.modules():
-        if isinstance(m, nn.BatchNorm2d):
-            m.eps = 1e-3
-            m.momentum = 0.03    
-            
+  
     # -------------- Build criterion --------------
     criterion = None
     if is_val:
         # build criterion for training
         criterion = SetCriterion(cfg)
         
-    return model, criterion
+    return model, criterion

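The deleted block was the YOLOv5-style BatchNorm override; without it the model falls back to PyTorch defaults (`eps=1e-5`, `momentum=0.1`). For reference, restoring the old behaviour is a short loop over the built model (a sketch, assuming `model` is the `Yolov7` instance returned above):

```Python
import torch.nn as nn

# re-applies the pre-commit BN constants (eps=1e-3, momentum=0.03)
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.eps = 1e-3
        m.momentum = 0.03
```
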
+ 12 - 10
yolo/models/yolov7/loss.py

@@ -1,20 +1,24 @@
 import torch
 import torch.nn.functional as F
+from .matcher import SimOTA
 from utils.box_ops import get_ious
 from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized

-from .matcher import YoloxMatcher
-

 class SetCriterion(object):
     def __init__(self, cfg):
         self.cfg = cfg
         self.num_classes = cfg.num_classes
+        # loss weight
         self.loss_obj_weight = cfg.loss_obj
         self.loss_cls_weight = cfg.loss_cls
         self.loss_box_weight = cfg.loss_box
         # matcher
-        self.matcher = YoloxMatcher(cfg.num_classes, cfg.ota_center_sampling_radius, cfg.ota_topk_candidate)
+        self.matcher = SimOTA(
+            num_classes=self.num_classes,
+            center_sampling_radius=cfg.ota_center_sampling_radius,
+            topk_candidate=cfg.ota_topk_candidate
+            )

     def loss_objectness(self, pred_obj, gt_obj):
         loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none')
@@ -37,21 +41,22 @@ class SetCriterion(object):
         """
         """
             outputs['pred_obj']: List(Tensor) [B, M, 1]
             outputs['pred_obj']: List(Tensor) [B, M, 1]
             outputs['pred_cls']: List(Tensor) [B, M, C]
             outputs['pred_cls']: List(Tensor) [B, M, C]
-            outputs['pred_reg']: List(Tensor) [B, M, 4]
+            outputs['pred_box']: List(Tensor) [B, M, 4]
             outputs['pred_box']: List(Tensor) [B, M, 4]
             outputs['pred_box']: List(Tensor) [B, M, 4]
             outputs['strides']: List(Int) [8, 16, 32] output stride
             outputs['strides']: List(Int) [8, 16, 32] output stride
             targets: (List) [dict{'boxes': [...], 
             targets: (List) [dict{'boxes': [...], 
                                  'labels': [...], 
                                  'labels': [...], 
                                  'orig_size': ...}, ...]
                                  'orig_size': ...}, ...]
         """
         """
-        bs = outputs['pred_cls'][0].shape[0]
-        device = outputs['pred_cls'][0].device
         fpn_strides = outputs['strides']
         fpn_strides = outputs['strides']
         anchors = outputs['anchors']
         anchors = outputs['anchors']
+
         # preds: [B, M, C]
         # preds: [B, M, C]
         obj_preds = torch.cat(outputs['pred_obj'], dim=1)
         obj_preds = torch.cat(outputs['pred_obj'], dim=1)
         cls_preds = torch.cat(outputs['pred_cls'], dim=1)
         cls_preds = torch.cat(outputs['pred_cls'], dim=1)
         box_preds = torch.cat(outputs['pred_box'], dim=1)
         box_preds = torch.cat(outputs['pred_box'], dim=1)
+        device = box_preds.device
+        bs = box_preds.shape[0]
 
 
         # label assignment
         # label assignment
         cls_targets = []
         cls_targets = []
@@ -135,7 +140,4 @@ class SetCriterion(object):
         )

         return loss_dict
-
-
-if __name__ == "__main__":
-    pass
+    

+ 4 - 2
yolo/models/yolov7/matcher.py

@@ -8,7 +8,7 @@ import torch.nn.functional as F
 from utils.box_ops import *


-class YoloxMatcher(object):
+class SimOTA(object):
     """
         This code referenced to https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/models/yolo_head.py
     """
@@ -81,6 +81,7 @@ class YoloxMatcher(object):

         return fg_mask, assigned_labels, assigned_ious, assigned_indexs

+
     def get_in_boxes_info(
         self,
         gt_bboxes,   # [N, 4]
@@ -140,7 +141,8 @@ class YoloxMatcher(object):
             is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor]
         )
         return is_in_boxes_anchor, is_in_boxes_and_center
-
+    
+    
     def dynamic_k_matching(
         self, 
         cost, 

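The matcher is now named after the technique it implements. For context, SimOTA's docstring credits YOLOX, where `dynamic_k_matching` derives a per-ground-truth k from the summed IoU of that ground truth's best candidate anchors; a sketch of that upstream rule (from the referenced YOLOX code, not necessarily this repo's exact lines):

```Python
import torch

# ious: [num_gt, num_candidate_anchors] pairwise IoU matrix
ious = torch.rand(3, 50)

n_candidate_k = min(10, ious.size(1))
topk_ious, _ = torch.topk(ious, n_candidate_k, dim=1)
# each gt gets k ~ "how many anchors already overlap it well", at least 1
dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1)
print(dynamic_ks)   # e.g. tensor([5, 6, 4], dtype=torch.int32)
```
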
+ 67 - 75
yolo/models/yolov7/modules.py

@@ -1,112 +1,86 @@
+import numpy as np
 import torch
 import torch.nn as nn
-from typing import List


 # --------------------- Basic modules ---------------------
 class ConvModule(nn.Module):
     def __init__(self, 
-                 in_dim,        # in channels
-                 out_dim,       # out channels 
-                 kernel_size=1, # kernel size 
-                 padding=0,     # padding
-                 stride=1,      # padding
-                 dilation=1,    # dilation
-                ):
+                 in_dim: int,          # in channels
+                 out_dim: int,         # out channels 
+                 kernel_size: int = 1, # kernel size 
+                 stride: int = 1,      # stride
+                 ):
         super(ConvModule, self).__init__()
-        self.conv = nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=False)
-        self.norm = nn.BatchNorm2d(out_dim)
-        self.act  = nn.SiLU(inplace=True)
+        convs = []
+        convs.append(nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, padding=kernel_size//2, stride=stride, bias=False))
+        convs.append(nn.BatchNorm2d(out_dim))
+        convs.append(nn.SiLU(inplace=True))
+        self.convs = nn.Sequential(*convs)

     def forward(self, x):
-        return self.act(self.norm(self.conv(x)))
-
-
-# ---------------------------- Basic Modules ----------------------------
-class MDown(nn.Module):
-    def __init__(self, in_dim: int, out_dim: int, ):
-        super().__init__()
-        inter_dim = out_dim // 2
-        self.downsample_1 = nn.Sequential(
-            nn.MaxPool2d((2, 2), stride=2),
-            ConvModule(in_dim, inter_dim, kernel_size=1)
-        )
-        self.downsample_2 = nn.Sequential(
-            ConvModule(in_dim, inter_dim, kernel_size=1),
-            ConvModule(inter_dim, inter_dim, kernel_size=3, padding=1, stride=2)
-        )
-        if in_dim == out_dim:
-            self.output_proj = nn.Identity()
-        else:
-            self.output_proj = ConvModule(inter_dim * 2, out_dim, kernel_size=1)
+        return self.convs(x)

-    def forward(self, x):
-        x1 = self.downsample_1(x)
-        x2 = self.downsample_2(x)
-
-        out = self.output_proj(torch.cat([x1, x2], dim=1))
+class ELANBlock(nn.Module):
+    def __init__(self,
+                 in_dim: int,
+                 out_dim: int,
+                 expansion: float = 0.5,
+                 branch_depth: int = 2,
+                 ):
+        super(ELANBlock, self).__init__()
+        inter_dim = int(in_dim * expansion)
+        self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.cv2 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.cv3 = nn.Sequential(*[ConvModule(inter_dim, inter_dim, kernel_size=3)
+                                   for _ in range(round(branch_depth))
+                                   ])
+        self.cv4 = nn.Sequential(*[ConvModule(inter_dim, inter_dim, kernel_size=3)
+                                   for _ in range(round(branch_depth))
+                                   ])
+        self.out = ConvModule(inter_dim*4, out_dim, kernel_size=1)

-        return out

-class ELANLayer(nn.Module):
-    def __init__(self,
-                 in_dim,
-                 out_dim,
-                 expansion  :float = 0.5,
-                 num_blocks :int   = 1,
-                 ) -> None:
-        super(ELANLayer, self).__init__()
-        self.inter_dim = round(in_dim * expansion)
-        self.conv_layer_1 = ConvModule(in_dim, self.inter_dim, kernel_size=1)
-        self.conv_layer_2 = ConvModule(in_dim, self.inter_dim, kernel_size=1)
-        self.conv_layer_3 = ConvModule(self.inter_dim * 4, out_dim, kernel_size=1)
-        self.elan_layer_1 = nn.Sequential(*[ConvModule(self.inter_dim, self.inter_dim, kernel_size=3, padding=1)
-                                           for _ in range(num_blocks)])
-        self.elan_layer_2 = nn.Sequential(*[ConvModule(self.inter_dim, self.inter_dim, kernel_size=3, padding=1)
-                                           for _ in range(num_blocks)])

     def forward(self, x):
-        # Input proj
-        x1 = self.conv_layer_1(x)
-        x2 = self.conv_layer_2(x)
-        x3 = self.elan_layer_1(x2)
-        x4 = self.elan_layer_2(x3)
-    
-        out = self.conv_layer_3(torch.cat([x1, x2, x3, x4], dim=1))
+        x1 = self.cv1(x)
+        x2 = self.cv2(x)
+        x3 = self.cv3(x2)
+        x4 = self.cv4(x3)
+        out = self.out(torch.cat([x1, x2, x3, x4], dim=1))

         return out

-class ELANLayerFPN(nn.Module):
+class ELANBlockFPN(nn.Module):
     def __init__(self,
-                 in_dim,
-                 out_dim,
-                 expansions   :List = [0.5, 0.5],
-                 branch_width :int  = 4,
-                 branch_depth :int  = 1,
+                 in_dim: int,
+                 out_dim: int,
+                 expansion: float = 0.5,
+                 branch_width: int = 4,
+                 branch_depth: int = 1,
                  ):
-        super(ELANLayerFPN, self).__init__()
+        super(ELANBlockFPN, self).__init__()
         # Basic parameters
-        inter_dim  = round(in_dim * expansions[0])
-        inter_dim2 = round(inter_dim * expansions[1]) 
+        inter_dim = int(in_dim * expansion)
+        inter_dim2 = int(inter_dim * expansion) 
         # Network structure
         self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
         self.cv2 = ConvModule(in_dim, inter_dim, kernel_size=1)
         self.cv3 = nn.ModuleList()
         for idx in range(round(branch_width)):
             if idx == 0:
-                cvs = [ConvModule(inter_dim, inter_dim2, kernel_size=3, padding=1)]
+                cvs = [ConvModule(inter_dim, inter_dim2, kernel_size=3)]
             else:
-                cvs = [ConvModule(inter_dim2, inter_dim2, kernel_size=3, padding=1)]
+                cvs = [ConvModule(inter_dim2, inter_dim2, kernel_size=3)]
             # deeper
             if round(branch_depth) > 1:
                 for _ in range(1, round(branch_depth)):
-                    cvs.append(ConvModule(inter_dim2, inter_dim2, kernel_size=3, padding=1))
+                    cvs.append(ConvModule(inter_dim2, inter_dim2, kernel_size=3))
                 self.cv3.append(nn.Sequential(*cvs))
             else:
                 self.cv3.append(cvs[0])

-        self.output_proj = ConvModule(inter_dim*2+inter_dim2*len(self.cv3), out_dim, kernel_size=1)
-
+        self.out = ConvModule(inter_dim*2 + inter_dim2*len(self.cv3), out_dim, kernel_size=1)

     def forward(self, x):
         x1 = self.cv1(x)
@@ -116,6 +90,24 @@ class ELANLayerFPN(nn.Module):
             y1 = inter_outs[-1]
             y2 = m(y1)
             inter_outs.append(y2)
-        out = self.output_proj(torch.cat(inter_outs, dim=1))
+        out = self.out(torch.cat(inter_outs, dim=1))
+
+        return out
+
+class DownSample(nn.Module):
+    def __init__(self, in_dim, out_dim):
+        super().__init__()
+        inter_dim = out_dim // 2
+        self.mp = nn.MaxPool2d((2, 2), 2)
+        self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.cv2 = nn.Sequential(
+            ConvModule(in_dim, inter_dim, kernel_size=1),
+            ConvModule(inter_dim, inter_dim, kernel_size=3, stride=2)
+        )
+
+    def forward(self, x):
+        x1 = self.cv1(self.mp(x))
+        x2 = self.cv2(x)
+        out = torch.cat([x1, x2], dim=1)

         return out

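As a shape sanity check on the rewritten modules: with `expansion=0.5` the two 1x1 stems of `ELANBlock` each emit `in_dim/2` channels, so the four-way concat carries `2*in_dim` channels into the final 1x1 projection, while `DownSample` halves the resolution and concatenates its pooled and strided branches. A quick smoke test (assuming the repo root is on `PYTHONPATH`):

```Python
import torch
from yolo.models.yolov7.modules import ELANBlock, DownSample

x = torch.randn(1, 256, 40, 40)
y = ELANBlock(in_dim=256, out_dim=512, expansion=0.5, branch_depth=2)(x)
print(y.shape)    # torch.Size([1, 512, 40, 40])

z = DownSample(in_dim=512, out_dim=512)(torch.randn(1, 512, 40, 40))
print(z.shape)    # torch.Size([1, 512, 20, 20])
```
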
+ 119 - 44
yolo/models/yolov7/yolov7.py

@@ -1,49 +1,96 @@
-# --------------- Torch components ---------------
 import torch
 import torch.nn as nn

+from utils.misc import multiclass_nms
+
 # --------------- Model components ---------------
-from .yolov7_backbone import Yolov7TBackbone, Yolov7LBackbone
+from .yolov7_backbone import Yolov7Backbone
 from .yolov7_neck     import SPPFBlockCSP
 from .yolov7_pafpn    import Yolov7PaFPN
-from .yolov7_head     import Yolov7DetHead
-from .yolov7_pred     import Yolov7DetPredLayer
+from .yolov7_head     import DecoupledHead

 # --------------- External components ---------------
 from utils.misc import multiclass_nms


-# Yolov7
 class Yolov7(nn.Module):
-    def __init__(self,
-                 cfg,
-                 is_val = False,
-                 ) -> None:
+    def __init__(self, cfg, is_val: bool = False) -> None:
         super(Yolov7, self).__init__()
         # ---------------------- Basic setting ----------------------
-        assert cfg.model_scale in ["t", "l", "x"]
         self.cfg = cfg
         self.num_classes = cfg.num_classes
+        self.out_stride = cfg.out_stride
+        self.num_levels = len(cfg.out_stride)
+
         ## Post-process parameters
         self.topk_candidates  = cfg.val_topk        if is_val else cfg.test_topk
         self.conf_thresh      = cfg.val_conf_thresh if is_val else cfg.test_conf_thresh
         self.nms_thresh       = cfg.val_nms_thresh  if is_val else cfg.test_nms_thresh
         self.no_multi_labels  = False if is_val else True
         
-        # ---------------------- Network Parameters ----------------------
-        ## Backbone
-        self.backbone = Yolov7TBackbone(cfg) if cfg.model_scale == "t" else Yolov7LBackbone(cfg)
-        self.pyramid_feat_dims = self.backbone.feat_dims[-3:]
-        ## Neck: SPP
-        self.neck = SPPFBlockCSP(self.pyramid_feat_dims[-1], self.pyramid_feat_dims[-1]//2)
-        self.pyramid_feat_dims[-1] = self.neck.out_dim
-        ## Neck: FPN
-        self.fpn = Yolov7PaFPN(cfg, self.pyramid_feat_dims)
-        ## Head
-        self.head = Yolov7DetHead(cfg, self.fpn.out_dims)
-        ## Pred
-        self.pred = Yolov7DetPredLayer(cfg)
-
+        # ------------------- Network Structure -------------------
+        self.backbone = Yolov7Backbone(use_pretrained=cfg.use_pretrained)
+        self.neck     = SPPFBlockCSP(self.backbone.feat_dims[-1], self.backbone.feat_dims[-1] // 2, expand_ratio=0.5)
+        self.backbone.feat_dims[-1] = self.backbone.feat_dims[-1] // 2
+        self.fpn      = Yolov7PaFPN(self.backbone.feat_dims[-3:], head_dim=cfg.head_dim)
+        self.non_shared_heads = nn.ModuleList([DecoupledHead(cfg, in_dim)
+                                               for in_dim in self.fpn.fpn_out_dims
+                                               ])
+
+        ## Prediction layers
+        self.obj_preds = nn.ModuleList(
+                            [nn.Conv2d(head.reg_head_dim, 1, kernel_size=1)
+                             for head in self.non_shared_heads
+                             ]) 
+        self.cls_preds = nn.ModuleList(
+                            [nn.Conv2d(head.cls_head_dim, self.num_classes, kernel_size=1) 
+                             for head in self.non_shared_heads
+                             ]) 
+        self.reg_preds = nn.ModuleList(
+                            [nn.Conv2d(head.reg_head_dim, 4, kernel_size=1) 
+                             for head in self.non_shared_heads
+                             ])
+        
+        # init pred layers
+        self.init_weight()
+    
+    def init_weight(self):
+        # Init bias
+        init_prob = 0.01
+        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
+        # obj pred
+        for obj_pred in self.obj_preds:
+            b = obj_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # cls pred
+        for cls_pred in self.cls_preds:
+            b = cls_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # reg pred
+        for reg_pred in self.reg_preds:
+            b = reg_pred.bias.view(-1, )
+            b.data.fill_(1.0)
+            reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+            w = reg_pred.weight
+            w.data.fill_(0.)
+            reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+
+    def generate_anchors(self, level, fmp_size):
+        """
+            fmp_size: (List) [H, W]
+        """
+        # generate grid cells
+        fmp_h, fmp_w = fmp_size
+        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
+        # [H, W, 2] -> [HW, 2]
+        anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
+        anchors += 0.5  # add center offset
+        anchors *= self.out_stride[level]
+
+        return anchors
+        
     def post_process(self, obj_preds, cls_preds, box_preds):
         """
         We process predictions at each scale hierarchically
@@ -66,8 +113,7 @@ class Yolov7(nn.Module):
             box_pred_i = box_pred_i[0]
             if self.no_multi_labels:
                 # [M,]
-                scores, labels = torch.max(
-                    torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
+                scores, labels = torch.max(torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)

                 # Keep top k top scoring indices only.
                 num_topk = min(self.topk_candidates, box_pred_i.size(0))
@@ -126,33 +172,62 @@ class Yolov7(nn.Module):
         return bboxes, scores, labels
     
     def forward(self, x):
-        # ---------------- Backbone ----------------
+        bs = x.shape[0]
         pyramid_feats = self.backbone(x)
-
-        # ---------------- Neck: SPP ----------------
         pyramid_feats[-1] = self.neck(pyramid_feats[-1])
-        
-        # ---------------- Neck: PaFPN ----------------
         pyramid_feats = self.fpn(pyramid_feats)

-        # ---------------- Heads ----------------
-        cls_feats, reg_feats = self.head(pyramid_feats)
-
-        # ---------------- Preds ----------------
-        outputs = self.pred(cls_feats, reg_feats)
-        outputs['image_size'] = [x.shape[2], x.shape[3]]
+        all_anchors = []
+        all_obj_preds = []
+        all_cls_preds = []
+        all_box_preds = []
+        all_reg_preds = []
+        for level, (feat, head) in enumerate(zip(pyramid_feats, self.non_shared_heads)):
+            cls_feat, reg_feat = head(feat)
+
+            # [B, C, H, W]
+            obj_pred = self.obj_preds[level](reg_feat)
+            cls_pred = self.cls_preds[level](cls_feat)
+            reg_pred = self.reg_preds[level](reg_feat)
+
+            B, _, H, W = cls_pred.size()
+            fmp_size = [H, W]
+            # generate anchor boxes: [M, 4]
+            anchors = self.generate_anchors(level, fmp_size)
+            anchors = anchors.to(x.device)
+            
+            # [B, C, H, W] -> [B, H, W, C] -> [B, M, C]
+            obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
+            cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
+            reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
+
+            # decode bbox
+            ctr_pred = reg_pred[..., :2] * self.out_stride[level] + anchors[..., :2]
+            wh_pred = torch.exp(reg_pred[..., 2:]) * self.out_stride[level]
+            pred_x1y1 = ctr_pred - wh_pred * 0.5
+            pred_x2y2 = ctr_pred + wh_pred * 0.5
+            box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
+
+            all_obj_preds.append(obj_pred)
+            all_cls_preds.append(cls_pred)
+            all_box_preds.append(box_pred)
+            all_reg_preds.append(reg_pred)
+            all_anchors.append(anchors)
 

         if not self.training:
-            all_cls_preds = outputs['pred_cls']
-            all_box_preds = outputs['pred_box']
-
-            # post process
             bboxes, scores, labels = self.post_process(all_obj_preds, all_cls_preds, all_box_preds)
             bboxes, scores, labels = self.post_process(all_obj_preds, all_cls_preds, all_box_preds)
             outputs = {
                 "scores": scores,
                 "labels": labels,
                 "bboxes": bboxes
             }
-        return outputs
+        else:
+            outputs = {"pred_obj": all_obj_preds,        # List(Tensor) [B, M, 1]
+                       "pred_cls": all_cls_preds,        # List(Tensor) [B, M, C]
+                       "pred_box": all_box_preds,        # List(Tensor) [B, M, 4]
+                       "pred_reg": all_reg_preds,        # List(Tensor) [B, M, 4]
+                       "anchors": all_anchors,           # List(Tensor) [M, 2]
+                       "strides": self.out_stride,       # List(Int) [8, 16, 32]
+                       }
+
+        return outputs 

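The inlined `forward` decodes boxes exactly as the loss expects: centers are regressed as a stride-scaled offset from the anchor point, and sizes are exponentiated. Combined with `init_weight` (regression weights zeroed, biases set to 1.0), every box at step 0 has width and height exp(1)*stride, centered one stride right and below its anchor. A standalone numeric sketch of the decode:

```Python
import torch

stride = 8.0
anchor = torch.tensor([4.0, 4.0])            # grid cell (0, 0): (0 + 0.5) * 8
reg = torch.tensor([1.0, 1.0, 1.0, 1.0])     # what init_weight yields at step 0

ctr = reg[:2] * stride + anchor              # tensor([12., 12.])
wh  = torch.exp(reg[2:]) * stride            # ~tensor([21.75, 21.75])
box = torch.cat([ctr - wh * 0.5, ctr + wh * 0.5])   # x1, y1, x2, y2
print(box)                                   # ~[1.13, 1.13, 22.87, 22.87]
```
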
+ 58 - 161
yolo/models/yolov7/yolov7_backbone.py

@@ -2,142 +2,67 @@ import torch
 import torch.nn as nn

 try:
-    from .modules import ConvModule, MDown, ELANLayer
+    from .modules import ConvModule, ELANBlock, DownSample
 except:
-    from  modules import ConvModule, MDown, ELANLayer
+    from  modules import ConvModule, ELANBlock, DownSample
+    

-# IN1K pretrained weight
-pretrained_urls = {
-    't': None,
-    'l': None,
-    'x': None,
+in1k_pretrained_urls = {
+    "elannet_large": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_large.pth",
 }

-# ELANNet-Tiny
-class Yolov7TBackbone(nn.Module):
-    def __init__(self, cfg):
-        super(Yolov7TBackbone, self).__init__()
-        # ---------------- Basic parameters ----------------
-        self.model_scale = cfg.model_scale
-        self.elan_depth = 1
-        self.feat_dims = [round(64  * cfg.width),
-                          round(128 * cfg.width),
-                          round(256 * cfg.width),
-                          round(512 * cfg.width),
-                          round(1024 * cfg.width)]
-
-        # ---------------- Model parameters ----------------
-        self.layer_1 = self.make_stem(3, self.feat_dims[0])
-        self.layer_2 = self.make_block(self.feat_dims[0], self.feat_dims[1], expansion=0.5, downsample="conv")
-        self.layer_3 = self.make_block(self.feat_dims[1], self.feat_dims[2], expansion=0.5, downsample="maxpool")
-        self.layer_4 = self.make_block(self.feat_dims[2], self.feat_dims[3], expansion=0.5, downsample="maxpool")
-        self.layer_5 = self.make_block(self.feat_dims[3], self.feat_dims[4], expansion=0.5, downsample="maxpool")
+# --------------------- Yolov7 backbone (CSPDarkNet-53 with SiLU) -----------------------
+class Yolov7Backbone(nn.Module):
+    def __init__(self, use_pretrained: bool = False):
+        super(Yolov7Backbone, self).__init__()
+        self.feat_dims = [32, 64, 128, 256, 512, 1024, 1024]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.25]  # Stage-1 -> Stage-4
+        self.branch_depths = [2, 2, 2, 2]            # Stage-1 -> Stage-4
+        self.use_pretrained = use_pretrained
+
+        # -------------------- Network parameters --------------------
+        ## P1/2
+        self.layer_1 = nn.Sequential(
+            ConvModule(3, self.feat_dims[0], kernel_size=3),      
+            ConvModule(self.feat_dims[0], self.feat_dims[1], kernel_size=3, stride=2),
+            ConvModule(self.feat_dims[1], self.feat_dims[1], kernel_size=3)
+        )
+        ## P2/4: Stage-1
+        self.layer_2 = nn.Sequential(   
+            ConvModule(self.feat_dims[1], self.feat_dims[2], kernel_size=3, stride=2),             
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[0], self.branch_depths[0])
+        )
+        ## P3/8: Stage-2
+        self.layer_3 = nn.Sequential(
+            DownSample(self.feat_dims[3], self.feat_dims[3]),
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[1], self.branch_depths[1])
+        )
+        ## P4/16: Stage-3
+        self.layer_4 = nn.Sequential(
+            DownSample(self.feat_dims[4], self.feat_dims[4]),
+            ELANBlock(self.feat_dims[4], self.feat_dims[5], self.squeeze_ratios[2], self.branch_depths[2])
+        )
+        ## P5/32: Stage-4
+        self.layer_5 = nn.Sequential(
+            DownSample(self.feat_dims[5], self.feat_dims[5]),
+            ELANBlock(self.feat_dims[5], self.feat_dims[6], self.squeeze_ratios[3], self.branch_depths[3])
+        )
 

         # Initialize all layers
         self.init_weights()
         
-        # Load imagenet pretrained weight
-        if cfg.use_pretrained:
-            self.load_pretrained()
-        
     def init_weights(self):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
                 m.reset_parameters()

-        url = pretrained_urls[self.model_scale]
-        if url is not None:
-            print('Loading backbone pretrained weight from : {}'.format(url))
-            # checkpoint state dict
-            checkpoint = torch.hub.load_state_dict_from_url(
-                url=url, map_location="cpu", check_hash=True)
-            checkpoint_state_dict = checkpoint.pop("model")
-            # model state dict
-            model_state_dict = self.state_dict()
-            # check
-            for k in list(checkpoint_state_dict.keys()):
-                if k in model_state_dict:
-                    shape_model = tuple(model_state_dict[k].shape)
-                    shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
-                    if shape_model != shape_checkpoint:
-                        checkpoint_state_dict.pop(k)
-                else:
-                    checkpoint_state_dict.pop(k)
-                    print('Unused key: ', k)
-            # load the weight
-            self.load_state_dict(checkpoint_state_dict)
-        else:
-            print('No pretrained weight for model scale: {}.'.format(self.model_scale))
-
-    def make_stem(self, in_dim, out_dim):
-        stem = ConvModule(in_dim, out_dim, kernel_size=6, padding=2, stride=2)
-        
-        return stem
-
-    def make_block(self, in_dim, out_dim, expansion=0.5, downsample="maxpool"):
-        if downsample == "maxpool":
-            block = nn.Sequential(
-                nn.MaxPool2d((2, 2), stride=2),             
-                ELANLayer(in_dim, out_dim, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        elif downsample == "conv":
-            block = nn.Sequential(
-                ConvModule(in_dim, out_dim, kernel_size=3, padding=1, stride=2),             
-                ELANLayer(out_dim, out_dim, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        else:
-            raise NotImplementedError("Unknown downsample type: {}".format(downsample))
-
-        return block
-    
-    def forward(self, x):
-        c1 = self.layer_1(x)
-        c2 = self.layer_2(c1)
-        c3 = self.layer_3(c2)
-        c4 = self.layer_4(c3)
-        c5 = self.layer_5(c4)
-        outputs = [c3, c4, c5]
-
-        return outputs
-
-# ELANNet-Large
-class Yolov7LBackbone(nn.Module):
-    def __init__(self, cfg):
-        super(Yolov7LBackbone, self).__init__()
-        # ---------------- Basic parameters ----------------
-        self.model_scale = cfg.model_scale
-        self.elan_depth = 2
-        self.feat_dims = [round(64  * cfg.width),
-                          round(128  * cfg.width),
-                          round(256  * cfg.width),
-                          round(512  * cfg.width),
-                          round(1024 * cfg.width),
-                          round(1024 * cfg.width)]
-
-        # ---------------- Model parameters ----------------
-        self.layer_1 = self.make_stem(3, self.feat_dims[0])
-        self.layer_2 = self.make_block(self.feat_dims[0], self.feat_dims[1], self.feat_dims[2], expansion=0.5, conv_downsample=True)
-        self.layer_3 = self.make_block(self.feat_dims[2], self.feat_dims[2], self.feat_dims[3], expansion=0.5)
-        self.layer_4 = self.make_block(self.feat_dims[3], self.feat_dims[3], self.feat_dims[4], expansion=0.5)
-        self.layer_5 = self.make_block(self.feat_dims[4], self.feat_dims[4], self.feat_dims[5], expansion=0.25)
-
-        # Initialize all layers
-        self.init_weights()
-        
         # Load imagenet pretrained weight
         # Load imagenet pretrained weight
-        if cfg.use_pretrained:
+        if self.use_pretrained:
             self.load_pretrained()
-        
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()

     def load_pretrained(self):
-        url = pretrained_urls[self.model_scale]
+        url = in1k_pretrained_urls["elannet_large"]
         if url is not None:
             print('Loading backbone pretrained weight from : {}'.format(url))
             # checkpoint state dict
@@ -161,63 +86,35 @@ class Yolov7LBackbone(nn.Module):
         else:
             print('No pretrained weight for model scale: {}.'.format(self.model_scale))

-    def make_stem(self, in_dim, out_dim):
-        stem = nn.Sequential(
-            ConvModule(in_dim, out_dim//2, kernel_size=3, padding=1, stride=1),
-            ConvModule(out_dim//2, out_dim, kernel_size=3, padding=1, stride=2),
-            ConvModule(out_dim, out_dim, kernel_size=3, padding=1, stride=1)
-
-        )
-
-        return stem
-
-    def make_block(self, in_dim, out_dim_1, out_dim_2, expansion=0.5, conv_downsample=False):
-        if conv_downsample:
-            block = nn.Sequential(
-                ConvModule(in_dim, out_dim_1, kernel_size=3, padding=1, stride=2),             
-                ELANLayer(out_dim_1, out_dim_2, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        else:
-            block = nn.Sequential(
-                MDown(in_dim, out_dim_1),             
-                ELANLayer(out_dim_1, out_dim_2, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        
-        return block
-    
     def forward(self, x):
         c1 = self.layer_1(x)
         c2 = self.layer_2(c1)
         c3 = self.layer_3(c2)
         c4 = self.layer_4(c3)
         c5 = self.layer_5(c4)
+
         outputs = [c3, c4, c5]

         return outputs



-if __name__ == '__main__':
-    import time
+if __name__=='__main__':
     from thop import profile
-    class BaseConfig(object):
-        def __init__(self) -> None:
-            self.use_pretrained = False
-            self.width = 0.5
-            self.model_scale = "t"
-
-    cfg = BaseConfig()
-    model = Yolov7TBackbone(cfg)
-    x = torch.randn(1, 3, 640, 640)
-    t0 = time.time()
+
+    # Build backbone
+    model = Yolov7Backbone(use_pretrained=True)
+
+    # Randomly generate input data
+    x = torch.randn(2, 3, 640, 640)
+
+    # Inference
     outputs = model(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
+    print(' - the shape of input :  ', x.shape)
     for out in outputs:
-        print(out.shape)
+        print(' - the shape of output : ', out.shape)

     x = torch.randn(1, 3, 640, 640)
-    print('==============================')
     flops, params = profile(model, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

+ 34 - 94
yolo/models/yolov7/yolov7_head.py

@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+from typing import List

 try:
     from .modules import ConvModule
@@ -7,50 +8,34 @@ except:
     from  modules import ConvModule


-## Single-level Detection Head
-class DetHead(nn.Module):
-    def __init__(self,
-                 in_dim       :int  = 256,
-                 cls_head_dim :int  = 256,
-                 reg_head_dim :int  = 256,
-                 num_cls_head :int  = 2,
-                 num_reg_head :int  = 2,
-                 ):
+class DecoupledHead(nn.Module):
+    def __init__(self, cfg, in_dim: int = 256):
         super().__init__()
-        # --------- Basic Parameters ----------
         self.in_dim = in_dim
-        self.num_cls_head = num_cls_head
-        self.num_reg_head = num_reg_head
-        
-        # --------- Network Parameters ----------
-        ## cls head
+        self.cls_head_dim = cfg.head_dim
+        self.reg_head_dim = cfg.head_dim
+        self.num_cls_head = cfg.num_cls_head
+        self.num_reg_head = cfg.num_reg_head
+
+        # classification feature head
         cls_feats = []
-        self.cls_head_dim = cls_head_dim
-        for i in range(num_cls_head):
+        for i in range(self.num_cls_head):
             if i == 0:
-                cls_feats.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
+                cls_feats.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, stride=1))
             else:
-                cls_feats.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
-        ## reg head
+                cls_feats.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, stride=1))
+                
+        # box regression feature head
         reg_feats = []
-        self.reg_head_dim = reg_head_dim
-        for i in range(num_reg_head):
+        for i in range(self.num_reg_head):
             if i == 0:
-                reg_feats.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
+                reg_feats.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, stride=1))
             else:
-                reg_feats.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
+                reg_feats.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, stride=1))

         self.cls_feats = nn.Sequential(*cls_feats)
         self.reg_feats = nn.Sequential(*reg_feats)

-        self.init_weights()
-        
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
-
     def forward(self, x):
         """
             in_feats: (Tensor) [B, C, H, W]
@@ -59,79 +44,34 @@ class DetHead(nn.Module):
         reg_feats = self.reg_feats(x)

         return cls_feats, reg_feats
-    
-## Multi-level Detection Head
-class Yolov7DetHead(nn.Module):
-    def __init__(self, cfg, in_dims):
-        super().__init__()
-        self.num_levels = len(cfg.out_stride)
-        ## ----------- Network Parameters -----------
-        self.multi_level_heads = nn.ModuleList(
-            [DetHead(in_dim       = in_dims[level],
-                     cls_head_dim = round(cfg.head_dim * cfg.width),
-                     reg_head_dim = round(cfg.head_dim * cfg.width),
-                     num_cls_head = cfg.num_cls_head,
-                     num_reg_head = cfg.num_reg_head,
-                     ) for level in range(self.num_levels)])
-        
-        # --------- Basic Parameters ----------
-        self.in_dims = in_dims
-        self.cls_head_dim = cfg.head_dim
-        self.reg_head_dim = cfg.head_dim
-
-    def forward(self, feats):
-        """
-            feats: List[(Tensor)] [[B, C, H, W], ...]
-        """
-        cls_feats = []
-        reg_feats = []
-        for feat, head in zip(feats, self.multi_level_heads):
-            # ---------------- Pred ----------------
-            cls_feat, reg_feat = head(feat)
-
-            cls_feats.append(cls_feat)
-            reg_feats.append(reg_feat)
-
-        return cls_feats, reg_feats


 if __name__=='__main__':
-    import time
     from thop import profile
-    # Model config
     
-    # YOLOv7-Base config
+    # YOLOv7 configuration
     class Yolov7BaseConfig(object):
         def __init__(self) -> None:
             # ---------------- Model config ----------------
-            self.width    = 0.50
-            self.out_stride = [8, 16, 32]
-            self.max_stride = 32
-            self.num_levels = 3
-            ## Head
             self.head_dim  = 256
-            self.num_cls_head   = 2
-            self.num_reg_head   = 2
-
+            self.num_cls_head = 2
+            self.num_reg_head = 2
     cfg = Yolov7BaseConfig()
+
     # Build a head
-    pyramid_feats = [torch.randn(1, cfg.head_dim, 80, 80),
-                     torch.randn(1, cfg.head_dim, 40, 40),
-                     torch.randn(1, cfg.head_dim, 20, 20)]
-    head = Yolov7DetHead(cfg, [cfg.head_dim]*3)
+    model = DecoupledHead(cfg, in_dim=256)

+    # Randomly generate input data
+    x = torch.randn(2, 256, 20, 20)

     # Inference
-    t0 = time.time()
-    cls_feats, reg_feats = head(pyramid_feats)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print("====== Yolov7 Head output ======")
-    for level, (cls_f, reg_f) in enumerate(zip(cls_feats, reg_feats)):
-        print("- Level-{} : ".format(level), cls_f.shape, reg_f.shape)
-
-    flops, params = profile(head, inputs=(pyramid_feats, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
-      
+    cls_feats, reg_feats = model(x)
+    print(' - the shape of input : ', x.shape)
+    print(' - the shape of cls feats : ', cls_feats.shape)
+    print(' - the shape of reg feats : ', reg_feats.shape)
+
+    x = torch.randn(1, 256, 20, 20)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

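For reference, the decoupled-head pattern this diff keeps — two parallel conv stacks over the same feature map, one feeding classification and one feeding regression — in a minimal, self-contained sketch. ConvModule is approximated here by Conv-BN-SiLU; names and defaults are illustrative, not the repo's exact API:

import torch
import torch.nn as nn

def conv_bn_act(in_dim, out_dim):
    # Stand-in for the repo's ConvModule: 3x3 conv + BN + SiLU.
    return nn.Sequential(
        nn.Conv2d(in_dim, out_dim, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(out_dim),
        nn.SiLU(inplace=True))

class DecoupledHeadSketch(nn.Module):
    def __init__(self, in_dim=256, head_dim=256, num_cls_head=2, num_reg_head=2):
        super().__init__()
        # Two parallel branches over the same input feature map.
        self.cls_feats = nn.Sequential(*[conv_bn_act(in_dim if i == 0 else head_dim, head_dim)
                                         for i in range(num_cls_head)])
        self.reg_feats = nn.Sequential(*[conv_bn_act(in_dim if i == 0 else head_dim, head_dim)
                                         for i in range(num_reg_head)])

    def forward(self, x):
        return self.cls_feats(x), self.reg_feats(x)

cls_f, reg_f = DecoupledHeadSketch()(torch.randn(2, 256, 20, 20))
print(cls_f.shape, reg_f.shape)  # torch.Size([2, 256, 20, 20]) twice

Decoupling the two branches is the standard remedy for the conflict between classification and regression features in one-stage detectors.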
+ 31 - 62
yolo/models/yolov7/yolov7_neck.py

@@ -7,30 +7,18 @@ except:
     from  modules import ConvModule


-# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv7-AF by Glenn Jocher
 class SPPF(nn.Module):
     """
-        This code referenced to https://github.com/ultralytics/yolov7-AF
+        This code references https://github.com/ultralytics/yolov5
     """
-    def __init__(self, in_dim, out_dim, expansion=0.5):
+    def __init__(self, in_dim, out_dim, expand_ratio=0.5):
         super().__init__()
-        ## ----------- Basic Parameters -----------
-        inter_dim = int(in_dim * expansion)
+        inter_dim = int(in_dim * expand_ratio)
         self.out_dim = out_dim
-        ## ----------- Network Parameters -----------
         self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
-        self.cv2 = ConvModule(inter_dim * 4, out_dim, kernel_size=1,)
+        self.cv2 = ConvModule(inter_dim * 4, out_dim, kernel_size=1)
         self.m = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)

-        # Initialize all layers
-        self.init_weights()
-
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
-
     def forward(self, x):
         x = self.cv1(x)
         y1 = self.m(x)
@@ -38,71 +26,52 @@ class SPPF(nn.Module):

         return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))

-# SPPF block with CSP module
 class SPPFBlockCSP(nn.Module):
-    """
-        CSP Spatial Pyramid Pooling Block
-    """
-    def __init__(self, in_dim, out_dim):
+    def __init__(self,
+                 in_dim: int,
+                 out_dim: int,
+                 expand_ratio: float = 0.5,
+                 ):
         super(SPPFBlockCSP, self).__init__()
-        inter_dim = in_dim // 2
+        inter_dim = int(in_dim * expand_ratio)
         self.out_dim = out_dim
         self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
         self.cv2 = ConvModule(in_dim, inter_dim, kernel_size=1)
-        self.module = nn.Sequential(
-            ConvModule(inter_dim, inter_dim, kernel_size=3, padding=1),
-            SPPF(inter_dim, inter_dim, expansion=1.0),
-            ConvModule(inter_dim, inter_dim, kernel_size=3, padding=1),
-            )
+        self.m = nn.Sequential(
+            ConvModule(inter_dim, inter_dim, kernel_size=3),
+            SPPF(inter_dim, inter_dim, expand_ratio=1.0),
+            ConvModule(inter_dim, inter_dim, kernel_size=3)
+        )
         self.cv3 = ConvModule(inter_dim * 2, self.out_dim, kernel_size=1)

-        # Initialize all layers
-        self.init_weights()
-
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
-
     def forward(self, x):
         x1 = self.cv1(x)
-        x2 = self.module(self.cv2(x))
-        y = self.cv3(torch.cat([x1, x2], dim=1))
+        x2 = self.cv2(x)
+        x3 = self.m(x2)
+        y = self.cv3(torch.cat([x1, x3], dim=1))

         return y


 if __name__=='__main__':
-    import time
     from thop import profile
-    # Model config
     
-    # YOLOv7-AF-Base config
-    class Yolov7AFBaseConfig(object):
-        def __init__(self) -> None:
-            # ---------------- Model config ----------------
-            self.out_stride = 32
-            self.max_stride = 32
-            ## Neck
-            self.neck_expand_ratio = 0.5
-            self.spp_pooling_size  = 5
-
-    cfg = Yolov7AFBaseConfig()
-    # Build a head
+    # Build a neck
     in_dim  = 512
     out_dim = 512
-    neck = SPPF(in_dim, out_dim)
+    model = SPPFBlockCSP(in_dim, out_dim, expand_ratio=0.5)
+
+    # Randomly generate input data
+    x = torch.randn(2, in_dim, 20, 20)

     # Inference
+    output = model(x)
+    print(' - the shape of input : ', x.shape)
+    print(' - the shape of output : ', output.shape)
+
     x = torch.randn(1, in_dim, 20, 20)
-    t0 = time.time()
-    output = neck(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print('Neck output: ', output.shape)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))
-    flops, params = profile(neck, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))

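Why SPPF chains three 5x5 max-pools: with stride 1 and padding 2, two applications cover a 9x9 window and three cover 13x13, so it matches the original SPP's 5/9/13 pooling pyramid while reusing intermediate results. A quick standalone check:

import torch
import torch.nn.functional as F

x = torch.randn(1, 8, 32, 32)
y1 = F.max_pool2d(x, kernel_size=5, stride=1, padding=2)    # 5x5 window
y2 = F.max_pool2d(y1, kernel_size=5, stride=1, padding=2)   # equals a 9x9 window
y3 = F.max_pool2d(y2, kernel_size=5, stride=1, padding=2)   # equals a 13x13 window

m9  = F.max_pool2d(x, kernel_size=9,  stride=1, padding=4)
m13 = F.max_pool2d(x, kernel_size=13, stride=1, padding=6)
print(torch.equal(y2, m9), torch.equal(y3, m13))  # True True

SPPFBlockCSP then wraps this in a CSP split: cv1 carries a shortcut branch, cv2 -> m carries the SPPF branch, and cv3 fuses the concatenation of the two.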
+ 79 - 97
yolo/models/yolov7/yolov7_pafpn.py

@@ -1,115 +1,111 @@
-from typing import List
 import torch
 import torch.nn as nn
 import torch.nn.functional as F

 try:
-    from .modules import ConvModule, ELANLayerFPN, MDown
+    from .modules import ConvModule, ELANBlockFPN, DownSample
 except:
-    from  modules import ConvModule, ELANLayerFPN, MDown
+    from  modules import ConvModule, ELANBlockFPN, DownSample


-# Yolov7 af PaFPN
+# PaFPN-ELAN (YOLOv7's)
 class Yolov7PaFPN(nn.Module):
-    def __init__(self, cfg, in_dims: List = [512, 1024, 512]):
+    def __init__(self, 
+                 in_dims = [512, 1024, 512],
+                 head_dim = 256,
+                 ):
         super(Yolov7PaFPN, self).__init__()
         # ----------------------------- Basic parameters -----------------------------
         self.in_dims = in_dims
-        c3, c4, c5 = in_dims
+        self.head_dim = head_dim
+        self.fpn_out_dims = [head_dim] * 3
-        # ----------------------------- Yolov7's Top-down FPN -----------------------------
+        self.branch_width = 4
+        self.branch_depth = 1
+
+        c3, c4, c5 = self.in_dims
+
+        # ----------------------------- Top-down FPN -----------------------------
         ## P5 -> P4
-        self.reduce_layer_1 = ConvModule(c5, round(256*cfg.width), kernel_size=1)
-        self.reduce_layer_2 = ConvModule(c4, round(256*cfg.width), kernel_size=1)
-        self.top_down_layer_1 = ELANLayerFPN(in_dim       = round(256*cfg.width) + round(256*cfg.width),
-                                             out_dim      = round(256*cfg.width),
-                                             expansions   = cfg.fpn_expansions,
-                                             branch_width = cfg.fpn_block_bw,
-                                             branch_depth = cfg.fpn_block_dw,
+        self.reduce_layer_1 = ConvModule(c5, 256, kernel_size=1)
+        self.reduce_layer_2 = ConvModule(c4, 256, kernel_size=1)
+        self.top_down_layer_1 = ELANBlockFPN(in_dim = 256 + 256,
+                                             out_dim = 256,
+                                             expansion = 0.5,
+                                             branch_width = self.branch_width,
+                                             branch_depth = self.branch_depth,
                                              )
         ## P4 -> P3
-        self.reduce_layer_3 = ConvModule(round(256*cfg.width), round(128*cfg.width), kernel_size=1)
-        self.reduce_layer_4 = ConvModule(c3, round(128*cfg.width), kernel_size=1)
-        self.top_down_layer_2 = ELANLayerFPN(in_dim       = round(128*cfg.width) + round(128*cfg.width),
-                                             out_dim      = round(128*cfg.width),
-                                             expansions   = cfg.fpn_expansions,
-                                             branch_width = cfg.fpn_block_bw,
-                                             branch_depth = cfg.fpn_block_dw,
+        self.reduce_layer_3 = ConvModule(256, 128, kernel_size=1)
+        self.reduce_layer_4 = ConvModule(c3, 128, kernel_size=1)
+        self.top_down_layer_2 = ELANBlockFPN(in_dim = 128 + 128,
+                                             out_dim = 128,
+                                             expansion = 0.5,
+                                             branch_width = self.branch_width,
+                                             branch_depth = self.branch_depth,
                                              )
-        # ----------------------------- Yolov7's Bottom-up PAN -----------------------------
+        # ----------------------------- Bottom-up FPN -----------------------------
         ## P3 -> P4
-        self.downsample_layer_1 = MDown(round(128*cfg.width), round(256*cfg.width))
-        self.bottom_up_layer_1 = ELANLayerFPN(in_dim       = round(256*cfg.width) + round(256*cfg.width),
-                                              out_dim      = round(256*cfg.width),
-                                              expansions   = cfg.fpn_expansions,
-                                              branch_width = cfg.fpn_block_bw,
-                                              branch_depth = cfg.fpn_block_dw,
+        self.downsample_layer_1 = DownSample(128, 256)
+        self.bottom_up_layer_1 = ELANBlockFPN(in_dim = 256 + 256,
+                                              out_dim = 256,
+                                              expansion = 0.5,
+                                              branch_width = self.branch_width,
+                                              branch_depth = self.branch_depth,
                                               )
         ## P4 -> P5
-        self.downsample_layer_2 = MDown(round(256*cfg.width), round(512*cfg.width))
-        self.bottom_up_layer_2 = ELANLayerFPN(in_dim       = round(512*cfg.width) + c5,
-                                              out_dim      = round(512*cfg.width),
-                                              expansions   = cfg.fpn_expansions,
-                                              branch_width = cfg.fpn_block_bw,
-                                              branch_depth = cfg.fpn_block_dw,
+        self.downsample_layer_2 = DownSample(256, 512)
+        self.bottom_up_layer_2 = ELANBlockFPN(in_dim = 512 + c5,
+                                              out_dim = 512,
+                                              expansion = 0.5,
+                                              branch_width = self.branch_width,
+                                              branch_depth = self.branch_depth,
                                               )

-        # ----------------------------- Head conv layers -----------------------------
         ## Head convs
-        self.head_conv_1 = ConvModule(round(128*cfg.width), round(256*cfg.width), kernel_size=3, padding=1, stride=1)
-        self.head_conv_2 = ConvModule(round(256*cfg.width), round(512*cfg.width), kernel_size=3, padding=1, stride=1)
-        self.head_conv_3 = ConvModule(round(512*cfg.width), round(1024*cfg.width), kernel_size=3, padding=1, stride=1)
-
-        # ---------------------- Yolox's output projection ----------------------
-        self.out_layers = nn.ModuleList([
-            ConvModule(in_dim, round(cfg.head_dim*cfg.width), kernel_size=1)
-                      for in_dim in [round(256*cfg.width), round(512*cfg.width), round(1024*cfg.width)]
-                      ])
-        self.out_dims = [round(cfg.head_dim*cfg.width)] * 3
-
-        # Initialize all layers
-        self.init_weights()
-
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
+        self.head_conv_1 = ConvModule(128, 256, kernel_size=3, stride=1)
+        self.head_conv_2 = ConvModule(256, 512, kernel_size=3, stride=1)
+        self.head_conv_3 = ConvModule(512, 1024, kernel_size=3, stride=1)
+
+        ## Output projs
+        self.out_layers = nn.ModuleList([ConvModule(in_dim, head_dim, kernel_size=1)
+                                         for in_dim in [256, 512, 1024]
+                                         ])

     def forward(self, features):
         c3, c4, c5 = features

-        # ------------------ Top down FPN ------------------
+        # Top down
         ## P5 -> P4
-        p5 = self.reduce_layer_1(c5)
-        p5_up = F.interpolate(p5, scale_factor=2.0)
-        p4 = self.reduce_layer_2(c4)
-        p4 = self.top_down_layer_1(torch.cat([p5_up, p4], dim=1))
-
+        c6 = self.reduce_layer_1(c5)
+        c7 = F.interpolate(c6, scale_factor=2.0)
+        c8 = torch.cat([c7, self.reduce_layer_2(c4)], dim=1)
+        c9 = self.top_down_layer_1(c8)
         ## P4 -> P3
-        p4_in = self.reduce_layer_3(p4)
-        p4_up = F.interpolate(p4_in, scale_factor=2.0)
-        p3 = self.reduce_layer_4(c3)
-        p3 = self.top_down_layer_2(torch.cat([p4_up, p3], dim=1))
-
-        # ------------------ Bottom up PAN ------------------
-        ## P3 -> P4
-        p3_ds = self.downsample_layer_1(p3)
-        p4 = torch.cat([p3_ds, p4], dim=1)
-        p4 = self.bottom_up_layer_1(p4)
-
+        c10 = self.reduce_layer_3(c9)
+        c11 = F.interpolate(c10, scale_factor=2.0)
+        c12 = torch.cat([c11, self.reduce_layer_4(c3)], dim=1)
+        c13 = self.top_down_layer_2(c12)
+
+        # Bottom up
+        ## P3 -> P4
+        c14 = self.downsample_layer_1(c13)
+        c15 = torch.cat([c14, c9], dim=1)
+        c16 = self.bottom_up_layer_1(c15)
         ## P4 -> P5
-        p4_ds = self.downsample_layer_2(p4)
-        p5 = torch.cat([p4_ds, c5], dim=1)
-        p5 = self.bottom_up_layer_2(p5)
-
-        out_feats = [self.head_conv_1(p3), self.head_conv_2(p4), self.head_conv_3(p5)]
-            
+        c17 = self.downsample_layer_2(c16)
+        c18 = torch.cat([c17, c5], dim=1)
+        c19 = self.bottom_up_layer_2(c18)
+
+        c20 = self.head_conv_1(c13)
+        c21 = self.head_conv_2(c16)
+        c22 = self.head_conv_3(c19)
+        out_feats = [c20, c21, c22] # [P3, P4, P5]
+        
         # output proj layers
         out_feats_proj = []
         for feat, layer in zip(out_feats, self.out_layers):
             out_feats_proj.append(layer(feat))
-            
         return out_feats_proj


@@ -118,30 +114,16 @@ if __name__=='__main__':
     from thop import profile
     # Model config
     
-    # YOLOv7-Base config
-    class Yolov7BaseConfig(object):
-        def __init__(self) -> None:
-            # ---------------- Model config ----------------
-            self.width    = 0.50
-            self.depth    = 0.34
-            self.out_stride = [8, 16, 32]
-            self.max_stride = 32
-            self.num_levels = 3
-            self.fpn_expansions = [0.5, 0.5]
-            self.fpn_block_bw = 4
-            self.fpn_block_dw = 1
-            ## Head
-            self.head_dim = 256
-
-    cfg = Yolov7BaseConfig()
     # Build a head
     in_dims  = [128, 256, 512]
-    fpn = Yolov7PaFPN(cfg, in_dims)
+    fpn = Yolov7PaFPN(in_dims, head_dim=256)

-    # Inference
+    # Randomly generate input data
     x = [torch.randn(1, in_dims[0], 80, 80),
          torch.randn(1, in_dims[1], 40, 40),
          torch.randn(1, in_dims[2], 20, 20)]
+    
+    # Inference
     t0 = time.time()
     output = fpn(x)
     t1 = time.time()
@@ -153,4 +135,4 @@ if __name__=='__main__':
     flops, params = profile(fpn, inputs=(x, ), verbose=False)
     print('==============================')
     print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
+    print('Params : {:.2f} M'.format(params / 1e6))

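The renamed c6..c22 intermediates follow the standard PaFPN dataflow: reduce and upsample top-down (P5 -> P4 -> P3), then downsample and fuse bottom-up (P3 -> P4 -> P5), with per-level head convs and 1x1 output projections. A runnable sketch of just that wiring, with ELANBlockFPN and DownSample replaced by plain convs (stand-ins, not the repo's modules; the head convs and output projections are collapsed into one conv per level):

import torch
import torch.nn as nn
import torch.nn.functional as F

conv = lambda ci, co, k=1, s=1: nn.Conv2d(ci, co, k, s, k // 2)

class PaFPNSketch(nn.Module):
    def __init__(self, in_dims=(128, 256, 512), head_dim=256):
        super().__init__()
        c3, c4, c5 = in_dims
        self.red1, self.red2 = conv(c5, 256), conv(c4, 256)
        self.td1 = conv(512, 256, k=3)                 # top-down fuse: P5 -> P4
        self.red3, self.red4 = conv(256, 128), conv(c3, 128)
        self.td2 = conv(256, 128, k=3)                 # top-down fuse: P4 -> P3
        self.down1, self.bu1 = conv(128, 256, k=3, s=2), conv(512, 256, k=3)
        self.down2, self.bu2 = conv(256, 512, k=3, s=2), conv(512 + c5, 512, k=3)
        self.outs = nn.ModuleList([conv(c, head_dim) for c in (128, 256, 512)])

    def forward(self, feats):
        c3, c4, c5 = feats
        p5 = self.red1(c5)
        p4 = self.td1(torch.cat([F.interpolate(p5, scale_factor=2.0), self.red2(c4)], 1))
        p3 = self.td2(torch.cat([F.interpolate(self.red3(p4), scale_factor=2.0), self.red4(c3)], 1))
        n4 = self.bu1(torch.cat([self.down1(p3), p4], 1))   # bottom-up: P3 -> P4
        n5 = self.bu2(torch.cat([self.down2(n4), c5], 1))   # bottom-up: P4 -> P5
        return [o(f) for o, f in zip(self.outs, (p3, n4, n5))]

feats = [torch.randn(1, 128, 80, 80), torch.randn(1, 256, 40, 40), torch.randn(1, 512, 20, 20)]
print([o.shape for o in PaFPNSketch()(feats)])  # all with 256 channels at 80/40/20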
+ 0 - 196
yolo/models/yolov7/yolov7_pred.py

@@ -1,196 +0,0 @@
-import torch
-import torch.nn as nn
-
-
-# -------------------- Detection Pred Layer --------------------
-class DetPredLayer(nn.Module):
-    def __init__(self,
-                 cls_dim      :int,
-                 reg_dim      :int,
-                 stride       :int,
-                 num_classes  :int,
-                 ):
-        super().__init__()
-        # --------- Basic Parameters ----------
-        self.stride  = stride
-        self.cls_dim = cls_dim
-        self.reg_dim = reg_dim
-        self.num_classes = num_classes
-
-        # --------- Network Parameters ----------
-        self.obj_pred = nn.Conv2d(self.cls_dim, 1, kernel_size=1)
-        self.cls_pred = nn.Conv2d(self.cls_dim, num_classes, kernel_size=1)
-        self.reg_pred = nn.Conv2d(self.reg_dim, 4, kernel_size=1)                
-
-        self.init_bias()
-        
-    def init_bias(self):
-        # Init bias
-        init_prob = 0.01
-        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
-        # obj pred
-        b = self.obj_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        self.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        # cls pred
-        b = self.cls_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        self.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        # reg pred
-        b = self.reg_pred.bias.view(-1, )
-        b.data.fill_(1.0)
-        self.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        w = self.reg_pred.weight
-        w.data.fill_(0.)
-        self.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
-
-    def generate_anchors(self, fmp_size):
-        """
-            fmp_size: (List) [H, W]
-        """
-        fmp_h, fmp_w = fmp_size
-        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
-
-        # [H, W, 2] -> [HW, 2]
-        anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
-        anchors = anchors + 0.5
-        anchors = anchors * self.stride
-
-        return anchors
-        
-    def forward(self, cls_feat, reg_feat):
-        # Prediction layers
-        obj_pred = self.obj_pred(reg_feat)
-        cls_pred = self.cls_pred(cls_feat)
-        reg_pred = self.reg_pred(reg_feat)
-
-        # Generate grid coordinates
-        B, _, H, W = cls_pred.size()
-        fmp_size = [H, W]
-        anchors = self.generate_anchors(fmp_size)
-        anchors = anchors.to(cls_pred.device)
-
-        # Adjust the view of each prediction tensor for easier downstream processing
-        # [B, C, H, W] -> [B, H, W, C] -> [B, H*W, C]
-        obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
-        cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
-        reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
-        
-        # Decode the bounding box coordinates
-        cxcy_pred = reg_pred[..., :2] * self.stride + anchors
-        bwbh_pred = torch.exp(reg_pred[..., 2:]) * self.stride
-        pred_x1y1 = cxcy_pred - bwbh_pred * 0.5
-        pred_x2y2 = cxcy_pred + bwbh_pred * 0.5
-        box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
-
-        # output dict
-        outputs = {"pred_obj": obj_pred,       # (torch.Tensor) [B, M, 1]
-                   "pred_cls": cls_pred,       # (torch.Tensor) [B, M, C]
-                   "pred_reg": reg_pred,       # (torch.Tensor) [B, M, 4]
-                   "pred_box": box_pred,       # (torch.Tensor) [B, M, 4]
-                   "anchors" : anchors,        # (torch.Tensor) [M, 2]
-                   "fmp_size": fmp_size,
-                   "stride"  : self.stride,    # (Int)
-                   }
-
-        return outputs
-
-class Yolov7DetPredLayer(nn.Module):
-    def __init__(self, cfg):
-        super().__init__()
-        # --------- Basic Parameters ----------
-        self.cfg = cfg
-        self.num_levels = len(cfg.out_stride)
-
-        # ----------- Network Parameters -----------
-        ## pred layers
-        self.multi_level_preds = nn.ModuleList(
-            [DetPredLayer(cls_dim      = round(cfg.head_dim * cfg.width),
-                          reg_dim      = round(cfg.head_dim * cfg.width),
-                          stride       = cfg.out_stride[level],
-                          num_classes  = cfg.num_classes,)
-                          for level in range(self.num_levels)
-                          ])
-
-    def forward(self, cls_feats, reg_feats):
-        all_anchors = []
-        all_fmp_sizes = []
-        all_obj_preds = []
-        all_cls_preds = []
-        all_reg_preds = []
-        all_box_preds = []
-        for level in range(self.num_levels):
-            # -------------- Single-level prediction --------------
-            outputs = self.multi_level_preds[level](cls_feats[level], reg_feats[level])
-
-            # collect results
-            all_obj_preds.append(outputs["pred_obj"])
-            all_cls_preds.append(outputs["pred_cls"])
-            all_reg_preds.append(outputs["pred_reg"])
-            all_box_preds.append(outputs["pred_box"])
-            all_fmp_sizes.append(outputs["fmp_size"])
-            all_anchors.append(outputs["anchors"])
-        
-        # output dict
-        outputs = {"pred_obj":  all_obj_preds,         # List(Tensor) [B, M, 1]
-                   "pred_cls":  all_cls_preds,         # List(Tensor) [B, M, C]
-                   "pred_reg":  all_reg_preds,         # List(Tensor) [B, M, 4*(reg_max)]
-                   "pred_box":  all_box_preds,         # List(Tensor) [B, M, 4]
-                   "fmp_sizes": all_fmp_sizes,         # List(Tensor) [M, 1]
-                   "anchors":   all_anchors,           # List(Tensor) [M, 2]
-                   "strides":   self.cfg.out_stride,   # List(Int) = [8, 16, 32]
-                   }
-
-        return outputs
-
-
-if __name__=='__main__':
-    import time
-    from thop import profile
-    # Model config
-    
-    # YOLOv7AF-Base config
-    class Yolov7AFBaseConfig(object):
-        def __init__(self) -> None:
-            # ---------------- Model config ----------------
-            self.width    = 1.0
-            self.depth    = 1.0
-            self.out_stride = [8, 16, 32]
-            self.max_stride = 32
-            ## Head
-            self.head_dim  = 256
-
-    cfg = Yolov7AFBaseConfig()
-    cfg.num_classes = 20
-    # Build a pred layer
-    pred = Yolov7DetPredLayer(cfg)
-
-    # Inference
-    cls_feats = [torch.randn(1, cfg.head_dim, 80, 80),
-                 torch.randn(1, cfg.head_dim, 40, 40),
-                 torch.randn(1, cfg.head_dim, 20, 20),]
-    reg_feats = [torch.randn(1, cfg.head_dim, 80, 80),
-                 torch.randn(1, cfg.head_dim, 40, 40),
-                 torch.randn(1, cfg.head_dim, 20, 20),]
-    t0 = time.time()
-    output = pred(cls_feats, reg_feats)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print('====== Pred output ======= ')
-    pred_obj = output["pred_obj"]
-    pred_cls = output["pred_cls"]
-    pred_reg = output["pred_reg"]
-    pred_box = output["pred_box"]
-    anchors  = output["anchors"]
-    
-    for level in range(len(cfg.out_stride)):
-        print("- Level-{} : objectness       -> {}".format(level, pred_obj[level].shape))
-        print("- Level-{} : classification   -> {}".format(level, pred_cls[level].shape))
-        print("- Level-{} : delta regression -> {}".format(level, pred_reg[level].shape))
-        print("- Level-{} : bbox regression  -> {}".format(level, pred_box[level].shape))
-        print("- Level-{} : anchor boxes     -> {}".format(level, anchors[level].shape))
-
-    flops, params = profile(pred, inputs=(cls_feats, reg_feats, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))

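yolov7_pred.py is removed outright, but its decoding scheme is worth recording, since whatever replaces it must reproduce the same math: anchors are grid-cell centers scaled by the stride, box centers are linear offsets times the stride, and widths/heights are exponentiated. Restated compactly from the deleted DetPredLayer:

import torch

def make_anchors(fmp_h, fmp_w, stride):
    # Grid-cell centers in pixels: (x + 0.5, y + 0.5) * stride, shape [H*W, 2].
    ys, xs = torch.meshgrid(torch.arange(fmp_h), torch.arange(fmp_w), indexing='ij')
    return (torch.stack([xs, ys], dim=-1).float().reshape(-1, 2) + 0.5) * stride

def decode_boxes(reg_pred, anchors, stride):
    # reg_pred: [B, M, 4] raw offsets; anchors: [M, 2] grid centers in pixels.
    cxcy = reg_pred[..., :2] * stride + anchors    # linear center offsets
    bwbh = torch.exp(reg_pred[..., 2:]) * stride   # log-space width/height
    return torch.cat([cxcy - bwbh * 0.5,           # x1y1
                      cxcy + bwbh * 0.5], dim=-1)  # x2y2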
+ 1 - 1
yolo/train.py

@@ -23,7 +23,7 @@ from config import build_config
 from dataset.build import build_dataset, build_transform

 # ----------------- Evaluator Components -----------------
-from evaluator.map_evaluator import MapEvaluator
+from map_evaluator import MapEvaluator

 # ----------------- Model Components -----------------
 from models import build_model
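One caveat on the import change in train.py: the bare `from map_evaluator import MapEvaluator` only resolves when map_evaluator.py sits next to train.py or its directory is on sys.path, and the diff does not show the file being moved. A defensive pattern (purely illustrative, not part of the commit):

# Prefer the package path; fall back to the bare import when the module
# lives alongside train.py.
try:
    from evaluator.map_evaluator import MapEvaluator
except ImportError:
    from map_evaluator import MapEvaluator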