yjh0410 committed 11 months ago
parent commit 1a10122af0

+ 1 - 1
yolo/config/yolov4_config.py

@@ -26,7 +26,7 @@ class Yolov4Config(object):
         self.val_conf_thresh = 0.001
         self.val_nms_thresh  = 0.7
         self.test_topk = 100
-        self.test_conf_thresh = 0.3
+        self.test_conf_thresh = 0.45
         self.test_nms_thresh  = 0.5
 
         # ---------------- Assignment config ----------------
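
Raising `test_conf_thresh` from 0.3 to 0.45 trades recall for precision at demo time: fewer low-score boxes survive the confidence filter that runs before NMS. A minimal sketch of the filtering step this threshold controls (illustrative names, not the repo's exact post-processing code):

```python
import torch

def filter_by_confidence(scores: torch.Tensor, boxes: torch.Tensor, conf_thresh: float):
    # Keep only predictions whose fused confidence exceeds the threshold.
    # scores: [M] per-box confidence, boxes: [M, 4] xyxy boxes.
    keep = scores > conf_thresh
    return scores[keep], boxes[keep]

scores = torch.tensor([0.90, 0.35, 0.50])
boxes  = torch.rand(3, 4)
print(filter_by_confidence(scores, boxes, 0.30)[0])  # keeps all three boxes
print(filter_by_confidence(scores, boxes, 0.45)[0])  # drops the 0.35-score box
```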

+ 5 - 49
yolo/config/yolov7_config.py

@@ -2,27 +2,16 @@
 
 
 def build_yolov7_config(args):
-    if   args.model == 'yolov7_t':
-        return Yolov7AFTConfig()
-    elif args.model == 'yolov7_l':
-        return Yolov7AFLConfig()
-    else:
-        raise NotImplementedError("No config for model: {}".format(args.model))
+    return Yolov7AFConfig()
     
-# YOLOv7AF-Base config
-class Yolov7AFBaseConfig(object):
+# Anchor-free YOLOv7 config
+class Yolov7AFConfig(object):
     def __init__(self) -> None:
         # ---------------- Model config ----------------
-        self.width    = 1.0
         self.out_stride = [8, 16, 32]
         self.max_stride = 32
-        self.model_scale = "b"
         ## Backbone
         self.use_pretrained = True
-        ## FPN
-        self.fpn_expansions = [0.5, 0.5]
-        self.fpn_block_bw = 4
-        self.fpn_block_dw = 1
         ## Head
         self.head_dim     = 256
         self.num_cls_head = 2
@@ -34,7 +23,7 @@ class Yolov7AFBaseConfig(object):
         self.val_conf_thresh = 0.001
         self.val_nms_thresh  = 0.7
         self.test_topk = 100
-        self.test_conf_thresh = 0.4
+        self.test_conf_thresh = 0.45
         self.test_nms_thresh  = 0.5
 
         # ---------------- Assignment config ----------------
@@ -73,7 +62,7 @@ class Yolov7AFBaseConfig(object):
         # ---------------- Data process config ----------------
         self.aug_type = 'yolo'
         self.mosaic_prob = 1.0
-        self.mixup_prob  = 0.15
+        self.mixup_prob  = 0.1
         self.copy_paste  = 0.0           # approximated by the YOLOX's mixup
         self.multi_scale = [0.5, 1.25]   # multi scale: [img_size * 0.5, img_size * 1.25]
         ## Pixel mean & std
@@ -97,36 +86,3 @@ class Yolov7AFBaseConfig(object):
         config_dict = {key: value for key, value in self.__dict__.items() if not key.startswith('__')}
         for k, v in config_dict.items():
             print("{} : {}".format(k, v))
-
-# YOLOv7-S
-class Yolov7AFTConfig(Yolov7AFBaseConfig):
-    def __init__(self) -> None:
-        super().__init__()
-        # ---------------- Model config ----------------
-        self.width = 0.50
-        self.model_scale = "t"
-        self.use_pretrained = True
-        self.fpn_expansions = [0.5, 0.5]
-        self.fpn_block_bw = 2
-        self.fpn_block_dw = 1
-
-        # ---------------- Data process config ----------------
-        self.mosaic_prob = 1.0
-        self.mixup_prob  = 0.0
-        self.copy_paste  = 0.5
-
-# YOLOv7-L
-class Yolov7AFLConfig(Yolov7AFBaseConfig):
-    def __init__(self) -> None:
-        super().__init__()
-        # ---------------- Model config ----------------
-        self.width = 1.0
-        self.model_scale = "l"
-        self.fpn_expansions = [0.5, 0.5]
-        self.fpn_block_bw = 4
-        self.fpn_block_dw = 1
-
-        # ---------------- Data process config ----------------
-        self.mosaic_prob = 1.0
-        self.mixup_prob  = 0.1
-        self.copy_paste  = 0.5
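
With the scale-specific classes removed, `build_yolov7_config` no longer branches on `args.model`; every model string now receives the single anchor-free `Yolov7AFConfig`. A minimal usage sketch (assumes the repo root is on `PYTHONPATH` so `yolo` is importable, and mocks the argparse namespace):

```python
from types import SimpleNamespace
from yolo.config.yolov7_config import build_yolov7_config

# Any model string yields the same config after this commit.
cfg = build_yolov7_config(SimpleNamespace(model='yolov7_l'))
print(cfg.head_dim, cfg.test_conf_thresh, cfg.mixup_prob)  # 256 0.45 0.1
```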

+ 1 - 1
yolo/eval.py

@@ -1,7 +1,7 @@
 import argparse
 import torch
 
-from evaluator.map_evaluator import MapEvaluator
+from map_evaluator import MapEvaluator
 from dataset.build import build_dataset, build_transform
 from utils.misc import load_weight
 

+ 0 - 0
yolo/evaluator/map_evaluator.py → yolo/map_evaluator.py


+ 26 - 0
yolo/models/yolov3/yolov3.py

@@ -54,7 +54,33 @@ class Yolov3(nn.Module):
                             [nn.Conv2d(head.reg_head_dim, 4 * self.num_anchors, kernel_size=1) 
                              for head in self.non_shared_heads
                              ])                 
+
+        # init pred layers
+        self.init_weight()
     
+    def init_weight(self):
+        # Init bias
+        init_prob = 0.01
+        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
+        # obj pred
+        for obj_pred in self.obj_preds:
+            b = obj_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # cls pred
+        for cls_pred in self.cls_preds:
+            b = cls_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # reg pred
+        for reg_pred in self.reg_preds:
+            b = reg_pred.bias.view(-1, )
+            b.data.fill_(1.0)
+            reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+            w = reg_pred.weight
+            w.data.fill_(0.)
+            reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+
     def generate_anchors(self, level, fmp_size):
         """
             fmp_size: (List) [H, W]
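
The new `init_weight` applies the RetinaNet-style prior-probability initialization: with `init_prob = 0.01`, the objectness and classification biases are set to `-log((1 - p) / p)`, so the sigmoid of every initial logit equals `p` and early training is not dominated by the overwhelmingly negative objectness targets. A standalone check of that identity (not repo code):

```python
import torch

p = 0.01
bias = -torch.log(torch.tensor((1. - p) / p))  # ≈ -4.595
print(torch.sigmoid(bias))                     # tensor(0.0100)
```

The regression branch is initialized differently (weights zero, bias 1.0), so every location starts from the same deterministic prediction rather than random noise.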

+ 3 - 3
yolo/models/yolov4/loss.py

@@ -10,9 +10,9 @@ class SetCriterion(object):
         self.cfg = cfg
         self.num_classes = cfg.num_classes
         # loss weight
-        self.loss_obj_weight = cfg.loss_obj_weight
-        self.loss_cls_weight = cfg.loss_cls_weight
-        self.loss_box_weight = cfg.loss_box_weight
+        self.loss_obj_weight = cfg.loss_obj
+        self.loss_cls_weight = cfg.loss_cls
+        self.loss_box_weight = cfg.loss_box
 
         # matcher
         self.matcher = Yolov4Matcher(self.num_classes, 3, cfg.anchor_size, cfg.iou_thresh)

+ 26 - 0
yolo/models/yolov4/yolov4.py

@@ -54,7 +54,33 @@ class Yolov4(nn.Module):
                             [nn.Conv2d(head.reg_head_dim, 4 * self.num_anchors, kernel_size=1) 
                              for head in self.non_shared_heads
                              ])                 
+
+        # init pred layers
+        self.init_weight()
     
+    def init_weight(self):
+        # Init bias
+        init_prob = 0.01
+        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
+        # obj pred
+        for obj_pred in self.obj_preds:
+            b = obj_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # cls pred
+        for cls_pred in self.cls_preds:
+            b = cls_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # reg pred
+        for reg_pred in self.reg_preds:
+            b = reg_pred.bias.view(-1, )
+            b.data.fill_(1.0)
+            reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+            w = reg_pred.weight
+            w.data.fill_(0.)
+            reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+
     def generate_anchors(self, level, fmp_size):
         """
             fmp_size: (List) [H, W]

+ 0 - 56
yolo/models/yolov7/README.md

@@ -1,56 +0,0 @@
-# YOLOv7:
-
-|    Model    |   Backbone    | Batch | Scale | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
-|-------------|---------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
-| YOLOv7-Tiny | ELANNet-Tiny  | 8xb16 |  640  |         39.5           |       58.5        |   22.6            |   7.9              | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_tiny_coco.pth) |
-| YOLOv7      | ELANNet-Large | 8xb16 |  640  |         49.5           |       68.8        |   144.6           |   44.0             | [ckpt](https://github.com/yjh0410/RT-ODLab/releases/download/yolo_tutorial_ckpt/yolov7_coco.pth) |
-| YOLOv7-X    | ELANNet-Huge  |       |  640  |                        |                   |                   |                    |  |
-
-- For training, we train `YOLOv7` and `YOLOv7-Tiny` for 300 epochs on 8 GPUs.
-- For data augmentation, we use [YOLOX-style](https://github.com/Megvii-BaseDetection/YOLOX) augmentation, including large-scale jitter (LSJ), Mosaic, and Mixup.
-- For the optimizer, we use `AdamW` with a weight decay of 0.05 and a per-image learning rate of 0.001 / 64.
-- For the learning rate scheduler, we use a cosine decay scheduler.
-- For YOLOv7's structure, we replace the coupled head with the YOLOX-style decoupled head.
-- I think YOLOv7 relies on too many training tricks, such as `anchor box`, `AuxiliaryHead`, `RepConv`, `Mosaic9x`, and so on, which makes the YOLO picture overly complicated and goes against the design philosophy of the YOLO series. Otherwise, why not use the DETR series instead, since it amounts to little more than acceleration optimizations on top of DETR? I therefore stayed faithful to my own technical aesthetics and implemented a cleaner, simpler YOLOv7; without the benefit of so many tricks, however, I did not reproduce the full performance, which is a pity.
-- I have no more GPUs left to train `YOLOv7-X`.
-
-## Train YOLOv7
-### Single GPU
-Taking training YOLOv7-Tiny on COCO as the example,
-```Shell
-python train.py --cuda -d coco --root path/to/coco -m yolov7_tiny -bs 16 -size 640 --wp_epoch 3 --max_epoch 300 --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --multi_scale 
-```
-
-### Multi GPU
-Taking training YOLOv7-Tiny on COCO as the example,
-```Shell
-python -m torch.distributed.run --nproc_per_node=8 train.py --cuda -dist -d coco --root /data/datasets/ -m yolov7_tiny -bs 128 -size 640 --wp_epoch 3 --max_epoch 300  --eval_epoch 10 --no_aug_epoch 20 --ema --fp16 --sybn --multi_scale --save_folder weights/ 
-```
-
-## Test YOLOv7
-Taking testing YOLOv7-Tiny on COCO-val as the example,
-```Shell
-python test.py --cuda -d coco --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth -size 640 -vt 0.4 --show 
-```
-
-## Evaluate YOLOv7
-Taking evaluating YOLOv7-Tiny on COCO-val as the example,
-```Shell
-python eval.py --cuda -d coco-val --root path/to/coco -m yolov7_tiny --weight path/to/yolov7_tiny.pth 
-```
-
-## Demo
-### Detect with Image
-```Shell
-python demo.py --mode image --path_to_img path/to/image_dirs/ --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show
-```
-
-### Detect with Video
-```Shell
-python demo.py --mode video --path_to_vid path/to/video --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif
-```
-
-### Detect with Camera
-```Shell
-python demo.py --mode camera --cuda -m yolov7_tiny --weight path/to/weight -size 640 -vt 0.4 --show --gif
-```

+ 2 - 10
yolo/models/yolov7/build.py

@@ -1,5 +1,3 @@
-import torch.nn as nn
-
 from .loss import SetCriterion
 from .yolov7 import Yolov7
 
@@ -8,17 +6,11 @@ from .yolov7 import Yolov7
 def build_yolov7(cfg, is_val=False):
     # -------------- Build YOLO --------------
     model = Yolov7(cfg, is_val)
-
-    # -------------- Initialize YOLO --------------
-    for m in model.modules():
-        if isinstance(m, nn.BatchNorm2d):
-            m.eps = 1e-3
-            m.momentum = 0.03    
-            
+  
     # -------------- Build criterion --------------
     criterion = None
     if is_val:
         # build criterion for training
         criterion = SetCriterion(cfg)
         
-    return model, criterion
+    return model, criterion

+ 12 - 10
yolo/models/yolov7/loss.py

@@ -1,20 +1,24 @@
 import torch
 import torch.nn.functional as F
+from .matcher import SimOTA
 from utils.box_ops import get_ious
 from utils.distributed_utils import get_world_size, is_dist_avail_and_initialized
 
-from .matcher import YoloxMatcher
-
 
 class SetCriterion(object):
     def __init__(self, cfg):
         self.cfg = cfg
         self.num_classes = cfg.num_classes
+        # loss weight
         self.loss_obj_weight = cfg.loss_obj
         self.loss_cls_weight = cfg.loss_cls
         self.loss_box_weight = cfg.loss_box
         # matcher
-        self.matcher = YoloxMatcher(cfg.num_classes, cfg.ota_center_sampling_radius, cfg.ota_topk_candidate)
+        self.matcher = SimOTA(
+            num_classes=self.num_classes,
+            center_sampling_radius=cfg.ota_center_sampling_radius,
+            topk_candidate=cfg.ota_topk_candidate
+            )
 
     def loss_objectness(self, pred_obj, gt_obj):
         loss_obj = F.binary_cross_entropy_with_logits(pred_obj, gt_obj, reduction='none')
@@ -37,21 +41,22 @@ class SetCriterion(object):
         """
             outputs['pred_obj']: List(Tensor) [B, M, 1]
             outputs['pred_cls']: List(Tensor) [B, M, C]
-            outputs['pred_reg']: List(Tensor) [B, M, 4]
+            outputs['pred_box']: List(Tensor) [B, M, 4]
+            outputs['pred_reg']: List(Tensor) [B, M, 4]
             outputs['strides']: List(Int) [8, 16, 32] output stride
             targets: (List) [dict{'boxes': [...], 
                                  'labels': [...], 
                                  'orig_size': ...}, ...]
         """
-        bs = outputs['pred_cls'][0].shape[0]
-        device = outputs['pred_cls'][0].device
         fpn_strides = outputs['strides']
         anchors = outputs['anchors']
+
         # preds: [B, M, C]
         obj_preds = torch.cat(outputs['pred_obj'], dim=1)
         cls_preds = torch.cat(outputs['pred_cls'], dim=1)
         box_preds = torch.cat(outputs['pred_box'], dim=1)
+        device = box_preds.device
+        bs = box_preds.shape[0]
 
         # label assignment
         cls_targets = []
@@ -135,7 +140,4 @@ class SetCriterion(object):
         )
 
         return loss_dict
-
-
-if __name__ == "__main__":
-    pass
+    

+ 4 - 2
yolo/models/yolov7/matcher.py

@@ -8,7 +8,7 @@ import torch.nn.functional as F
 from utils.box_ops import *
 
 
-class YoloxMatcher(object):
+class SimOTA(object):
     """
         This code references https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/models/yolo_head.py
     """
@@ -81,6 +81,7 @@ class YoloxMatcher(object):
 
         return fg_mask, assigned_labels, assigned_ious, assigned_indexs
 
+
     def get_in_boxes_info(
         self,
         gt_bboxes,   # [N, 4]
@@ -140,7 +141,8 @@ class YoloxMatcher(object):
             is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor]
         )
         return is_in_boxes_anchor, is_in_boxes_and_center
-
+    
+    
     def dynamic_k_matching(
         self, 
         cost, 

+ 67 - 75
yolo/models/yolov7/modules.py

@@ -1,112 +1,86 @@
+import numpy as np
 import torch
 import torch.nn as nn
-from typing import List
 
 
 # --------------------- Basic modules ---------------------
 class ConvModule(nn.Module):
     def __init__(self, 
-                 in_dim,        # in channels
-                 out_dim,       # out channels 
-                 kernel_size=1, # kernel size 
-                 padding=0,     # padding
-                 stride=1,      # stride
-                 dilation=1,    # dilation
-                ):
+                 in_dim: int,          # in channels
+                 out_dim: int,         # out channels 
+                 kernel_size: int = 1, # kernel size 
+                 stride: int = 1,      # stride
+                 ):
         super(ConvModule, self).__init__()
-        self.conv = nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, bias=False)
-        self.norm = nn.BatchNorm2d(out_dim)
-        self.act  = nn.SiLU(inplace=True)
+        convs = []
+        convs.append(nn.Conv2d(in_dim, out_dim, kernel_size=kernel_size, padding=kernel_size//2, stride=stride, bias=False))
+        convs.append(nn.BatchNorm2d(out_dim))
+        convs.append(nn.SiLU(inplace=True))
+        self.convs = nn.Sequential(*convs)
 
     def forward(self, x):
-        return self.act(self.norm(self.conv(x)))
-
-
-# ---------------------------- Basic Modules ----------------------------
-class MDown(nn.Module):
-    def __init__(self, in_dim: int, out_dim: int, ):
-        super().__init__()
-        inter_dim = out_dim // 2
-        self.downsample_1 = nn.Sequential(
-            nn.MaxPool2d((2, 2), stride=2),
-            ConvModule(in_dim, inter_dim, kernel_size=1)
-        )
-        self.downsample_2 = nn.Sequential(
-            ConvModule(in_dim, inter_dim, kernel_size=1),
-            ConvModule(inter_dim, inter_dim, kernel_size=3, padding=1, stride=2)
-        )
-        if in_dim == out_dim:
-            self.output_proj = nn.Identity()
-        else:
-            self.output_proj = ConvModule(inter_dim * 2, out_dim, kernel_size=1)
+        return self.convs(x)
 
-    def forward(self, x):
-        x1 = self.downsample_1(x)
-        x2 = self.downsample_2(x)
-
-        out = self.output_proj(torch.cat([x1, x2], dim=1))
+class ELANBlock(nn.Module):
+    def __init__(self,
+                 in_dim: int,
+                 out_dim: int,
+                 expansion: float = 0.5,
+                 branch_depth: int = 2,
+                 ):
+        super(ELANBlock, self).__init__()
+        inter_dim = int(in_dim * expansion)
+        self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.cv2 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.cv3 = nn.Sequential(*[ConvModule(inter_dim, inter_dim, kernel_size=3)
+                                   for _ in range(round(branch_depth))
+                                   ])
+        self.cv4 = nn.Sequential(*[ConvModule(inter_dim, inter_dim, kernel_size=3)
+                                   for _ in range(round(branch_depth))
+                                   ])
+        self.out = ConvModule(inter_dim*4, out_dim, kernel_size=1)
 
-        return out
 
-class ELANLayer(nn.Module):
-    def __init__(self,
-                 in_dim,
-                 out_dim,
-                 expansion  :float = 0.5,
-                 num_blocks :int   = 1,
-                 ) -> None:
-        super(ELANLayer, self).__init__()
-        self.inter_dim = round(in_dim * expansion)
-        self.conv_layer_1 = ConvModule(in_dim, self.inter_dim, kernel_size=1)
-        self.conv_layer_2 = ConvModule(in_dim, self.inter_dim, kernel_size=1)
-        self.conv_layer_3 = ConvModule(self.inter_dim * 4, out_dim, kernel_size=1)
-        self.elan_layer_1 = nn.Sequential(*[ConvModule(self.inter_dim, self.inter_dim, kernel_size=3, padding=1)
-                                           for _ in range(num_blocks)])
-        self.elan_layer_2 = nn.Sequential(*[ConvModule(self.inter_dim, self.inter_dim, kernel_size=3, padding=1)
-                                           for _ in range(num_blocks)])
 
     def forward(self, x):
-        # Input proj
-        x1 = self.conv_layer_1(x)
-        x2 = self.conv_layer_2(x)
-        x3 = self.elan_layer_1(x2)
-        x4 = self.elan_layer_2(x3)
-    
-        out = self.conv_layer_3(torch.cat([x1, x2, x3, x4], dim=1))
+        x1 = self.cv1(x)
+        x2 = self.cv2(x)
+        x3 = self.cv3(x2)
+        x4 = self.cv4(x3)
+        out = self.out(torch.cat([x1, x2, x3, x4], dim=1))
 
         return out
 
-class ELANLayerFPN(nn.Module):
+class ELANBlockFPN(nn.Module):
     def __init__(self,
-                 in_dim,
-                 out_dim,
-                 expansions   :List = [0.5, 0.5],
-                 branch_width :int  = 4,
-                 branch_depth :int  = 1,
+                 in_dim: int,
+                 out_dim: int,
+                 expansion: float = 0.5,
+                 branch_width: int = 4,
+                 branch_depth: int = 1,
                  ):
-        super(ELANLayerFPN, self).__init__()
+        super(ELANBlockFPN, self).__init__()
         # Basic parameters
-        inter_dim  = round(in_dim * expansions[0])
-        inter_dim2 = round(inter_dim * expansions[1]) 
+        inter_dim = int(in_dim * expansion)
+        inter_dim2 = int(inter_dim * expansion) 
         # Network structure
         self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
         self.cv2 = ConvModule(in_dim, inter_dim, kernel_size=1)
         self.cv3 = nn.ModuleList()
         for idx in range(round(branch_width)):
             if idx == 0:
-                cvs = [ConvModule(inter_dim, inter_dim2, kernel_size=3, padding=1)]
+                cvs = [ConvModule(inter_dim, inter_dim2, kernel_size=3)]
             else:
-                cvs = [ConvModule(inter_dim2, inter_dim2, kernel_size=3, padding=1)]
+                cvs = [ConvModule(inter_dim2, inter_dim2, kernel_size=3)]
             # deeper
             if round(branch_depth) > 1:
                 for _ in range(1, round(branch_depth)):
-                    cvs.append(ConvModule(inter_dim2, inter_dim2, kernel_size=3, padding=1))
+                    cvs.append(ConvModule(inter_dim2, inter_dim2, kernel_size=3))
                 self.cv3.append(nn.Sequential(*cvs))
             else:
                 self.cv3.append(cvs[0])
 
-        self.output_proj = ConvModule(inter_dim*2+inter_dim2*len(self.cv3), out_dim, kernel_size=1)
-
+        self.out = ConvModule(inter_dim*2 + inter_dim2*len(self.cv3), out_dim, kernel_size=1)
 
     def forward(self, x):
         x1 = self.cv1(x)
@@ -116,6 +90,24 @@ class ELANLayerFPN(nn.Module):
             y1 = inter_outs[-1]
             y2 = m(y1)
             inter_outs.append(y2)
-        out = self.output_proj(torch.cat(inter_outs, dim=1))
+        out = self.out(torch.cat(inter_outs, dim=1))
+
+        return out
+
+class DownSample(nn.Module):
+    def __init__(self, in_dim, out_dim):
+        super().__init__()
+        inter_dim = out_dim // 2
+        self.mp = nn.MaxPool2d((2, 2), 2)
+        self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
+        self.cv2 = nn.Sequential(
+            ConvModule(in_dim, inter_dim, kernel_size=1),
+            ConvModule(inter_dim, inter_dim, kernel_size=3, stride=2)
+        )
+
+    def forward(self, x):
+        x1 = self.cv1(self.mp(x))
+        x2 = self.cv2(x)
+        out = torch.cat([x1, x2], dim=1)
 
         return out
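
Channel bookkeeping in the new blocks: `ELANBlock` concatenates four `inter_dim`-channel branches before its 1×1 output projection, and `DownSample` halves the resolution while concatenating a max-pool path and a strided-conv path, each carrying `out_dim // 2` channels. A quick shape check (assumes running inside `yolo/models/yolov7/` so the plain `modules` import resolves):

```python
import torch
from modules import ELANBlock, DownSample

elan = ELANBlock(in_dim=128, out_dim=256, expansion=0.5, branch_depth=2)
down = DownSample(in_dim=256, out_dim=256)

x = torch.randn(1, 128, 40, 40)
y = elan(x)   # 4 branches x int(128 * 0.5) = 256 channels -> 1x1 proj to 256
z = down(y)   # two 128-channel paths, spatial size halved
print(y.shape, z.shape)  # [1, 256, 40, 40] [1, 256, 20, 20]
```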

+ 119 - 44
yolo/models/yolov7/yolov7.py

@@ -1,49 +1,96 @@
-# --------------- Torch components ---------------
 import torch
 import torch.nn as nn
 
 # --------------- Model components ---------------
-from .yolov7_backbone import Yolov7TBackbone, Yolov7LBackbone
+from .yolov7_backbone import Yolov7Backbone
 from .yolov7_neck     import SPPFBlockCSP
 from .yolov7_pafpn    import Yolov7PaFPN
-from .yolov7_head     import Yolov7DetHead
-from .yolov7_pred     import Yolov7DetPredLayer
+from .yolov7_head     import DecoupledHead
 
 # --------------- External components ---------------
 from utils.misc import multiclass_nms
 
 
-# Yolov7
 class Yolov7(nn.Module):
-    def __init__(self,
-                 cfg,
-                 is_val = False,
-                 ) -> None:
+    def __init__(self, cfg, is_val: bool = False) -> None:
         super(Yolov7, self).__init__()
         # ---------------------- Basic setting ----------------------
-        assert cfg.model_scale in ["t", "l", "x"]
         self.cfg = cfg
         self.num_classes = cfg.num_classes
+        self.out_stride = cfg.out_stride
+        self.num_levels = len(cfg.out_stride)
+
         ## Post-process parameters
         self.topk_candidates  = cfg.val_topk        if is_val else cfg.test_topk
         self.conf_thresh      = cfg.val_conf_thresh if is_val else cfg.test_conf_thresh
         self.nms_thresh       = cfg.val_nms_thresh  if is_val else cfg.test_nms_thresh
         self.no_multi_labels  = False if is_val else True
         
-        # ---------------------- Network Parameters ----------------------
-        ## Backbone
-        self.backbone = Yolov7TBackbone(cfg) if cfg.model_scale == "t" else Yolov7LBackbone(cfg)
-        self.pyramid_feat_dims = self.backbone.feat_dims[-3:]
-        ## Neck: SPP
-        self.neck = SPPFBlockCSP(self.pyramid_feat_dims[-1], self.pyramid_feat_dims[-1]//2)
-        self.pyramid_feat_dims[-1] = self.neck.out_dim
-        ## Neck: FPN
-        self.fpn = Yolov7PaFPN(cfg, self.pyramid_feat_dims)
-        ## Head
-        self.head = Yolov7DetHead(cfg, self.fpn.out_dims)
-        ## Pred
-        self.pred = Yolov7DetPredLayer(cfg)
-
+        # ------------------- Network Structure -------------------
+        self.backbone = Yolov7Backbone(use_pretrained=cfg.use_pretrained)
+        self.neck     = SPPFBlockCSP(self.backbone.feat_dims[-1], self.backbone.feat_dims[-1] // 2, expand_ratio=0.5)
+        self.backbone.feat_dims[-1] = self.backbone.feat_dims[-1] // 2
+        self.fpn      = Yolov7PaFPN(self.backbone.feat_dims[-3:], head_dim=cfg.head_dim)
+        self.non_shared_heads = nn.ModuleList([DecoupledHead(cfg, in_dim)
+                                               for in_dim in self.fpn.fpn_out_dims
+                                               ])
+
+        ## Prediction layers
+        self.obj_preds = nn.ModuleList(
+                            [nn.Conv2d(head.reg_head_dim, 1, kernel_size=1)
+                             for head in self.non_shared_heads
+                             ]) 
+        self.cls_preds = nn.ModuleList(
+                            [nn.Conv2d(head.cls_head_dim, self.num_classes, kernel_size=1) 
+                             for head in self.non_shared_heads
+                             ]) 
+        self.reg_preds = nn.ModuleList(
+                            [nn.Conv2d(head.reg_head_dim, 4, kernel_size=1) 
+                             for head in self.non_shared_heads
+                             ])
+        
+        # init pred layers
+        self.init_weight()
+    
+    def init_weight(self):
+        # Init bias
+        init_prob = 0.01
+        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
+        # obj pred
+        for obj_pred in self.obj_preds:
+            b = obj_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # cls pred
+        for cls_pred in self.cls_preds:
+            b = cls_pred.bias.view(1, -1)
+            b.data.fill_(bias_value.item())
+            cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # reg pred
+        for reg_pred in self.reg_preds:
+            b = reg_pred.bias.view(-1, )
+            b.data.fill_(1.0)
+            reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+            w = reg_pred.weight
+            w.data.fill_(0.)
+            reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+
+    def generate_anchors(self, level, fmp_size):
+        """
+            fmp_size: (List) [H, W]
+        """
+        # generate grid cells
+        fmp_h, fmp_w = fmp_size
+        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
+        # [H, W, 2] -> [HW, 2]
+        anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
+        anchors += 0.5  # add center offset
+        anchors *= self.out_stride[level]
+
+        return anchors
+        
     def post_process(self, obj_preds, cls_preds, box_preds):
         """
         We process predictions at each scale hierarchically
@@ -66,8 +113,7 @@ class Yolov7(nn.Module):
             box_pred_i = box_pred_i[0]
             if self.no_multi_labels:
                 # [M,]
-                scores, labels = torch.max(
-                    torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
+                scores, labels = torch.max(torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
 
                 # Keep top k top scoring indices only.
                 num_topk = min(self.topk_candidates, box_pred_i.size(0))
@@ -126,33 +172,62 @@ class Yolov7(nn.Module):
         return bboxes, scores, labels
     
     def forward(self, x):
-        # ---------------- Backbone ----------------
+        bs = x.shape[0]
         pyramid_feats = self.backbone(x)
-
-        # ---------------- Neck: SPP ----------------
         pyramid_feats[-1] = self.neck(pyramid_feats[-1])
-        
-        # ---------------- Neck: PaFPN ----------------
         pyramid_feats = self.fpn(pyramid_feats)
 
-        # ---------------- Heads ----------------
-        cls_feats, reg_feats = self.head(pyramid_feats)
-
-        # ---------------- Preds ----------------
-        outputs = self.pred(cls_feats, reg_feats)
-        outputs['image_size'] = [x.shape[2], x.shape[3]]
+        all_anchors = []
+        all_obj_preds = []
+        all_cls_preds = []
+        all_box_preds = []
+        all_reg_preds = []
+        for level, (feat, head) in enumerate(zip(pyramid_feats, self.non_shared_heads)):
+            cls_feat, reg_feat = head(feat)
+
+            # [B, C, H, W]
+            obj_pred = self.obj_preds[level](reg_feat)
+            cls_pred = self.cls_preds[level](cls_feat)
+            reg_pred = self.reg_preds[level](reg_feat)
+
+            B, _, H, W = cls_pred.size()
+            fmp_size = [H, W]
+            # generate anchor boxes: [M, 4]
+            anchors = self.generate_anchors(level, fmp_size)
+            anchors = anchors.to(x.device)
+            
+            # [B, C, H, W] -> [B, H, W, C] -> [B, M, C]
+            obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
+            cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
+            reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
+
+            # decode bbox
+            ctr_pred = reg_pred[..., :2] * self.out_stride[level] + anchors[..., :2]
+            wh_pred = torch.exp(reg_pred[..., 2:]) * self.out_stride[level]
+            pred_x1y1 = ctr_pred - wh_pred * 0.5
+            pred_x2y2 = ctr_pred + wh_pred * 0.5
+            box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
+
+            all_obj_preds.append(obj_pred)
+            all_cls_preds.append(cls_pred)
+            all_box_preds.append(box_pred)
+            all_reg_preds.append(reg_pred)
+            all_anchors.append(anchors)
 
         if not self.training:
-            all_obj_preds = outputs['pred_obj']
-            all_cls_preds = outputs['pred_cls']
-            all_box_preds = outputs['pred_box']
-
-            # post process
             bboxes, scores, labels = self.post_process(all_obj_preds, all_cls_preds, all_box_preds)
             outputs = {
                 "scores": scores,
                 "labels": labels,
                 "bboxes": bboxes
             }
-        
-        return outputs
+        else:
+            outputs = {"pred_obj": all_obj_preds,        # List(Tensor) [B, M, 1]
+                       "pred_cls": all_cls_preds,        # List(Tensor) [B, M, C]
+                       "pred_box": all_box_preds,        # List(Tensor) [B, M, 4]
+                       "pred_reg": all_reg_preds,        # List(Tensor) [B, M, 4]
+                       "anchors": all_anchors,           # List(Tensor) [M, 2]
+                       "strides": self.out_stride,       # List(Int) [8, 16, 32]
+                       }
+
+        return outputs 
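
The decoding inside `forward` scales the predicted center offsets by the level stride, adds the precomputed grid anchors, and exponentiates the width/height channels before the same stride scaling. A standalone numeric check of that box decoding (mirrors the lines above; not imported from the repo):

```python
import torch

stride = 8.0
anchor = torch.tensor([[4.0, 4.0]])             # grid cell (0, 0): (0 + 0.5) * 8
reg    = torch.tensor([[0.5, 0.5, 0.0, 0.0]])   # (dx, dy, log w, log h)

ctr = reg[..., :2] * stride + anchor            # [[8., 8.]]
wh  = torch.exp(reg[..., 2:]) * stride          # [[8., 8.]]
box = torch.cat([ctr - wh * 0.5, ctr + wh * 0.5], dim=-1)
print(box)                                      # tensor([[ 4.,  4., 12., 12.]])
```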

+ 58 - 161
yolo/models/yolov7/yolov7_backbone.py

@@ -2,142 +2,67 @@ import torch
 import torch.nn as nn
 
 try:
-    from .modules import ConvModule, MDown, ELANLayer
+    from .modules import ConvModule, ELANBlock, DownSample
 except:
-    from  modules import ConvModule, MDown, ELANLayer
+    from  modules import ConvModule, ELANBlock, DownSample
+    
 
-# IN1K pretrained weight
-pretrained_urls = {
-    't': None,
-    'l': None,
-    'x': None,
+in1k_pretrained_urls = {
+    "elannet_large": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/yolov7_elannet_large.pth",
 }
 
-# ELANNet-Tiny
-class Yolov7TBackbone(nn.Module):
-    def __init__(self, cfg):
-        super(Yolov7TBackbone, self).__init__()
-        # ---------------- Basic parameters ----------------
-        self.model_scale = cfg.model_scale
-        self.elan_depth = 1
-        self.feat_dims = [round(64  * cfg.width),
-                          round(128 * cfg.width),
-                          round(256 * cfg.width),
-                          round(512 * cfg.width),
-                          round(1024 * cfg.width)]
-
-        # ---------------- Model parameters ----------------
-        self.layer_1 = self.make_stem(3, self.feat_dims[0])
-        self.layer_2 = self.make_block(self.feat_dims[0], self.feat_dims[1], expansion=0.5, downsample="conv")
-        self.layer_3 = self.make_block(self.feat_dims[1], self.feat_dims[2], expansion=0.5, downsample="maxpool")
-        self.layer_4 = self.make_block(self.feat_dims[2], self.feat_dims[3], expansion=0.5, downsample="maxpool")
-        self.layer_5 = self.make_block(self.feat_dims[3], self.feat_dims[4], expansion=0.5, downsample="maxpool")
+# --------------------- Yolov7 backbone (ELANNet-Large) -----------------------
+class Yolov7Backbone(nn.Module):
+    def __init__(self, use_pretrained: bool = False):
+        super(Yolov7Backbone, self).__init__()
+        self.feat_dims = [32, 64, 128, 256, 512, 1024, 1024]
+        self.squeeze_ratios = [0.5, 0.5, 0.5, 0.25]  # Stage-1 -> Stage-4
+        self.branch_depths = [2, 2, 2, 2]            # Stage-1 -> Stage-4
+        self.use_pretrained = use_pretrained
+
+        # -------------------- Network parameters --------------------
+        ## P1/2
+        self.layer_1 = nn.Sequential(
+            ConvModule(3, self.feat_dims[0], kernel_size=3),      
+            ConvModule(self.feat_dims[0], self.feat_dims[1], kernel_size=3, stride=2),
+            ConvModule(self.feat_dims[1], self.feat_dims[1], kernel_size=3)
+        )
+        ## P2/4: Stage-1
+        self.layer_2 = nn.Sequential(   
+            ConvModule(self.feat_dims[1], self.feat_dims[2], kernel_size=3, stride=2),             
+            ELANBlock(self.feat_dims[2], self.feat_dims[3], self.squeeze_ratios[0], self.branch_depths[0])
+        )
+        ## P3/8: Stage-2
+        self.layer_3 = nn.Sequential(
+            DownSample(self.feat_dims[3], self.feat_dims[3]),
+            ELANBlock(self.feat_dims[3], self.feat_dims[4], self.squeeze_ratios[1], self.branch_depths[1])
+        )
+        ## P4/16: Stage-3
+        self.layer_4 = nn.Sequential(
+            DownSample(self.feat_dims[4], self.feat_dims[4]),
+            ELANBlock(self.feat_dims[4], self.feat_dims[5], self.squeeze_ratios[2], self.branch_depths[2])
+        )
+        ## P5/32: Stage-4
+        self.layer_5 = nn.Sequential(
+            DownSample(self.feat_dims[5], self.feat_dims[5]),
+            ELANBlock(self.feat_dims[5], self.feat_dims[6], self.squeeze_ratios[3], self.branch_depths[3])
+        )
 
         # Initialize all layers
         self.init_weights()
         
-        # Load imagenet pretrained weight
-        if cfg.use_pretrained:
-            self.load_pretrained()
-        
     def init_weights(self):
         """Initialize the parameters."""
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
                 m.reset_parameters()
 
-    def load_pretrained(self):
-        url = pretrained_urls[self.model_scale]
-        if url is not None:
-            print('Loading backbone pretrained weight from : {}'.format(url))
-            # checkpoint state dict
-            checkpoint = torch.hub.load_state_dict_from_url(
-                url=url, map_location="cpu", check_hash=True)
-            checkpoint_state_dict = checkpoint.pop("model")
-            # model state dict
-            model_state_dict = self.state_dict()
-            # check
-            for k in list(checkpoint_state_dict.keys()):
-                if k in model_state_dict:
-                    shape_model = tuple(model_state_dict[k].shape)
-                    shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
-                    if shape_model != shape_checkpoint:
-                        checkpoint_state_dict.pop(k)
-                else:
-                    checkpoint_state_dict.pop(k)
-                    print('Unused key: ', k)
-            # load the weight
-            self.load_state_dict(checkpoint_state_dict)
-        else:
-            print('No pretrained weight for model scale: {}.'.format(self.model_scale))
-
-    def make_stem(self, in_dim, out_dim):
-        stem = ConvModule(in_dim, out_dim, kernel_size=6, padding=2, stride=2)
-        
-        return stem
-
-    def make_block(self, in_dim, out_dim, expansion=0.5, downsample="maxpool"):
-        if downsample == "maxpool":
-            block = nn.Sequential(
-                nn.MaxPool2d((2, 2), stride=2),             
-                ELANLayer(in_dim, out_dim, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        elif downsample == "conv":
-            block = nn.Sequential(
-                ConvModule(in_dim, out_dim, kernel_size=3, padding=1, stride=2),             
-                ELANLayer(out_dim, out_dim, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        else:
-            raise NotImplementedError("Unknown downsample type: {}".format(downsample))
-
-        return block
-    
-    def forward(self, x):
-        c1 = self.layer_1(x)
-        c2 = self.layer_2(c1)
-        c3 = self.layer_3(c2)
-        c4 = self.layer_4(c3)
-        c5 = self.layer_5(c4)
-        outputs = [c3, c4, c5]
-
-        return outputs
-
-# ELANNet-Large
-class Yolov7LBackbone(nn.Module):
-    def __init__(self, cfg):
-        super(Yolov7LBackbone, self).__init__()
-        # ---------------- Basic parameters ----------------
-        self.model_scale = cfg.model_scale
-        self.elan_depth = 2
-        self.feat_dims = [round(64  * cfg.width),
-                          round(128  * cfg.width),
-                          round(256  * cfg.width),
-                          round(512  * cfg.width),
-                          round(1024 * cfg.width),
-                          round(1024 * cfg.width)]
-
-        # ---------------- Model parameters ----------------
-        self.layer_1 = self.make_stem(3, self.feat_dims[0])
-        self.layer_2 = self.make_block(self.feat_dims[0], self.feat_dims[1], self.feat_dims[2], expansion=0.5, conv_downsample=True)
-        self.layer_3 = self.make_block(self.feat_dims[2], self.feat_dims[2], self.feat_dims[3], expansion=0.5)
-        self.layer_4 = self.make_block(self.feat_dims[3], self.feat_dims[3], self.feat_dims[4], expansion=0.5)
-        self.layer_5 = self.make_block(self.feat_dims[4], self.feat_dims[4], self.feat_dims[5], expansion=0.25)
-
-        # Initialize all layers
-        self.init_weights()
-        
         # Load imagenet pretrained weight
-        if cfg.use_pretrained:
+        if self.use_pretrained:
             self.load_pretrained()
-        
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
 
     def load_pretrained(self):
-        url = pretrained_urls[self.model_scale]
+        url = in1k_pretrained_urls["elannet_large"]
         if url is not None:
             print('Loading backbone pretrained weight from : {}'.format(url))
             # checkpoint state dict
@@ -161,63 +86,35 @@ class Yolov7LBackbone(nn.Module):
         else:
             print('No pretrained weight for model scale: {}.'.format(self.model_scale))
 
-    def make_stem(self, in_dim, out_dim):
-        stem = nn.Sequential(
-            ConvModule(in_dim, out_dim//2, kernel_size=3, padding=1, stride=1),
-            ConvModule(out_dim//2, out_dim, kernel_size=3, padding=1, stride=2),
-            ConvModule(out_dim, out_dim, kernel_size=3, padding=1, stride=1)
-
-        )
-
-        return stem
-
-    def make_block(self, in_dim, out_dim_1, out_dim_2, expansion=0.5, conv_downsample=False):
-        if conv_downsample:
-            block = nn.Sequential(
-                ConvModule(in_dim, out_dim_1, kernel_size=3, padding=1, stride=2),             
-                ELANLayer(out_dim_1, out_dim_2, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        else:
-            block = nn.Sequential(
-                MDown(in_dim, out_dim_1),             
-                ELANLayer(out_dim_1, out_dim_2, expansion=expansion, num_blocks=self.elan_depth),
-                )
-        
-        return block
-    
     def forward(self, x):
         c1 = self.layer_1(x)
         c2 = self.layer_2(c1)
         c3 = self.layer_3(c2)
         c4 = self.layer_4(c3)
         c5 = self.layer_5(c4)
+
         outputs = [c3, c4, c5]
 
         return outputs
 
 
-if __name__ == '__main__':
-    import time
+if __name__ == '__main__':
     from thop import profile
-    class BaseConfig(object):
-        def __init__(self) -> None:
-            self.use_pretrained = False
-            self.width = 0.5
-            self.model_scale = "t"
-
-    cfg = BaseConfig()
-    model = Yolov7TBackbone(cfg)
-    x = torch.randn(1, 3, 640, 640)
-    t0 = time.time()
+
+    # Build backbone
+    model = Yolov7Backbone(use_pretrained=True)
+
+    # Randomly generate an input tensor
+    x = torch.randn(2, 3, 640, 640)
+
+    # Inference
     outputs = model(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
+    print(' - the shape of input :  ', x.shape)
     for out in outputs:
-        print(out.shape)
+        print(' - the shape of output : ', out.shape)
 
     x = torch.randn(1, 3, 640, 640)
-    print('==============================')
     flops, params = profile(model, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

+ 34 - 94
yolo/models/yolov7/yolov7_head.py

@@ -1,5 +1,6 @@
 import torch
 import torch.nn as nn
+from typing import List
 
 try:
     from .modules import ConvModule
@@ -7,50 +8,34 @@ except:
     from  modules import ConvModule
 
 
-## Single-level Detection Head
-class DetHead(nn.Module):
-    def __init__(self,
-                 in_dim       :int  = 256,
-                 cls_head_dim :int  = 256,
-                 reg_head_dim :int  = 256,
-                 num_cls_head :int  = 2,
-                 num_reg_head :int  = 2,
-                 ):
+class DecoupledHead(nn.Module):
+    def __init__(self, cfg, in_dim: int = 256):
         super().__init__()
-        # --------- Basic Parameters ----------
         self.in_dim = in_dim
-        self.num_cls_head = num_cls_head
-        self.num_reg_head = num_reg_head
-        
-        # --------- Network Parameters ----------
-        ## cls head
+        self.cls_head_dim = cfg.head_dim
+        self.reg_head_dim = cfg.head_dim
+        self.num_cls_head = cfg.num_cls_head
+        self.num_reg_head = cfg.num_reg_head
+
+        # classification feature head
         cls_feats = []
-        self.cls_head_dim = cls_head_dim
-        for i in range(num_cls_head):
+        for i in range(self.num_cls_head):
             if i == 0:
-                cls_feats.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
+                cls_feats.append(ConvModule(in_dim, self.cls_head_dim, kernel_size=3, stride=1))
             else:
-                cls_feats.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, padding=1, stride=1))
-        ## reg head
+                cls_feats.append(ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, stride=1))
+                
+        # box regression feature head
         reg_feats = []
-        self.reg_head_dim = reg_head_dim
-        for i in range(num_reg_head):
+        for i in range(self.num_reg_head):
             if i == 0:
-                reg_feats.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
+                reg_feats.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, stride=1))
             else:
-                reg_feats.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, padding=1, stride=1))
+                reg_feats.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, stride=1))
 
         self.cls_feats = nn.Sequential(*cls_feats)
         self.reg_feats = nn.Sequential(*reg_feats)
 
-        self.init_weights()
-        
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
-
     def forward(self, x):
         """
             in_feats: (Tensor) [B, C, H, W]
@@ -59,79 +44,34 @@ class DetHead(nn.Module):
         reg_feats = self.reg_feats(x)
 
         return cls_feats, reg_feats
-    
-## Multi-level Detection Head
-class Yolov7DetHead(nn.Module):
-    def __init__(self, cfg, in_dims):
-        super().__init__()
-        self.num_levels = len(cfg.out_stride)
-        ## ----------- Network Parameters -----------
-        self.multi_level_heads = nn.ModuleList(
-            [DetHead(in_dim       = in_dims[level],
-                     cls_head_dim = round(cfg.head_dim * cfg.width),
-                     reg_head_dim = round(cfg.head_dim * cfg.width),
-                     num_cls_head = cfg.num_cls_head,
-                     num_reg_head = cfg.num_reg_head,
-                     ) for level in range(self.num_levels)])
-        
-        # --------- Basic Parameters ----------
-        self.in_dims = in_dims
-        self.cls_head_dim = cfg.head_dim
-        self.reg_head_dim = cfg.head_dim
-
-    def forward(self, feats):
-        """
-            feats: List[(Tensor)] [[B, C, H, W], ...]
-        """
-        cls_feats = []
-        reg_feats = []
-        for feat, head in zip(feats, self.multi_level_heads):
-            # ---------------- Pred ----------------
-            cls_feat, reg_feat = head(feat)
-
-            cls_feats.append(cls_feat)
-            reg_feats.append(reg_feat)
-
-        return cls_feats, reg_feats
 
 
 if __name__=='__main__':
-    import time
     from thop import profile
-    # Model config
     
-    # YOLOv7-Base config
+    # YOLOv7 configuration
     class Yolov7BaseConfig(object):
         def __init__(self) -> None:
             # ---------------- Model config ----------------
-            self.width    = 0.50
-            self.out_stride = [8, 16, 32]
-            self.max_stride = 32
-            self.num_levels = 3
-            ## Head
             self.head_dim  = 256
-            self.num_cls_head   = 2
-            self.num_reg_head   = 2
-
+            self.num_cls_head = 2
+            self.num_reg_head = 2
     cfg = Yolov7BaseConfig()
+
     # Build a head
-    pyramid_feats = [torch.randn(1, cfg.head_dim, 80, 80),
-                     torch.randn(1, cfg.head_dim, 40, 40),
-                     torch.randn(1, cfg.head_dim, 20, 20)]
-    head = Yolov7DetHead(cfg, [cfg.head_dim]*3)
+    model = DecoupledHead(cfg, in_dim=256)
 
+    # Randomly generate an input tensor
+    x = torch.randn(2, 256, 20, 20)
 
     # Inference
-    t0 = time.time()
-    cls_feats, reg_feats = head(pyramid_feats)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print("====== Yolov7 Head output ======")
-    for level, (cls_f, reg_f) in enumerate(zip(cls_feats, reg_feats)):
-        print("- Level-{} : ".format(level), cls_f.shape, reg_f.shape)
-
-    flops, params = profile(head, inputs=(pyramid_feats, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
-      
+    cls_feats, reg_feats = model(x)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of cls feats : ', cls_feats.shape)
+    print(' - the shape of reg feats : ', reg_feats.shape)
+
+    x = torch.randn(1, 256, 20, 20)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

+ 31 - 62
yolo/models/yolov7/yolov7_neck.py

@@ -7,30 +7,18 @@ except:
     from  modules import ConvModule
 
 
-# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv7-AF by Glenn Jocher
 class SPPF(nn.Module):
     """
-        This code referenced to https://github.com/ultralytics/yolov7-AF
+        This code references https://github.com/ultralytics/yolov5
     """
-    def __init__(self, in_dim, out_dim, expansion=0.5):
+    def __init__(self, in_dim, out_dim, expand_ratio=0.5):
         super().__init__()
-        ## ----------- Basic Parameters -----------
-        inter_dim = int(in_dim * expansion)
+        inter_dim = int(in_dim * expand_ratio)
         self.out_dim = out_dim
-        ## ----------- Network Parameters -----------
         self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
-        self.cv2 = ConvModule(inter_dim * 4, out_dim, kernel_size=1,)
+        self.cv2 = ConvModule(inter_dim * 4, out_dim, kernel_size=1)
         self.m = nn.MaxPool2d(kernel_size=5, stride=1, padding=2)
 
-        # Initialize all layers
-        self.init_weights()
-
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
-
     def forward(self, x):
         x = self.cv1(x)
         y1 = self.m(x)
@@ -38,71 +26,52 @@ class SPPF(nn.Module):
 
         return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
 
-# SPPF block with CSP module
 class SPPFBlockCSP(nn.Module):
-    """
-        CSP Spatial Pyramid Pooling Block
-    """
-    def __init__(self, in_dim, out_dim):
+    def __init__(self,
+                 in_dim: int,
+                 out_dim: int,
+                 expand_ratio: float = 0.5,
+                 ):
         super(SPPFBlockCSP, self).__init__()
-        inter_dim = in_dim // 2
+        inter_dim = int(in_dim * expand_ratio)
         self.out_dim = out_dim
         self.cv1 = ConvModule(in_dim, inter_dim, kernel_size=1)
         self.cv2 = ConvModule(in_dim, inter_dim, kernel_size=1)
-        self.module = nn.Sequential(
-            ConvModule(inter_dim, inter_dim, kernel_size=3, padding=1),
-            SPPF(inter_dim, inter_dim, expansion=1.0),
-            ConvModule(inter_dim, inter_dim, kernel_size=3, padding=1),
-            )
+        self.m = nn.Sequential(
+            ConvModule(inter_dim, inter_dim, kernel_size=3),
+            SPPF(inter_dim, inter_dim, expand_ratio=1.0),
+            ConvModule(inter_dim, inter_dim, kernel_size=3)
+        )
         self.cv3 = ConvModule(inter_dim * 2, self.out_dim, kernel_size=1)
 
-        # Initialize all layers
-        self.init_weights()
-
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
-
     def forward(self, x):
         x1 = self.cv1(x)
-        x2 = self.module(self.cv2(x))
-        y = self.cv3(torch.cat([x1, x2], dim=1))
+        x2 = self.cv2(x)
+        x3 = self.m(x2)
+        y = self.cv3(torch.cat([x1, x3], dim=1))
 
         return y
 
 
 if __name__=='__main__':
-    import time
     from thop import profile
-    # Model config
     
-    # YOLOv7-AF-Base config
-    class Yolov7AFBaseConfig(object):
-        def __init__(self) -> None:
-            # ---------------- Model config ----------------
-            self.out_stride = 32
-            self.max_stride = 32
-            ## Neck
-            self.neck_expand_ratio = 0.5
-            self.spp_pooling_size  = 5
-
-    cfg = Yolov7AFBaseConfig()
-    # Build a head
+    # Build a neck
     in_dim  = 512
     out_dim = 512
-    neck = SPPF(in_dim, out_dim)
+    model = SPPFBlockCSP(512, 512, expand_ratio=0.5)
+
+    # Randomly generate an input tensor
+    x = torch.randn(2, in_dim, 20, 20)
 
     # Inference
+    output = model(x)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of output : ', output.shape)
+
     x = torch.randn(1, in_dim, 20, 20)
-    t0 = time.time()
-    output = neck(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print('Neck output: ', output.shape)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))
 
-    flops, params = profile(neck, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
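
`SPPF` chains three 5×5 stride-1 max-pools and concatenates the intermediate results, covering the same 5/9/13 receptive fields as the classic SPP's parallel pooling at lower cost; `SPPFBlockCSP` wraps it in a CSP-style two-branch split. Because PyTorch pads max-pooling with negative infinity, the stacking is exactly equivalent to the larger kernels, as this standalone sanity check shows:

```python
import torch
import torch.nn as nn

m5  = nn.MaxPool2d(5,  stride=1, padding=2)
m9  = nn.MaxPool2d(9,  stride=1, padding=4)
m13 = nn.MaxPool2d(13, stride=1, padding=6)

x  = torch.randn(1, 8, 20, 20)
y1 = m5(x)
y2 = m5(y1)   # two stacked 5x5 pools == one 9x9 pool
y3 = m5(y2)   # three stacked 5x5 pools == one 13x13 pool
print(torch.allclose(y2, m9(x)), torch.allclose(y3, m13(x)))  # True True
```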

+ 79 - 97
yolo/models/yolov7/yolov7_pafpn.py

@@ -1,115 +1,111 @@
-from typing import List
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
 try:
-    from .modules import ConvModule, ELANLayerFPN, MDown
+    from .modules import ConvModule, ELANBlockFPN, DownSample
 except:
-    from  modules import ConvModule, ELANLayerFPN, MDown
+    from  modules import ConvModule, ELANBlockFPN, DownSample
 
 
-# Yolov7 af PaFPN
+# PaFPN-ELAN (YOLOv7's)
 class Yolov7PaFPN(nn.Module):
-    def __init__(self, cfg, in_dims: List = [512, 1024, 512]):
+    def __init__(self, 
+                 in_dims = [512, 1024, 512],
+                 head_dim = 256,
+                 ):
         super(Yolov7PaFPN, self).__init__()
         # ----------------------------- Basic parameters -----------------------------
         self.in_dims = in_dims
-        c3, c4, c5 = in_dims
+        self.head_dim = head_dim
+        self.fpn_out_dims = [head_dim] * 3
 
-        # ----------------------------- Yolov7's Top-down FPN -----------------------------
+        self.branch_width = 4
+        self.branch_depth = 1
+
+        c3, c4, c5 = self.in_dims
+
+        # ----------------------------- Top-down FPN -----------------------------
         ## P5 -> P4
-        self.reduce_layer_1 = ConvModule(c5, round(256*cfg.width), kernel_size=1)
-        self.reduce_layer_2 = ConvModule(c4, round(256*cfg.width), kernel_size=1)
-        self.top_down_layer_1 = ELANLayerFPN(in_dim       = round(256*cfg.width) + round(256*cfg.width),
-                                             out_dim      = round(256*cfg.width),
-                                             expansions   = cfg.fpn_expansions,
-                                             branch_width = cfg.fpn_block_bw,
-                                             branch_depth = cfg.fpn_block_dw,
+        self.reduce_layer_1 = ConvModule(c5, 256, kernel_size=1)
+        self.reduce_layer_2 = ConvModule(c4, 256, kernel_size=1)
+        self.top_down_layer_1 = ELANBlockFPN(in_dim = 256 + 256,
+                                             out_dim = 256,
+                                             expansion = 0.5,
+                                             branch_width = self.branch_width,
+                                             branch_depth = self.branch_depth,
                                              )
         ## P4 -> P3
-        self.reduce_layer_3 = ConvModule(round(256*cfg.width), round(128*cfg.width), kernel_size=1)
-        self.reduce_layer_4 = ConvModule(c3, round(128*cfg.width), kernel_size=1)
-        self.top_down_layer_2 = ELANLayerFPN(in_dim       = round(128*cfg.width) + round(128*cfg.width),
-                                             out_dim      = round(128*cfg.width),
-                                             expansions   = cfg.fpn_expansions,
-                                             branch_width = cfg.fpn_block_bw,
-                                             branch_depth = cfg.fpn_block_dw,
+        self.reduce_layer_3 = ConvModule(256, 128, kernel_size=1)
+        self.reduce_layer_4 = ConvModule(c3, 128, kernel_size=1)
+        self.top_down_layer_2 = ELANBlockFPN(in_dim = 128 + 128,
+                                             out_dim = 128,
+                                             expansion = 0.5,
+                                             branch_width = self.branch_width,
+                                             branch_depth = self.branch_depth,
                                              )
-        # ----------------------------- Yolov7's Bottom-up PAN -----------------------------
+        # ----------------------------- Bottom-up PAN -----------------------------
         ## P3 -> P4
-        self.downsample_layer_1 = MDown(round(128*cfg.width), round(256*cfg.width))
-        self.bottom_up_layer_1 = ELANLayerFPN(in_dim       = round(256*cfg.width) + round(256*cfg.width),
-                                              out_dim      = round(256*cfg.width),
-                                              expansions   = cfg.fpn_expansions,
-                                              branch_width = cfg.fpn_block_bw,
-                                              branch_depth = cfg.fpn_block_dw,
+        self.downsample_layer_1 = DownSample(128, 256)
+        self.bottom_up_layer_1 = ELANBlockFPN(in_dim = 256 + 256,
+                                              out_dim = 256,
+                                              expansion = 0.5,
+                                              branch_width = self.branch_width,
+                                              branch_depth = self.branch_depth,
                                               )
         ## P4 -> P5
-        self.downsample_layer_2 = MDown(round(256*cfg.width), round(512*cfg.width))
-        self.bottom_up_layer_2 = ELANLayerFPN(in_dim       = round(512*cfg.width) + c5,
-                                              out_dim      = round(512*cfg.width),
-                                              expansions   = cfg.fpn_expansions,
-                                              branch_width = cfg.fpn_block_bw,
-                                              branch_depth = cfg.fpn_block_dw,
+        self.downsample_layer_2 = DownSample(256, 512)
+        self.bottom_up_layer_2 = ELANBlockFPN(in_dim = 512 + c5,
+                                              out_dim = 512,
+                                              expansion = 0.5,
+                                              branch_width = self.branch_width,
+                                              branch_depth = self.branch_depth,
                                               )
 
-        # ----------------------------- Head conv layers -----------------------------
         ## Head convs
-        self.head_conv_1 = ConvModule(round(128*cfg.width), round(256*cfg.width), kernel_size=3, padding=1, stride=1)
-        self.head_conv_2 = ConvModule(round(256*cfg.width), round(512*cfg.width), kernel_size=3, padding=1, stride=1)
-        self.head_conv_3 = ConvModule(round(512*cfg.width), round(1024*cfg.width), kernel_size=3, padding=1, stride=1)
-
-        # ---------------------- Yolox's output projection ----------------------
-        self.out_layers = nn.ModuleList([
-            ConvModule(in_dim, round(cfg.head_dim*cfg.width), kernel_size=1)
-                      for in_dim in [round(256*cfg.width), round(512*cfg.width), round(1024*cfg.width)]
-                      ])
-        self.out_dims = [round(cfg.head_dim*cfg.width)] * 3
-
-        # Initialize all layers
-        self.init_weights()
-
-    def init_weights(self):
-        """Initialize the parameters."""
-        for m in self.modules():
-            if isinstance(m, torch.nn.Conv2d):
-                m.reset_parameters()
+        self.head_conv_1 = ConvModule(128, 256, kernel_size=3, stride=1)
+        self.head_conv_2 = ConvModule(256, 512, kernel_size=3, stride=1)
+        self.head_conv_3 = ConvModule(512, 1024, kernel_size=3, stride=1)
+
+        ## Output projs
+        self.out_layers = nn.ModuleList([ConvModule(in_dim, head_dim, kernel_size=1)
+                                         for in_dim in [256, 512, 1024]
+                                         ])
 
     def forward(self, features):
         c3, c4, c5 = features
 
-        # ------------------ Top down FPN ------------------
+        # Top down
         ## P5 -> P4
-        p5 = self.reduce_layer_1(c5)
-        p5_up = F.interpolate(p5, scale_factor=2.0)
-        p4 = self.reduce_layer_2(c4)
-        p4 = self.top_down_layer_1(torch.cat([p5_up, p4], dim=1))
-
+        c6 = self.reduce_layer_1(c5)
+        c7 = F.interpolate(c6, scale_factor=2.0)
+        c8 = torch.cat([c7, self.reduce_layer_2(c4)], dim=1)
+        c9 = self.top_down_layer_1(c8)
         ## P4 -> P3
-        p4_in = self.reduce_layer_3(p4)
-        p4_up = F.interpolate(p4_in, scale_factor=2.0)
-        p3 = self.reduce_layer_4(c3)
-        p3 = self.top_down_layer_2(torch.cat([p4_up, p3], dim=1))
-
-        # ------------------ Bottom up PAN ------------------
-        ## P3 -> P4
-        p3_ds = self.downsample_layer_1(p3)
-        p4 = torch.cat([p3_ds, p4], dim=1)
-        p4 = self.bottom_up_layer_1(p4)
-
+        c10 = self.reduce_layer_3(c9)
+        c11 = F.interpolate(c10, scale_factor=2.0)
+        c12 = torch.cat([c11, self.reduce_layer_4(c3)], dim=1)
+        c13 = self.top_down_layer_2(c12)
+
+        # Bottom up
+        ## P3 -> P4
+        c14 = self.downsample_layer_1(c13)
+        c15 = torch.cat([c14, c9], dim=1)
+        c16 = self.bottom_up_layer_1(c15)
         ## P4 -> P5
-        p4_ds = self.downsample_layer_2(p4)
-        p5 = torch.cat([p4_ds, c5], dim=1)
-        p5 = self.bottom_up_layer_2(p5)
-
-        out_feats = [self.head_conv_1(p3), self.head_conv_2(p4), self.head_conv_3(p5)]
-            
+        c17 = self.downsample_layer_2(c16)
+        c18 = torch.cat([c17, c5], dim=1)
+        c19 = self.bottom_up_layer_2(c18)
+
+        c20 = self.head_conv_1(c13)
+        c21 = self.head_conv_2(c16)
+        c22 = self.head_conv_3(c19)
+        out_feats = [c20, c21, c22] # [P3, P4, P5]
+        
         # output proj layers
         out_feats_proj = []
         for feat, layer in zip(out_feats, self.out_layers):
             out_feats_proj.append(layer(feat))
-            
         return out_feats_proj
 
 
@@ -118,30 +114,16 @@ if __name__=='__main__':
     from thop import profile
     # Model config
     
-    # YOLOv7-Base config
-    class Yolov7BaseConfig(object):
-        def __init__(self) -> None:
-            # ---------------- Model config ----------------
-            self.width    = 0.50
-            self.depth    = 0.34
-            self.out_stride = [8, 16, 32]
-            self.max_stride = 32
-            self.num_levels = 3
-            self.fpn_expansions = [0.5, 0.5]
-            self.fpn_block_bw = 4
-            self.fpn_block_dw = 1
-            ## Head
-            self.head_dim = 256
-
-    cfg = Yolov7BaseConfig()
     # Build a head
     in_dims  = [128, 256, 512]
-    fpn = Yolov7PaFPN(cfg, in_dims)
+    fpn = Yolov7PaFPN(in_dims, head_dim=256)
 
-    # Inference
+    # Randomly generate the input data
     x = [torch.randn(1, in_dims[0], 80, 80),
          torch.randn(1, in_dims[1], 40, 40),
          torch.randn(1, in_dims[2], 20, 20)]
+    
+    # Inference
     t0 = time.time()
     output = fpn(x)
     t1 = time.time()
@@ -153,4 +135,4 @@ if __name__=='__main__':
     flops, params = profile(fpn, inputs=(x, ), verbose=False)
     print('==============================')
     print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
+    print('Params : {:.2f} M'.format(params / 1e6))
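
The refactored PaFPN hard-codes the channel plan that the removed `round(C * cfg.width)` arithmetic used to compute: 512/1024/512 backbone channels are fused down to 128/256/512 pyramid channels, then projected to `head_dim`. A minimal sketch of that bookkeeping, assuming only the in/out channel contracts of `ELANBlockFPN` and `DownSample` (their internals are not shown in this diff, so plain-conv stand-ins are used here):

import torch
import torch.nn as nn
import torch.nn.functional as F

# Stand-ins with the same channel contracts as the real modules:
# a 1x1 conv for the ELAN fusion blocks, a stride-2 conv for DownSample.
def fuse(c_in, c_out): return nn.Conv2d(c_in, c_out, kernel_size=1)
def down(c_in, c_out): return nn.Conv2d(c_in, c_out, kernel_size=3, stride=2, padding=1)

c3, c4, c5 = 512, 1024, 512                               # in_dims
reduce_1, reduce_2 = nn.Conv2d(c5, 256, 1), nn.Conv2d(c4, 256, 1)
reduce_3, reduce_4 = nn.Conv2d(256, 128, 1), nn.Conv2d(c3, 128, 1)
td_1, td_2 = fuse(256 + 256, 256), fuse(128 + 128, 128)  # top-down
ds_1, bu_1 = down(128, 256),       fuse(256 + 256, 256)  # bottom-up
ds_2, bu_2 = down(256, 512),       fuse(512 + c5, 512)

x3, x4, x5 = (torch.randn(1, c, s, s) for c, s in [(c3, 80), (c4, 40), (c5, 20)])

# Top-down path: reduce, upsample, concat, fuse
t4 = td_1(torch.cat([F.interpolate(reduce_1(x5), scale_factor=2.0), reduce_2(x4)], dim=1))
t3 = td_2(torch.cat([F.interpolate(reduce_3(t4), scale_factor=2.0), reduce_4(x3)], dim=1))
# Bottom-up path: downsample, concat, fuse
b4 = bu_1(torch.cat([ds_1(t3), t4], dim=1))
b5 = bu_2(torch.cat([ds_2(b4), x5], dim=1))

for name, t in [("P3", t3), ("P4", b4), ("P5", b5)]:
    print(name, tuple(t.shape))  # 128 @ stride 8, 256 @ stride 16, 512 @ stride 32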

+ 0 - 196
yolo/models/yolov7/yolov7_pred.py

@@ -1,196 +0,0 @@
-import torch
-import torch.nn as nn
-
-
-# -------------------- Detection Pred Layer --------------------
-class DetPredLayer(nn.Module):
-    def __init__(self,
-                 cls_dim      :int,
-                 reg_dim      :int,
-                 stride       :int,
-                 num_classes  :int,
-                 ):
-        super().__init__()
-        # --------- Basic Parameters ----------
-        self.stride  = stride
-        self.cls_dim = cls_dim
-        self.reg_dim = reg_dim
-        self.num_classes = num_classes
-
-        # --------- Network Parameters ----------
-        self.obj_pred = nn.Conv2d(self.cls_dim, 1, kernel_size=1)
-        self.cls_pred = nn.Conv2d(self.cls_dim, num_classes, kernel_size=1)
-        self.reg_pred = nn.Conv2d(self.reg_dim, 4, kernel_size=1)                
-
-        self.init_bias()
-        
-    def init_bias(self):
-        # Init bias
-        init_prob = 0.01
-        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
-        # obj pred
-        b = self.obj_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        self.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        # cls pred
-        b = self.cls_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        self.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        # reg pred
-        b = self.reg_pred.bias.view(-1, )
-        b.data.fill_(1.0)
-        self.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        w = self.reg_pred.weight
-        w.data.fill_(0.)
-        self.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
-
-    def generate_anchors(self, fmp_size):
-        """
-            fmp_size: (List) [H, W]
-        """
-        fmp_h, fmp_w = fmp_size
-        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
-
-        # [H, W, 2] -> [HW, 2]
-        anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
-        anchors = anchors + 0.5
-        anchors = anchors * self.stride
-
-        return anchors
-        
-    def forward(self, cls_feat, reg_feat):
-        # prediction layers
-        obj_pred = self.obj_pred(reg_feat)
-        cls_pred = self.cls_pred(cls_feat)
-        reg_pred = self.reg_pred(reg_feat)
-
-        # generate grid coordinates
-        B, _, H, W = cls_pred.size()
-        fmp_size = [H, W]
-        anchors = self.generate_anchors(fmp_size)
-        anchors = anchors.to(cls_pred.device)
-
-        # reshape the predictions to simplify the subsequent processing
-        # [B, C, H, W] -> [B, H, W, C] -> [B, H*W, C]
-        obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
-        cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
-        reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
-        
-        # decode the bounding-box coordinates
-        cxcy_pred = reg_pred[..., :2] * self.stride + anchors
-        bwbh_pred = torch.exp(reg_pred[..., 2:]) * self.stride
-        pred_x1y1 = cxcy_pred - bwbh_pred * 0.5
-        pred_x2y2 = cxcy_pred + bwbh_pred * 0.5
-        box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
-
-        # output dict
-        outputs = {"pred_obj": obj_pred,       # (torch.Tensor) [B, M, 1]
-                   "pred_cls": cls_pred,       # (torch.Tensor) [B, M, C]
-                   "pred_reg": reg_pred,       # (torch.Tensor) [B, M, 4]
-                   "pred_box": box_pred,       # (torch.Tensor) [B, M, 4]
-                   "anchors" : anchors,        # (torch.Tensor) [M, 2]
-                   "fmp_size": fmp_size,
-                   "stride"  : self.stride,    # (Int)
-                   }
-
-        return outputs
-
-class Yolov7DetPredLayer(nn.Module):
-    def __init__(self, cfg):
-        super().__init__()
-        # --------- Basic Parameters ----------
-        self.cfg = cfg
-        self.num_levels = len(cfg.out_stride)
-
-        # ----------- Network Parameters -----------
-        ## pred layers
-        self.multi_level_preds = nn.ModuleList(
-            [DetPredLayer(cls_dim      = round(cfg.head_dim * cfg.width),
-                          reg_dim      = round(cfg.head_dim * cfg.width),
-                          stride       = cfg.out_stride[level],
-                          num_classes  = cfg.num_classes,)
-                          for level in range(self.num_levels)
-                          ])
-
-    def forward(self, cls_feats, reg_feats):
-        all_anchors = []
-        all_fmp_sizes = []
-        all_obj_preds = []
-        all_cls_preds = []
-        all_reg_preds = []
-        all_box_preds = []
-        for level in range(self.num_levels):
-            # -------------- Single-level prediction --------------
-            outputs = self.multi_level_preds[level](cls_feats[level], reg_feats[level])
-
-            # collect results
-            all_obj_preds.append(outputs["pred_obj"])
-            all_cls_preds.append(outputs["pred_cls"])
-            all_reg_preds.append(outputs["pred_reg"])
-            all_box_preds.append(outputs["pred_box"])
-            all_fmp_sizes.append(outputs["fmp_size"])
-            all_anchors.append(outputs["anchors"])
-        
-        # output dict
-        outputs = {"pred_obj":  all_obj_preds,         # List(Tensor) [B, M, 1]
-                   "pred_cls":  all_cls_preds,         # List(Tensor) [B, M, C]
-                   "pred_reg":  all_reg_preds,         # List(Tensor) [B, M, 4*(reg_max)]
-                   "pred_box":  all_box_preds,         # List(Tensor) [B, M, 4]
-                   "fmp_sizes": all_fmp_sizes,         # List(Tensor) [M, 1]
-                   "anchors":   all_anchors,           # List(Tensor) [M, 2]
-                   "strides":   self.cfg.out_stride,   # List(Int) = [8, 16, 32]
-                   }
-
-        return outputs
-
-
-if __name__=='__main__':
-    import time
-    from thop import profile
-    # Model config
-    
-    # YOLOv7AF-Base config
-    class Yolov7AFBaseConfig(object):
-        def __init__(self) -> None:
-            # ---------------- Model config ----------------
-            self.width    = 1.0
-            self.depth    = 1.0
-            self.out_stride = [8, 16, 32]
-            self.max_stride = 32
-            ## Head
-            self.head_dim  = 256
-
-    cfg = Yolov7AFBaseConfig()
-    cfg.num_classes = 20
-    # Build a pred layer
-    pred = Yolov7DetPredLayer(cfg)
-
-    # Inference
-    cls_feats = [torch.randn(1, cfg.head_dim, 80, 80),
-                 torch.randn(1, cfg.head_dim, 40, 40),
-                 torch.randn(1, cfg.head_dim, 20, 20),]
-    reg_feats = [torch.randn(1, cfg.head_dim, 80, 80),
-                 torch.randn(1, cfg.head_dim, 40, 40),
-                 torch.randn(1, cfg.head_dim, 20, 20),]
-    t0 = time.time()
-    output = pred(cls_feats, reg_feats)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print('====== Pred output ======= ')
-    pred_obj = output["pred_obj"]
-    pred_cls = output["pred_cls"]
-    pred_reg = output["pred_reg"]
-    pred_box = output["pred_box"]
-    anchors  = output["anchors"]
-    
-    for level in range(len(cfg.out_stride)):
-        print("- Level-{} : objectness       -> {}".format(level, pred_obj[level].shape))
-        print("- Level-{} : classification   -> {}".format(level, pred_cls[level].shape))
-        print("- Level-{} : delta regression -> {}".format(level, pred_reg[level].shape))
-        print("- Level-{} : bbox regression  -> {}".format(level, pred_box[level].shape))
-        print("- Level-{} : anchor boxes     -> {}".format(level, anchors[level].shape))
-
-    flops, params = profile(pred, inputs=(cls_feats, reg_feats, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
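
The decoding math that the deleted DetPredLayer implemented is easy to verify numerically: anchors are grid-cell centers scaled by the stride, and a (cx, cy, w, h) delta is decoded with the center offset scaled by the stride and an exp on the sizes. A standalone re-derivation of that math (a sketch for checking, not code that remains in the repo):

import torch

stride, fmp_h, fmp_w = 32, 2, 2

# Same math as the removed generate_anchors: (x + 0.5, y + 0.5) * stride.
# indexing="ij" matches the behavior of the old list-style torch.meshgrid call.
ys, xs = torch.meshgrid(torch.arange(fmp_h), torch.arange(fmp_w), indexing="ij")
anchors = (torch.stack([xs, ys], dim=-1).float().view(-1, 2) + 0.5) * stride
print(anchors.tolist())   # [[16, 16], [48, 16], [16, 48], [48, 48]]

# A zero prediction decodes to a stride-sized box centered on each anchor.
reg_pred = torch.zeros(anchors.shape[0], 4)
cxcy = reg_pred[..., :2] * stride + anchors
bwbh = torch.exp(reg_pred[..., 2:]) * stride
boxes = torch.cat([cxcy - bwbh * 0.5, cxcy + bwbh * 0.5], dim=-1)
print(boxes[0].tolist())  # [0.0, 0.0, 32.0, 32.0]

# The init_bias prior: filling the obj/cls bias with -log((1 - p) / p)
# for p = 0.01 makes every sigmoid output start near 0.01.
p = 0.01
b = -torch.log(torch.tensor((1.0 - p) / p))
print(b.item(), torch.sigmoid(b).item())  # ~ -4.5951, ~ 0.01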

+ 1 - 1
yolo/train.py

@@ -23,7 +23,7 @@ from config import build_config
 from dataset.build import build_dataset, build_transform
 
 # ----------------- Evaluator Components -----------------
-from evaluator.map_evaluator import MapEvaluator
+from map_evaluator import MapEvaluator
 
 # ----------------- Model Components -----------------
 from models import build_model