|
@@ -1,112 +1,133 @@
|
|
|
|
|
import math
from typing import List

import torch
import torch.nn as nn
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
- from .modules import ConvModule
|
|
|
|
|
|
|
+ from .modules import ConvModule, DflLayer
|
|
|
except:
|
|
except:
|
|
|
- from modules import ConvModule
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-# -------------------- Detection Head --------------------
|
|
|
|
|
-## Single-level Detection Head
|
|
|
|
|
-class DetHead(nn.Module):
|
|
|
|
|
- def __init__(self,
|
|
|
|
|
- in_dim :int = 256,
|
|
|
|
|
- cls_head_dim :int = 256,
|
|
|
|
|
- reg_head_dim :int = 256,
|
|
|
|
|
- num_cls_head :int = 2,
|
|
|
|
|
- num_reg_head :int = 2,
|
|
|
|
|
- ):
|
|
|
|
|
- super().__init__()
|
|
|
|
|
- # --------- Basic Parameters ----------
|
|
|
|
|
- self.in_dim = in_dim
|
|
|
|
|
- self.num_cls_head = num_cls_head
|
|
|
|
|
- self.num_reg_head = num_reg_head
|
|
|
|
|
-
|
|
|
|
|
- # --------- Network Parameters ----------
|
|
|
|
|
- ## classification head
|
|
|
|
|
- cls_feats = []
|
|
|
|
|
- self.cls_head_dim = cls_head_dim
|
|
|
|
|
- for i in range(num_cls_head):
|
|
|
|
|
- if i == 0:
|
|
|
|
|
- cls_feats.append(nn.Sequential(
|
|
|
|
|
- ConvModule(in_dim, in_dim, kernel_size=3, stride=1, groups=in_dim),
|
|
|
|
|
- ConvModule(in_dim, self.cls_head_dim, kernel_size=1),
|
|
|
|
|
- ))
|
|
|
|
|
- else:
|
|
|
|
|
- cls_feats.append(nn.Sequential(
|
|
|
|
|
- ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=3, stride=1, groups=self.cls_head_dim),
|
|
|
|
|
- ConvModule(self.cls_head_dim, self.cls_head_dim, kernel_size=1),
|
|
|
|
|
- ))
|
|
|
|
|
-
|
|
|
|
|
- ## bbox regression head
|
|
|
|
|
- reg_feats = []
|
|
|
|
|
- self.reg_head_dim = reg_head_dim
|
|
|
|
|
- for i in range(num_reg_head):
|
|
|
|
|
- if i == 0:
|
|
|
|
|
- reg_feats.append(ConvModule(in_dim, self.reg_head_dim, kernel_size=3, stride=1))
|
|
|
|
|
- else:
|
|
|
|
|
- reg_feats.append(ConvModule(self.reg_head_dim, self.reg_head_dim, kernel_size=3, stride=1))
|
|
|
|
|
-
|
|
|
|
|
- self.cls_feats = nn.Sequential(*cls_feats)
|
|
|
|
|
- self.reg_feats = nn.Sequential(*reg_feats)
|
|
|
|
|
|
|
+ from modules import ConvModule, DflLayer
|
|
|
|
|
|
|
|
- self.init_weights()
|
|
|
|
|
-
|
|
|
|
|
- def init_weights(self):
|
|
|
|
|
- """Initialize the parameters."""
|
|
|
|
|
- for m in self.modules():
|
|
|
|
|
- if isinstance(m, torch.nn.Conv2d):
|
|
|
|
|
- m.reset_parameters()
|
|
|
|
|
|
|
|
|
|
- def forward(self, x):
|
|
|
|
|
- """
|
|
|
|
|
- in_feats: (Tensor) [B, C, H, W]
|
|
|
|
|
- """
|
|
|
|
|
- cls_feats = self.cls_feats(x)
|
|
|
|
|
- reg_feats = self.reg_feats(x)
|
|
|
|
|
-
|
|
|
|
|
- return cls_feats, reg_feats
|
|
|
|
|
-
|
|
|
|
|
-## Multi-level Detection Head
|
|
|
|
|
|
|
+# YOLO11 detection head
|
|
|
class Yolo11DetHead(nn.Module):
|
|
class Yolo11DetHead(nn.Module):
|
|
|
- def __init__(self, cfg, in_dims: List = [256, 512, 1024]):
|
|
|
|
|
|
|
+ def __init__(self, cfg, fpn_dims: List = [64, 128, 245]):
|
|
|
super().__init__()
|
|
super().__init__()
|
|
|
- self.num_levels = len(cfg.out_stride)
|
|
|
|
|
- ## ----------- Network Parameters -----------
|
|
|
|
|
- self.multi_level_heads = nn.ModuleList(
|
|
|
|
|
- [DetHead(in_dim = in_dims[level],
|
|
|
|
|
- cls_head_dim = max(in_dims[0], min(cfg.num_classes, 128)),
|
|
|
|
|
- reg_head_dim = max(in_dims[0]//4, 16, 4*cfg.reg_max),
|
|
|
|
|
- num_cls_head = cfg.num_cls_head,
|
|
|
|
|
- num_reg_head = cfg.num_reg_head,
|
|
|
|
|
- ) for level in range(self.num_levels)])
|
|
|
|
|
- # --------- Basic Parameters ----------
|
|
|
|
|
- self.in_dims = in_dims
|
|
|
|
|
- self.cls_head_dim = self.multi_level_heads[0].cls_head_dim
|
|
|
|
|
- self.reg_head_dim = self.multi_level_heads[0].reg_head_dim
|
|
|
|
|
-
|
|
|
|
|
- def forward(self, feats):
|
|
|
|
|
|
|
+ self.out_stride = cfg.out_stride
|
|
|
|
|
+ self.reg_max = cfg.reg_max
|
|
|
|
|
+ self.num_classes = cfg.num_classes
|
|
|
|
|
+
|
|
|
|
|
+ self.cls_dim = max(fpn_dims[0], min(cfg.num_classes, 128))
|
|
|
|
|
+ self.reg_dim = max(fpn_dims[0]//4, 16, 4*cfg.reg_max)
|
|
|
|
|
+
|
|
|
|
|
+ # classification head
|
|
|
|
|
+ self.cls_heads = nn.ModuleList(
|
|
|
|
|
+ nn.Sequential(
|
|
|
|
|
+ nn.Sequential(ConvModule(dim, dim, kernel_size=3, stride=1, groups=dim),
|
|
|
|
|
+ ConvModule(dim, self.cls_dim, kernel_size=1)),
|
|
|
|
|
+ nn.Sequential(ConvModule(self.cls_dim, self.cls_dim, kernel_size=3, stride=1, groups=self.cls_dim),
|
|
|
|
|
+ ConvModule(self.cls_dim, self.cls_dim, kernel_size=1)),
|
|
|
|
|
+ nn.Conv2d(self.cls_dim, cfg.num_classes, kernel_size=1),
|
|
|
|
|
+ )
|
|
|
|
|
+ for dim in fpn_dims
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # bbox regression head
|
|
|
|
|
+ self.reg_heads = nn.ModuleList(
|
|
|
|
|
+ nn.Sequential(
|
|
|
|
|
+ ConvModule(dim, self.reg_dim, kernel_size=3, stride=1),
|
|
|
|
|
+ ConvModule(self.reg_dim, self.reg_dim, kernel_size=3, stride=1),
|
|
|
|
|
+ nn.Conv2d(self.reg_dim, 4*cfg.reg_max, kernel_size=1),
|
|
|
|
|
+ )
|
|
|
|
|
+ for dim in fpn_dims
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # DFL layer for decoding bbox
|
|
|
|
|
+ self.dfl_layer = DflLayer(cfg.reg_max)
|
|
|
|
|
+ for p in self.dfl_layer.parameters():
|
|
|
|
|
+ p.requires_grad = False
|
|
|
|
|
+
|
|
|
|
|
+ self.init_bias()
|
|
|
|
|
+
|
|
|
|
|
+ def init_bias(self):
|
|
|
|
|
+ # cls pred
|
|
|
|
|
+ for i, m in enumerate(self.cls_heads):
|
|
|
|
|
+ b = m[-1].bias.view(1, -1)
|
|
|
|
|
+ b.data.fill_(math.log(5 / self.num_classes / (640. / self.out_stride[i]) ** 2))
|
|
|
|
|
+ m[-1].bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
|
|
|
|
|
+
|
|
|
|
|
+ # reg pred
|
|
|
|
|
+ for m in self.reg_heads:
|
|
|
|
|
+ b = m[-1].bias.view(-1, )
|
|
|
|
|
+ b.data.fill_(1.0)
|
|
|
|
|
+ m[-1].bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
|
|
|
|
|
+
|
|
|
|
|
+ w = m[-1].weight
|
|
|
|
|
+ w.data.fill_(0.)
|
|
|
|
|
+ m[-1].weight = torch.nn.Parameter(w, requires_grad=True)
|
|
|
|
|
+
|
|
|
|
|
+ def generate_anchors(self, fmp_size, level):
|
|
|
"""
|
|
"""
|
|
|
- feats: List[(Tensor)] [[B, C, H, W], ...]
|
|
|
|
|
|
|
+ fmp_size: (List) [H, W]
|
|
|
"""
|
|
"""
|
|
|
- cls_feats = []
|
|
|
|
|
- reg_feats = []
|
|
|
|
|
- for feat, head in zip(feats, self.multi_level_heads):
|
|
|
|
|
- # ---------------- Pred ----------------
|
|
|
|
|
- cls_feat, reg_feat = head(feat)
|
|
|
|
|
-
|
|
|
|
|
- cls_feats.append(cls_feat)
|
|
|
|
|
- reg_feats.append(reg_feat)
|
|
|
|
|
-
|
|
|
|
|
- return cls_feats, reg_feats
|
|
|
|
|
|
|
+ # generate grid cells
|
|
|
|
|
+ fmp_h, fmp_w = fmp_size
|
|
|
|
|
+ anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
|
|
|
|
|
+ # [H, W, 2] -> [HW, 2]
|
|
|
|
|
+ anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
|
|
|
|
|
+ anchors += 0.5 # add center offset
|
|
|
|
|
+ anchors *= self.out_stride[level]
|
|
|
|
|
+
|
|
|
|
|
+ return anchors
|
|
|
|
|
+
|
|
|
|
|
+ def forward(self, fpn_feats):
|
|
|
|
|
+ anchors = []
|
|
|
|
|
+ strides = []
|
|
|
|
|
+ cls_preds = []
|
|
|
|
|
+ reg_preds = []
|
|
|
|
|
+ box_preds = []
|
|
|
|
|
+
|
|
|
|
|
+ for lvl, (feat, cls_head, reg_head) in enumerate(zip(fpn_feats, self.cls_heads, self.reg_heads)):
|
|
|
|
|
+ bs, c, h, w = feat.size()
|
|
|
|
|
+ device = feat.device
|
|
|
|
|
+
|
|
|
|
|
+ # Prediction
|
|
|
|
|
+ cls_pred = cls_head(feat)
|
|
|
|
|
+ reg_pred = reg_head(feat)
|
|
|
|
|
+
|
|
|
|
|
+ # [bs, c, h, w] -> [bs, c, hw] -> [bs, hw, c]
|
|
|
|
|
+ cls_pred = cls_pred.flatten(2).permute(0, 2, 1).contiguous()
|
|
|
|
|
+ reg_pred = reg_pred.flatten(2).permute(0, 2, 1).contiguous()
|
|
|
|
|
+
|
|
|
|
|
+ # anchor points: [M, 2]
|
|
|
|
|
+ anchor = self.generate_anchors(fmp_size=[h, w], level=lvl).to(device)
|
|
|
|
|
+ stride = torch.ones_like(anchor[..., :1]) * self.out_stride[lvl]
|
|
|
|
|
+
|
|
|
|
|
+ # Decode bbox coords
|
|
|
|
|
+ box_pred = self.dfl_layer(reg_pred, anchor[None], self.out_stride[lvl])
|
|
|
|
|
+
|
|
|
|
|
+ # collect results
|
|
|
|
|
+ anchors.append(anchor)
|
|
|
|
|
+ strides.append(stride)
|
|
|
|
|
+ cls_preds.append(cls_pred)
|
|
|
|
|
+ reg_preds.append(reg_pred)
|
|
|
|
|
+ box_preds.append(box_pred)
|
|
|
|
|
+
|
|
|
|
|
+ # output dict
|
|
|
|
|
+ outputs = {"pred_cls": cls_preds, # List(Tensor) [B, M, C]
|
|
|
|
|
+ "pred_reg": reg_preds, # List(Tensor) [B, M, 4*(reg_max)]
|
|
|
|
|
+ "pred_box": box_preds, # List(Tensor) [B, M, 4]
|
|
|
|
|
+ "anchors": anchors, # List(Tensor) [M, 2]
|
|
|
|
|
+ "stride_tensors": strides, # List(Tensor) [M, 1]
|
|
|
|
|
+ "strides": self.out_stride, # List(Int) = [8, 16, 32]
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return outputs
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__=='__main__':
|
|
if __name__=='__main__':
|
|
|
- import time
|
|
|
|
|
from thop import profile
|
|
from thop import profile
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
# YOLO11-Base config
|
|
# YOLO11-Base config
|
|
|
class Yolo11BaseConfig(object):
|
|
class Yolo11BaseConfig(object):
|
|
|
def __init__(self) -> None:
|
|
def __init__(self) -> None:
|
|
@@ -118,32 +139,24 @@ if __name__=='__main__':
|
|
|
self.out_stride = [8, 16, 32]
|
|
self.out_stride = [8, 16, 32]
|
|
|
self.max_stride = 32
|
|
self.max_stride = 32
|
|
|
self.num_levels = 3
|
|
self.num_levels = 3
|
|
|
- ## Head
|
|
|
|
|
- self.num_cls_head = 2
|
|
|
|
|
- self.num_reg_head = 2
|
|
|
|
|
|
|
+ self.num_classes = 80
|
|
|
|
|
|
|
|
cfg = Yolo11BaseConfig()
|
|
cfg = Yolo11BaseConfig()
|
|
|
- cfg.num_classes = 20
|
|
|
|
|
|
|
|
|
|
- # Build a head
|
|
|
|
|
- fpn_dims = [128, 256, 512]
|
|
|
|
|
- pyramid_feats = [torch.randn(1, fpn_dims[0], 80, 80),
|
|
|
|
|
- torch.randn(1, fpn_dims[1], 40, 40),
|
|
|
|
|
- torch.randn(1, fpn_dims[2], 20, 20)]
|
|
|
|
|
- head = Yolo11DetHead(cfg, fpn_dims)
|
|
|
|
|
|
|
+ # Random data
|
|
|
|
|
+ fpn_dims = [256, 512, 512]
|
|
|
|
|
+ x = [torch.randn(1, fpn_dims[0], 80, 80),
|
|
|
|
|
+ torch.randn(1, fpn_dims[1], 40, 40),
|
|
|
|
|
+ torch.randn(1, fpn_dims[2], 20, 20)]
|
|
|
|
|
|
|
|
|
|
+ # Neck model
|
|
|
|
|
+ model = Yolo11DetHead(cfg, fpn_dims)
|
|
|
|
|
|
|
|
# Inference
|
|
# Inference
|
|
|
- t0 = time.time()
|
|
|
|
|
- cls_feats, reg_feats = head(pyramid_feats)
|
|
|
|
|
- t1 = time.time()
|
|
|
|
|
- print('Time: ', t1 - t0)
|
|
|
|
|
- print("====== Yolo11 Head output ======")
|
|
|
|
|
- for level, (cls_f, reg_f) in enumerate(zip(cls_feats, reg_feats)):
|
|
|
|
|
- print("- Level-{} : ".format(level), cls_f.shape, reg_f.shape)
|
|
|
|
|
-
|
|
|
|
|
- flops, params = profile(head, inputs=(pyramid_feats, ), verbose=False)
|
|
|
|
|
- print('==============================')
|
|
|
|
|
|
|
+ outputs = model(x)
|
|
|
|
|
+
|
|
|
|
|
+ print('============ FLOPs & Params ===========')
|
|
|
|
|
+ flops, params = profile(model, inputs=(x, ), verbose=False)
|
|
|
print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
|
|
print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
|
|
|
print('Params : {:.2f} M'.format(params / 1e6))
|
|
print('Params : {:.2f} M'.format(params / 1e6))
|
|
|
|
|
|