yjh0410 1 year ago
Commit
9228f75dce

+ 4 - 0
config/__init__.py

@@ -87,6 +87,7 @@ from .model_config.yolov8_config import yolov8_cfg
 from .model_config.yolox_config import yolox_cfg
 ## My RTCDet series
 from .model_config.rtcdet_config import rtcdet_cfg, rtcdet_seg_cfg, rtcdet_pos_cfg, rtcdet_seg_pos_cfg
+from .model_config.ctrnet_config import ctrnet_cfg
 
 def build_model_config(args):
     print('==============================')
@@ -118,6 +119,9 @@ def build_model_config(args):
     # RTCDet
     elif args.model in ['rtcdet_n', 'rtcdet_t', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
         cfg = rtcdet_cfg[args.model]
+    # CenterNet
+    elif args.model in ['ctrnet_n', 'ctrnet_t', 'ctrnet_s', 'ctrnet_m', 'ctrnet_l', 'ctrnet_x']:
+        cfg = ctrnet_cfg[args.model]
 
     return cfg
 

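A minimal usage sketch of the new dispatch (hedged: assumes the repo root is on PYTHONPATH; the flag value is illustrative):

from types import SimpleNamespace
from config import build_model_config

args = SimpleNamespace(model='ctrnet_n')
cfg = build_model_config(args)   # resolves to ctrnet_cfg['ctrnet_n']
print(cfg['out_stride'])         # 4, per the ctrnet_config.py diff below
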
+ 2 - 3
config/model_config/ctrnet_config.py

@@ -11,7 +11,6 @@ ctrnet_cfg = {
         'bk_depthwise': False,
         'width': 0.25,
         'depth': 0.34,
-        'ratio': 2.0,
         'max_stride': 32,
         'out_stride': 4,
         ## Neck
@@ -27,8 +26,8 @@ ctrnet_cfg = {
         'dec_depthwise': False,
         ## Head
         'head': 'decoupled_head',
-        'num_cls_head': 2,
-        'num_reg_head': 2,
+        'num_cls_head': 4,
+        'num_reg_head': 4,
         'head_act': 'silu',
         'head_norm': 'BN',
         'head_depthwise': False,  

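For context on the deepened heads: 'num_cls_head' and 'num_reg_head' typically set the number of stacked convs in each branch of a decoupled head. A hedged, self-contained sketch (the 64-channel width and exact layer layout are illustrative, not the repo's actual head):

import torch.nn as nn

def make_branch(dim, num_layers):
    # one conv + BN + SiLU per layer, matching head_act='silu', head_norm='BN'
    return nn.Sequential(*[nn.Sequential(nn.Conv2d(dim, dim, 3, padding=1),
                                         nn.BatchNorm2d(dim),
                                         nn.SiLU(inplace=True))
                           for _ in range(num_layers)])

cls_branch = make_branch(64, 4)  # deepened from 2 to 4 convs in this commit
reg_branch = make_branch(64, 4)
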
+ 5 - 0
models/detectors/__init__.py

@@ -13,6 +13,7 @@ from .yolov8.build import build_yolov8
 from .yolox.build import build_yolox
 # My RTCDet series
 from .rtcdet.build import build_rtcdet
+from .ctrnet.build import build_ctrnet
 
 
 # build object detector
@@ -58,6 +59,10 @@ def build_model(args,
     elif args.model in ['rtcdet_n', 'rtcdet_t', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l', 'rtcdet_x']:
         model, criterion = build_rtcdet(
             args, model_cfg, device, num_classes, trainable, deploy)
+    # CenterNet
+    elif args.model in ['ctrnet_n', 'ctrnet_t', 'ctrnet_s', 'ctrnet_m', 'ctrnet_l', 'ctrnet_x']:
+        model, criterion = build_ctrnet(
+            args, model_cfg, device, num_classes, trainable, deploy)
 
     if trainable:
         # Load pretrained weight

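Putting the two dispatch tables together, a hedged end-to-end build sketch (flag values are illustrative; with trainable=False, no pretrained weights are loaded and criterion is None):

import torch
from types import SimpleNamespace
from config import build_model_config
from models.detectors import build_model

args = SimpleNamespace(model='ctrnet_n', conf_thresh=0.3, topk=100,
                       no_multi_labels=False, nms_class_agnostic=False)
cfg = build_model_config(args)
model, criterion = build_model(args, cfg, torch.device('cpu'),
                               num_classes=80, trainable=False, deploy=False)
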
+ 43 - 0
models/detectors/ctrnet/build.py

@@ -0,0 +1,43 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import torch
+import torch.nn as nn
+
+from .loss import build_criterion
+from .ctrnet import CenterNet
+
+
+# build object detector
+def build_ctrnet(args, cfg, device, num_classes=80, trainable=False, deploy=False):
+    print('==============================')
+    print('Build {} ...'.format(args.model.upper()))
+    
+    print('==============================')
+    print('Model Configuration: \n', cfg)
+    
+    # -------------- Build CenterNet --------------
+    model = CenterNet(cfg                = cfg,
+                      device             = device, 
+                      num_classes        = num_classes,
+                      trainable          = trainable,
+                      conf_thresh        = args.conf_thresh,
+                      topk               = args.topk,
+                      deploy             = deploy,
+                      no_multi_labels    = args.no_multi_labels,
+                      nms_class_agnostic = args.nms_class_agnostic
+                      )
+
+    # -------------- Initialize CenterNet --------------
+    for m in model.modules():
+        if isinstance(m, nn.BatchNorm2d):
+            m.eps = 1e-3
+            m.momentum = 0.03    
+            
+    # -------------- Build criterion --------------
+    criterion = None
+    if trainable:
+        # build criterion for training
+        criterion = build_criterion(args, cfg, device, num_classes)
+        
+    return model, criterion

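The BatchNorm override above (eps=1e-3, momentum=0.03, versus PyTorch defaults of 1e-5 and 0.1) is a common YOLO-style tweak. The same pattern in isolation:

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16))
for m in model.modules():
    if isinstance(m, nn.BatchNorm2d):
        m.eps = 1e-3       # default 1e-5
        m.momentum = 0.03  # default 0.1
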
+ 4 - 4
models/detectors/ctrnet/ctrnet.py

@@ -13,7 +13,7 @@ from .ctrnet_pred    import build_det_pred
 
 
 # CenterNet
-class CenterNet():
+class CenterNet(nn.Module):
     def __init__(self,
                  cfg,
                  device,
@@ -42,7 +42,7 @@ class CenterNet():
         
         # ---------------- Network Parameters ----------------
         ## Encoder
-        self.encoder, feat_dims = build_encoder(cfg, pretrained=cfg['bk_pretrained']&trainable)
+        self.encoder, feat_dims = build_encoder(cfg)
 
         ## Neck
         self.neck = build_neck(cfg, feat_dims[-1], feat_dims[-1])
@@ -135,9 +135,9 @@ class CenterNet():
         feat = self.decoder(feat)
 
         # ---------------- Head ----------------
-        outputs = self.det_head(x)
+        outputs = self.det_head(feat)
         if self.trainable:
-            outputs['aux_outputs'] = self.aux_det_head(x)
+            outputs['aux_outputs'] = self.aux_det_head(feat)
 
         # ---------------- Post-process ----------------
         if not self.trainable:

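The two head fixes matter: the head previously consumed the raw input image x instead of the decoder output. A toy stand-in showing the corrected wiring (layer choices are illustrative, not the repo's modules):

import torch
import torch.nn as nn

class TinyCenterNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Conv2d(3, 16, 3, stride=4, padding=1)
        self.neck = nn.Conv2d(16, 16, 1)
        self.decoder = nn.ConvTranspose2d(16, 16, 4, stride=2, padding=1)
        self.det_head = nn.Conv2d(16, 80, 1)

    def forward(self, x):
        feat = self.encoder(x)
        feat = self.neck(feat)
        feat = self.decoder(feat)
        return self.det_head(feat)  # the fix: feat, not x

print(TinyCenterNet()(torch.randn(1, 3, 64, 64)).shape)  # torch.Size([1, 80, 32, 32])
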
+ 1 - 1
models/detectors/ctrnet/ctrnet_decoder.py

@@ -36,7 +36,7 @@ class CTRDecoder(nn.Module):
         layers = []
         for _ in range(self.num_layers):
             layer = nn.Sequential(
-                RTCBlock(in_dim, out_dim, 1, False, act_type, norm_type, depthwise),
+                RTCBlock(in_dim, out_dim, 3, False, act_type, norm_type, depthwise),
                 DeConv(out_dim, out_dim, kernel_size=4, stride=2, act_type=act_type, norm_type=norm_type)
             )
             layers.append(layer)

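Each decoder layer ends in a stride-2 DeConv, so reaching the config's out_stride of 4 from the encoder's max_stride of 32 takes log2(32 / 4) = 3 upsampling stages (a hedged inference from the config values above, not a line from the repo):

import math

max_stride, out_stride = 32, 4
num_layers = int(math.log2(max_stride / out_stride))
print(num_layers)  # 3 upsampling stages
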
+ 3 - 64
models/detectors/ctrnet/ctrnet_encoder.py

@@ -7,27 +7,15 @@ except:
     from ctrnet_basic import Conv, RTCBlock
 
 
-# MIM-pretrained weights
-model_urls = {
-    "rtcnet_n": None,
-    "rtcnet_t": None,
-    "rtcnet_s": None,
-    "rtcnet_m": None,
-    "rtcnet_l": None,
-    "rtcnet_x": None,
-}
-
-
 # ---------------------------- Basic functions ----------------------------
 ## Real-time Convolutional Backbone
 class CTREncoder(nn.Module):
-    def __init__(self, width=1.0, depth=1.0, ratio=1.0, act_type='silu', norm_type='BN', depthwise=False):
+    def __init__(self, width=1.0, depth=1.0, act_type='silu', norm_type='BN', depthwise=False):
         super(CTREncoder, self).__init__()
         # ---------------- Basic parameters ----------------
         self.width_factor = width
         self.depth_factor = depth
-        self.last_stage_factor = ratio
-        self.feat_dims = [round(64 * width), round(128 * width), round(256 * width), round(512 * width), round(512 * width * ratio)]
+        self.feat_dims = [round(64 * width), round(128 * width), round(256 * width), round(512 * width), round(1024 * width)]
         # ---------------- Network parameters ----------------
         ## P1/2
         self.layer_1 = Conv(3, self.feat_dims[0], k=3, p=1, s=2, act_type=act_type, norm_type=norm_type)
@@ -90,67 +78,18 @@ class CTREncoder(nn.Module):
 
 # ---------------------------- Functions ----------------------------
 ## build Backbone
-def build_encoder(cfg, pretrained=False): 
+def build_encoder(cfg): 
     # build backbone model
     backbone = CTREncoder(width=cfg['width'],
                           depth=cfg['depth'],
-                          ratio=cfg['ratio'],
                           act_type=cfg['bk_act'],
                           norm_type=cfg['bk_norm'],
                           depthwise=cfg['bk_depthwise']
                           )
     feat_dims = backbone.feat_dims[-3:]
-
-    # load pretrained weight
-    if pretrained:
-        backbone = load_pretrained_weight(backbone)
         
     return backbone, feat_dims
 
-## load pretrained weight
-def load_pretrained_weight(model):
-    # Model name
-    width, depth, ratio = model.width_factor, model.depth_factor, model.last_stage_factor
-    if width == 0.25 and depth == 0.34 and ratio == 2.0:
-        model_name = "rtcnet_n"
-    elif width == 0.375 and depth == 0.34 and ratio == 2.0:
-        model_name = "rtcnet_t"
-    elif width == 0.50 and depth == 0.34 and ratio == 2.0:
-        model_name = "rtcnet_s"
-    elif width == 0.75 and depth == 0.67 and ratio == 1.5:
-        model_name = "rtcnet_m"
-    elif width == 1.0 and depth == 1.0 and ratio == 1.0:
-        model_name = "rtcnet_l"
-    elif width == 1.25 and depth == 1.34 and ratio == 1.0:
-        model_name = "rtcnet_x"
-    
-    # Load pretrained weight
-    url = model_urls[model_name]
-    if url is not None:
-        print('Loading pretrained weight ...')
-        checkpoint = torch.hub.load_state_dict_from_url(
-            url=url, map_location="cpu", check_hash=True)
-        # checkpoint state dict
-        checkpoint_state_dict = checkpoint.pop("model")
-        # model state dict
-        model_state_dict = model.state_dict()
-        # check
-        for k in list(checkpoint_state_dict.keys()):
-            if k in model_state_dict:
-                shape_model = tuple(model_state_dict[k].shape)
-                shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
-                if shape_model != shape_checkpoint:
-                    checkpoint_state_dict.pop(k)
-            else:
-                checkpoint_state_dict.pop(k)
-                print(k)
-        # load the weight
-        model.load_state_dict(checkpoint_state_dict)
-    else:
-        print('No backbone pretrained for {}.'.format(model_name))
-
-    return model
-
 
 if __name__ == '__main__':
     import time

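With 'ratio' gone, the last stage width is a plain 1024 * width instead of 512 * width * ratio; for scales that used ratio=2.0 the result is unchanged, since 512 * 2.0 == 1024. Checking the 'n' scale (width=0.25 per the config diff):

width = 0.25
feat_dims = [round(c * width) for c in (64, 128, 256, 512, 1024)]
print(feat_dims)       # [16, 32, 64, 128, 256]
print(feat_dims[-3:])  # build_encoder returns the last three: [64, 128, 256]
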
+ 2 - 1
models/detectors/ctrnet/ctrnet_pred.py

@@ -60,7 +60,8 @@ class SDetPDLayer(nn.Module):
 
         return anchors
         
-    def forward(self, cls_feat, reg_feat):
+    def forward(self, inputs):
+        cls_feat, reg_feat = inputs['cls_feat'], inputs['reg_feat']
         # pred
         cls_pred = self.cls_pred(cls_feat)
         reg_pred = self.reg_pred(reg_feat)

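The prediction layer now takes a single dict, so the head's output can be passed straight through. A self-contained stand-in for the new calling convention (module names and dims are illustrative; the 'cls_feat'/'reg_feat' keys follow the diff):

import torch
import torch.nn as nn

class TinyPredLayer(nn.Module):
    def __init__(self, dim=16, num_classes=80):
        super().__init__()
        self.cls_pred = nn.Conv2d(dim, num_classes, 1)
        self.reg_pred = nn.Conv2d(dim, 4, 1)

    def forward(self, inputs):
        # new convention: unpack both branches from one dict
        cls_feat, reg_feat = inputs['cls_feat'], inputs['reg_feat']
        return self.cls_pred(cls_feat), self.reg_pred(reg_feat)

layer = TinyPredLayer()
feats = {'cls_feat': torch.randn(1, 16, 8, 8), 'reg_feat': torch.randn(1, 16, 8, 8)}
cls_out, reg_out = layer(feats)  # [1, 80, 8, 8] and [1, 4, 8, 8]
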
+ 17 - 17
models/detectors/ctrnet/loss.py

@@ -102,13 +102,13 @@ class Criterion(object):
                     'losses':  (torch.Tensor) It is a scalar.),
                 }
         """
-        bs = outputs['pred_cls'][0].shape[0]
-        device = outputs['pred_cls'][0].device
-        fpn_strides = outputs['strides']
+        bs = outputs['pred_cls'].shape[0]
+        device = outputs['pred_cls'].device
+        stride = outputs['stride']
         anchors = outputs['anchors']
         # preds: [B, M, C]
-        cls_preds = torch.cat(outputs['pred_cls'], dim=1)
-        box_preds = torch.cat(outputs['pred_box'], dim=1)
+        cls_preds = outputs['pred_cls']
+        box_preds = outputs['pred_box']
         
         # --------------- label assignment ---------------
         cls_targets = []
@@ -118,15 +118,15 @@ class Criterion(object):
             tgt_labels = targets[batch_idx]["labels"].to(device)  # [N,]
             tgt_bboxes = targets[batch_idx]["boxes"].to(device)   # [N, 4]
             if not aux_loss:
-                assigned_result = self.matcher(fpn_strides=fpn_strides,
-                                            anchors=anchors,
-                                            pred_cls=cls_preds[batch_idx].detach(),
-                                            pred_box=box_preds[batch_idx].detach(),
-                                            gt_labels=tgt_labels,
-                                            gt_bboxes=tgt_bboxes
-                                            )
+                assigned_result = self.matcher(stride=stride,
+                                               anchors=anchors,
+                                               pred_cls=cls_preds[batch_idx].detach(),
+                                               pred_box=box_preds[batch_idx].detach(),
+                                               gt_labels=tgt_labels,
+                                               gt_bboxes=tgt_bboxes
+                                               )
             else:
-                assigned_result = self.aux_matcher(fpn_strides=fpn_strides,
+                assigned_result = self.aux_matcher(stride=stride,
                                                    anchors=anchors,
                                                    pred_cls=cls_preds[batch_idx].detach(),
                                                    pred_box=box_preds[batch_idx].detach(),
@@ -170,13 +170,13 @@ class Criterion(object):
         loss_box_aux = None
         if epoch >= (self.max_epoch - self.no_aug_epoch - 1):
             ## reg_preds
-            reg_preds = torch.cat(outputs['pred_reg'], dim=1)
+            reg_preds = outputs['pred_reg']
             reg_preds_pos = reg_preds.view(-1, 4)[pos_inds]
             ## anchor tensors
-            anchors_tensors = torch.cat(outputs['anchors'], dim=0)[None].repeat(bs, 1, 1)
+            anchors_tensors = outputs['anchors'][None].repeat(bs, 1, 1)
             anchors_tensors_pos = anchors_tensors.view(-1, 2)[pos_inds]
             ## stride tensors
-            stride_tensors = torch.cat(outputs['stride_tensors'], dim=0)[None].repeat(bs, 1, 1)
+            stride_tensors = outputs['stride_tensors'][None].repeat(bs, 1, 1)
             stride_tensors_pos = stride_tensors.view(-1, 1)[pos_inds]
             ## aux loss
             loss_box_aux = self.loss_bboxes_aux(reg_preds_pos, box_targets_pos, anchors_tensors_pos, stride_tensors_pos)
@@ -216,7 +216,7 @@ class Criterion(object):
                 loss_dict[k] = main_loss_dict[k]
         for k in aux_loss_dict:
             if k != 'losses':
-                loss_dict[k] = main_loss_dict[k]
+                loss_dict[k+'_aux'] = aux_loss_dict[k]
         
         return loss_dict
 

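The last hunk fixes a real bug: auxiliary losses were overwritten by main losses under identical keys. A standalone trace of the corrected merge (loss values are illustrative):

main_loss_dict = {'loss_cls': 1.0, 'loss_box': 2.0, 'losses': 3.0}
aux_loss_dict  = {'loss_cls': 0.5, 'loss_box': 0.8, 'losses': 1.3}

loss_dict = {}
for k in main_loss_dict:
    if k != 'losses':
        loss_dict[k] = main_loss_dict[k]
for k in aux_loss_dict:
    if k != 'losses':
        loss_dict[k + '_aux'] = aux_loss_dict[k]  # was main_loss_dict[k] under key k

print(loss_dict)
# {'loss_cls': 1.0, 'loss_box': 2.0, 'loss_cls_aux': 0.5, 'loss_box_aux': 0.8}
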
+ 3 - 6
models/detectors/ctrnet/matcher.py

@@ -16,18 +16,15 @@ class AlignedSimOTA(object):
 
     @torch.no_grad()
     def __call__(self, 
-                 fpn_strides, 
+                 stride, 
                  anchors, 
                  pred_cls, 
                  pred_box, 
                  gt_labels,
                  gt_bboxes):
         # [M,]
-        strides = torch.cat([torch.ones_like(anchor_i[:, 0]) * stride_i
-                                for stride_i, anchor_i in zip(fpn_strides, anchors)], dim=-1)
-        # List[F, M, 2] -> [M, 2]
+        stride_tensor = torch.ones_like(anchors[:, 0]) * stride
         num_gt = len(gt_labels)
-        anchors = torch.cat(anchors, dim=0)
 
         # check gt
         if num_gt == 0 or gt_bboxes.max().item() == 0.:
@@ -46,7 +43,7 @@ class AlignedSimOTA(object):
         # ----------------------------------- soft center prior -----------------------------------
         gt_center = (gt_bboxes[..., :2] + gt_bboxes[..., 2:]) / 2.0
         distance = (anchors.unsqueeze(0) - gt_center.unsqueeze(1)
-                    ).pow(2).sum(-1).sqrt() / strides.unsqueeze(0)  # [N, M]
+                    ).pow(2).sum(-1).sqrt() / stride_tensor.unsqueeze(0)  # [N, M]
         distance = distance * valid_mask.unsqueeze(0)
         soft_center_prior = torch.pow(10, distance - self.soft_center_radius)
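
With a single output level, the matcher builds its stride tensor by broadcasting one scalar over all M anchors instead of concatenating per-FPN-level tensors. A runnable excerpt of the new shapes (toy values):

import torch

anchors = torch.rand(100, 2) * 128                    # [M, 2] anchor centers
stride_tensor = torch.ones_like(anchors[:, 0]) * 4.0  # [M,], out_stride = 4

gt_bboxes = torch.tensor([[10., 10., 50., 60.]])      # [N, 4], xyxy
gt_center = (gt_bboxes[..., :2] + gt_bboxes[..., 2:]) / 2.0
distance = (anchors.unsqueeze(0) - gt_center.unsqueeze(1)
            ).pow(2).sum(-1).sqrt() / stride_tensor.unsqueeze(0)
print(distance.shape)  # torch.Size([1, 100]) -> [N, M], in stride units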