yjh0410 1 năm trước
mục cha
commit
f334b4646c

+ 3 - 3
config/data_config/transform_config.py

@@ -314,7 +314,7 @@ rtdetr_s_trans_config = {
     },
     # Mosaic & Mixup
     'mosaic_keep_ratio': False,
-    'mosaic_prob': 1.0,
+    'mosaic_prob': 0.0,
     'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
     'mixup_type':  'yolov5',
@@ -339,8 +339,8 @@ rtdetr_l_trans_config = {
     },
     # Mosaic & Mixup
     'mosaic_keep_ratio': False,
-    'mosaic_prob': 1.0,
-    'mixup_prob':  0.15,
+    'mosaic_prob': 0.0,
+    'mixup_prob':  0.0,
     'mosaic_type': 'yolov5',
     'mixup_type':  'yolov5',
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp, just for YOLOXMixup

+ 2 - 2
config/model_config/rtdetr_config.py

@@ -10,8 +10,8 @@ rtdetr_cfg = {
         'backbone_norm': 'BN',
         'pretrained': True,
         'pretrained_weight': 'imagenet1k_v1',
-        'freeze_at': -1,
-        'freeze_stem_only': True,
+        'freeze_at': 0,
+        'freeze_stem_only': False,
         'out_stride': [8, 16, 32],
         'max_stride': 32,
         ## Image Encoder - FPN

+ 3 - 1
engine.py

@@ -1140,7 +1140,7 @@ class RTDetrTrainer(object):
         self.args.fp16 = False
         # weak augmentatino stage
         self.second_stage = False
-        self.second_stage_epoch = 5
+        self.second_stage_epoch = -1
         # path to save model
         self.path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
         os.makedirs(self.path_to_save, exist_ok=True)
@@ -1160,6 +1160,8 @@ class RTDetrTrainer(object):
             args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=True)
         self.val_transform, _ = build_transform(
             args=args, trans_config=self.trans_cfg, max_stride=self.model_cfg['max_stride'], is_train=False)
+        if self.trans_cfg["mosaic_prob"] > 0:
+            self.second_stage_epoch = 5
 
         # ---------------------------- Build Dataset & Dataloader ----------------------------
         self.dataset, self.dataset_info = build_dataset(args, self.data_cfg, self.trans_cfg, self.train_transform, is_train=True)

+ 10 - 6
models/detectors/rtdetr/basic_modules/basic.py

@@ -1,4 +1,5 @@
 import numpy as np
+import copy
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -279,24 +280,27 @@ class RepRTCBlock(nn.Module):
                  ) -> None:
         super(RepRTCBlock, self).__init__()
         self.inter_dim = round(out_dim * expansion)
+        # ------------ Input & Output projection ------------
         self.conv1 = BasicConv(in_dim, self.inter_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
         self.conv2 = BasicConv(in_dim, self.inter_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
-        self.module = nn.ModuleList([RepVggBlock(self.inter_dim, self.inter_dim, act_type, norm_type)
-                                     for _ in range(num_blocks)])
-        self.conv3 = BasicConv(self.inter_dim, out_dim, kernel_size=3, padding=1, act_type=act_type, norm_type=norm_type)
-
+        self.conv3 = BasicConv(self.inter_dim * (2 + num_blocks), out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
+        # ------------ Core modules ------------
+        module = nn.Sequential(RepVggBlock(self.inter_dim, self.inter_dim, act_type, norm_type),
+                               RepVggBlock(self.inter_dim, self.inter_dim, act_type, norm_type),)
+        self.module = nn.ModuleList([copy.deepcopy(module) for _ in range(num_blocks)])
+        
     def forward(self, x):
         # Input proj
         x1 = self.conv1(x)
         x2 = self.conv2(x)
 
         # Core module
-        out = [x1]
+        out = [x1, x2]
         for m in self.module:
             x2 = m(x2)
             out.append(x2)
 
         # Output proj
-        out = self.conv3(sum(out))
+        out = self.conv3(torch.cat(out, dim=1))
 
         return out

+ 1 - 1
models/detectors/rtdetr/build.py

@@ -22,7 +22,7 @@ def build_rtdetr(args, cfg, num_classes=80, trainable=False, deploy=False):
                     nms_thresh      = args.nms_thresh,
                     conf_thresh     = args.conf_thresh,
                     topk            = 300,
-                    deploy          = deploy,
+                    onnx_deploy     = deploy,
                     no_multi_labels = args.no_multi_labels,
                     use_nms         = True,   # NMS is beneficial 
                     nms_class_agnostic = args.nms_class_agnostic

+ 13 - 4
models/detectors/rtdetr/rtdetr.py

@@ -19,7 +19,7 @@ class RT_DETR(nn.Module):
                  conf_thresh = 0.1,
                  nms_thresh  = 0.5,
                  topk        = 300,
-                 deploy      = False,
+                 onnx_deploy = False,
                  no_multi_labels = False,
                  use_nms     = False,
                  nms_class_agnostic = False,
@@ -28,7 +28,7 @@ class RT_DETR(nn.Module):
         # ----------- Basic setting -----------
         self.num_classes = num_classes
         self.num_topk = topk
-        self.deploy = deploy
+        self.onnx_deploy = onnx_deploy
         ## Post-process parameters
         self.use_nms = use_nms
         self.nms_thresh = nms_thresh
@@ -44,6 +44,13 @@ class RT_DETR(nn.Module):
         ## Detect decoder
         self.detect_decoder = build_transformer(cfg, self.fpn_dims, num_classes, return_intermediate=self.training)
 
+    def deploy(self):
+        assert not self.training
+        for m in self.modules():
+            if hasattr(m, 'convert_to_deploy'):
+                m.convert_to_deploy()
+        return self 
+
     def post_process(self, box_pred, cls_pred):
         # xywh -> xyxy
         box_preds_x1y1 = box_pred[..., :2] - 0.5 * box_pred[..., 2:]
@@ -96,7 +103,7 @@ class RT_DETR(nn.Module):
             topk_labels = topk_idxs % self.num_classes
             topk_bboxes = box_pred[topk_box_idxs]
 
-        if not self.deploy:
+        if not self.onnx_deploy:
             topk_scores = topk_scores.cpu().numpy()
             topk_labels = topk_labels.cpu().numpy()
             topk_bboxes = topk_bboxes.cpu().numpy()
@@ -155,7 +162,8 @@ if __name__ == '__main__':
         'max_stride': 32,
         # Image Encoder - FPN
         'fpn': 'hybrid_encoder',
-        'fpn_num_blocks': 4,
+        'fpn_num_blocks': 3,
+        'fpn_expansion': 0.5,
         'fpn_act': 'silu',
         'fpn_norm': 'BN',
         'fpn_depthwise': False,
@@ -217,6 +225,7 @@ if __name__ == '__main__':
     # Inference
     with torch.no_grad():
         model.eval()
+        model.deploy()
         t0 = time.time()
         outputs = model(image)
         t1 = time.time()