yjh0410 1 year ago
parent
commit
3135cae7f6

+ 1 - 1
models/detectors/rtdetr/rtdetr_encoder.py

@@ -27,7 +27,7 @@ class ImageEncoder(nn.Module):
         
         # ---------------- Network settings ----------------
         ## Backbone Network
-        self.backbone, fpn_feat_dims = build_backbone(cfg, pretrained=cfg['pretrained']&self.training)
+        self.backbone, fpn_feat_dims = build_backbone(cfg, pretrained=cfg['pretrained'])
 
         ## Feature Pyramid Network
         self.fpn = build_fpn(cfg, fpn_feat_dims, self.hidden_dim)
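
The dropped `&self.training` was a bitwise AND of two Python bools, so pretrained backbone weights were only requested while the module was in training mode; after the change they load whenever cfg['pretrained'] is set. A minimal sketch of the difference (pretrained_cfg stands in for cfg['pretrained']; not code from the repo):

    pretrained_cfg = True

    for training in (True, False):
        old_flag = pretrained_cfg & training  # bitwise & on bools behaves like `and`
        new_flag = pretrained_cfg             # no longer depends on train/eval mode
        print(f"training={training}: old={old_flag}, new={new_flag}")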

+ 1 - 1
models/detectors/rtpdetr/rtpdetr_decoder.py

@@ -270,7 +270,7 @@ class PlainDETRTransformer(nn.Module):
 
         # Prepare input for decoder
         memory = src_flatten
-        bs, seq_l, c = memory.shape
+        bs, _, c = memory.shape
 
         # Two stage trick
         if self.training:
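
The sequence length unpacked from memory.shape was never read afterwards, so the commit swaps it for the throwaway name `_`, the usual Python idiom for an intentionally unused unpacking target. A small standalone illustration (the shape is made up, not from the repo):

    import torch

    memory = torch.zeros(2, 100, 256)  # (batch, sequence length, channels)
    bs, _, c = memory.shape            # sequence length deliberately discarded
    print(bs, c)                       # 2 256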

+ 7 - 7
models/detectors/rtpdetr/rtpdetr_encoder.py

@@ -35,19 +35,19 @@ class ImageEncoder(nn.Module):
         self.input_proj = BasicConv(fpn_feat_dims[-1], cfg['hidden_dim'], kernel_size=1, act_type=None, norm_type='BN')
 
         # ---------------- Transformer Encoder ----------------
-        self.transformer_encoder = TransformerEncoder(d_model        = cfg['hidden_dim'],
-                                                      num_heads      = cfg['en_num_heads'],
-                                                      num_layers     = cfg['en_num_layers'],
-                                                      mlp_ratio      = cfg['en_mlp_ratio'],
-                                                      dropout        = cfg['en_dropout'],
-                                                      act_type       = cfg['en_act']
+        self.transformer_encoder = TransformerEncoder(d_model     = cfg['hidden_dim'],
+                                                      num_heads   = cfg['en_num_heads'],
+                                                      num_layers  = cfg['en_num_layers'],
+                                                      mlp_ratio   = cfg['en_mlp_ratio'],
+                                                      dropout     = cfg['en_dropout'],
+                                                      act_type    = cfg['en_act']
                                                       )
 
         ## Upsample layer
         self.upsample = UpSampleWrapper(cfg['hidden_dim'], self.upsample_factor)
         
         ## Output projection
-        self.output_proj = BasicConv(cfg['hidden_dim'], cfg['hidden_dim'], kernel_size=1, act_type=None, norm_type='BN')
+        self.output_proj = BasicConv(cfg['hidden_dim'], cfg['hidden_dim'], kernel_size=3, padding=1, act_type='silu', norm_type='BN')
 
 
     def forward(self, x):
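
The realignment of the TransformerEncoder keyword arguments is cosmetic, but the output_proj change is not: a 1x1 projection with no activation becomes a 3x3 conv (padding 1, so spatial size is preserved) followed by norm and SiLU. Assuming BasicConv wraps Conv2d + norm + activation, which the call signature suggests but this diff does not show, a rough plain-PyTorch equivalent of the new layer is:

    import torch
    import torch.nn as nn

    hidden_dim = 256  # placeholder for cfg['hidden_dim']
    output_proj = nn.Sequential(
        nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(hidden_dim),   # norm_type='BN'
        nn.SiLU(inplace=True),        # act_type='silu'
    )

    x = torch.randn(1, hidden_dim, 20, 20)
    print(output_proj(x).shape)  # torch.Size([1, 256, 20, 20]); padding=1 keeps H and W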