Jelajahi Sumber

update FCOS-R50-1x

yjh0410 1 tahun lalu
induk
komit
fa5cbf95b5

+ 1 - 1
odlab/models/detectors/fcos/README.md

@@ -6,7 +6,7 @@
 | Model          |  scale     |  FPS<sup>FP32<br>RTX 4060  | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | Weight | Logs  |
 | ---------------| ---------- | -------------------------- | ---------------------- |  ---------------  | ------ | ----- |
 | FCOS_R18_1x    |  800,1333  |            24              |          34.0          |        52.2       | [ckpt](https://github.com/yjh0410/YOLO-Tutorial-v2/releases/download/yolo_tutorial_ckpt/fcos_r18_1x_coco.pth) | [log](https://github.com/yjh0410/YOLO-Tutorial-v2/releases/download/yolo_tutorial_ckpt/FCOS-R18-1x.txt) |
-| FCOS_R50_1x    |  800,1333  |                          |                    |               | [ckpt]() | [Logs]() |
+| FCOS_R50_1x    |  800,1333  |                          |          39.0          |        58.3       | [ckpt](https://github.com/yjh0410/YOLO-Tutorial-v2/releases/download/yolo_tutorial_ckpt/fcos_r50_1x_coco.pth) | [log](https://github.com/yjh0410/YOLO-Tutorial-v2/releases/download/yolo_tutorial_ckpt/FCOS-R50-1x.txt) |
 | FCOS_RT_R18_3x |  512,736   |                          |                    |               | [ckpt]() | [Logs]() |
 | FCOS_RT_R50_3x |  512,736   |                          |                    |               | [ckpt]() | [Logs]() |
 

+ 2 - 2
yolo/models/yolof/yolof.py

@@ -30,7 +30,7 @@ class Yolof(nn.Module):
         
         # ---------------------- Network Parameters ----------------------
         self.backbone  = YolofBackbone(cfg)
-        self.upsampler = YolofUpsampler(cfg, self.backbone.feat_dims[-1], cfg.head_dim)
+        self.upsampler = YolofUpsampler(cfg, self.backbone.feat_dims, cfg.head_dim)
         self.encoder   = YolofEncoder(cfg, cfg.head_dim, cfg.head_dim)
         self.decoder   = YolofDecoder(cfg, self.encoder.out_dim)
 
@@ -117,7 +117,7 @@ class Yolof(nn.Module):
         pyramid_feats = self.backbone(x)
 
         # ---------------- Encoder ----------------
-        x = self.upsampler(pyramid_feats[-1])
+        x = self.upsampler(pyramid_feats)
         x = self.encoder(x)
 
         # ---------------- Decoder ----------------

+ 1 - 1
yolo/models/yolof/yolof_encoder.py

@@ -44,7 +44,7 @@ class Bottleneck(nn.Module):
 
         return x + self.branch(x) if self.shortcut else h
 
-# Dilated Encoder
+# CSP-style Dilated Encoder
 class YolofEncoder(nn.Module):
     def __init__(self, cfg, in_dim, out_dim):
         super(YolofEncoder, self).__init__()

+ 15 - 14
yolo/models/yolof/yolof_upsampler.py

@@ -8,22 +8,23 @@ except:
 
 
 class YolofUpsampler(nn.Module):
-    def __init__(self, cfg, in_dim, out_dim):
+    def __init__(self, cfg, in_dims, out_dim):
         super(YolofUpsampler, self).__init__()
-        # ----------- Basic parameters -----------
-        self.upscale_factor = cfg.upscale_factor
-        inter_dim = self.upscale_factor ** 2 * in_dim
         # ----------- Model parameters -----------
-        self.input_proj = BasicConv(in_dim, inter_dim, kernel_size=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm)
-        self.output_proj = BasicConv(in_dim, out_dim, kernel_size=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm)
+        self.input_proj_1 = BasicConv(in_dims[-1], out_dim, kernel_size=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm)
+        self.input_proj_2 = BasicConv(in_dims[-2], out_dim, kernel_size=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm)
+        self.output_proj  = nn.Sequential(
+            BasicConv(out_dim * 2, out_dim, kernel_size=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm),
+            BasicConv(out_dim, out_dim, kernel_size=3, padding=1, stride=1, act_type=cfg.neck_act, norm_type=cfg.neck_norm),
+        )
 
-    def forward(self, x):
-        # [B, C, H, W] -> [B, 4*C, H, W]
-        x = self.input_proj(x)
-
-        # [B, 4*C, H, W] -> [B, C, 2*H, 2*W]
-        x = torch.pixel_shuffle(x, upscale_factor=self.upscale_factor)
+    def forward(self, pyramid_feats):
+        x1 = self.input_proj_1(pyramid_feats[-1])
+        x2 = self.input_proj_2(pyramid_feats[-2])
         
-        x = self.output_proj(x)
+        x1_up = nn.functional.interpolate(x1, scale_factor=2.0)
+
+        x3 = torch.cat([x2, x1_up], dim=1)
+        out = self.output_proj(x3)
         
-        return x
+        return out

+ 1 - 1
yolo/train.py

@@ -196,7 +196,7 @@ def train():
         trainer.eval(model_eval)
         return
 
-    garbage = torch.randn(640, 1024, 75, 75).to(device) # 15 G
+    # garbage = torch.randn(640, 1024, 75, 75).to(device) # 15 G
 
     # ---------------------------- Train pipeline ----------------------------
     trainer.train(model)