Browse Source

update yolov2

yjh0410 11 months ago
parent
commit
1230d06ad5

+ 140 - 0
yolo/models/yolov2/darknet19.py

@@ -0,0 +1,140 @@
+import torch
+import torch.nn as nn
+
+# ImageNet pretrained weight
+pretrained_urls = {
+    "darknet19": "https://github.com/yjh0410/image_classification_pytorch/releases/download/weight/darknet19.pth",
+}
+
+
+# --------------------- Basic Module -----------------------
+class ConvModule(nn.Module):
+    """Conv2d -> BatchNorm2d -> LeakyReLU(0.1) building block.
+
+    The three layers are stored in ``self.convs`` (an ``nn.Sequential``), in
+    that order. The convolution is created without an explicit ``bias``
+    argument, so it uses the ``nn.Conv2d`` default.
+
+    Args:
+        in_channels: Number of channels of the input feature map.
+        out_channels: Number of channels produced by the convolution (and
+            normalized by the batch-norm layer).
+        kernel_size: Convolution kernel size.
+        padding: Padding forwarded to ``nn.Conv2d``.
+        stride: Stride forwarded to ``nn.Conv2d``.
+        dilation: Dilation forwarded to ``nn.Conv2d``.
+    """
+
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: int,
+                 padding: int = 0,
+                 stride: int = 1,
+                 dilation: int = 1,
+                 ):
+        super(ConvModule, self).__init__()
+        self.convs = nn.Sequential(
+            nn.Conv2d(in_channels, out_channels, kernel_size, padding=padding, stride=stride, dilation=dilation),
+            nn.BatchNorm2d(out_channels),
+            # In-place activation: the batch-norm output tensor is overwritten.
+            nn.LeakyReLU(0.1, inplace=True)
+        )
+
+    def forward(self, x):
+        """Apply convolution, batch normalization and LeakyReLU to ``x``."""
+        return self.convs(x)
+
+
+class DarkNet19(nn.Module):
+    def __init__(self, use_pretrained=False):
+        super(DarkNet19, self).__init__()
+
+        # output : stride = 2, c = 32
+        self.conv_1 = nn.Sequential(
+            ConvModule(3, 32, kernel_size=3, padding=1),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=2),
+        )
+
+        # output : stride = 4, c = 64
+        self.conv_2 = nn.Sequential(
+            ConvModule(32, 64, kernel_size=3, padding=1),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=2)
+        )
+
+        # output : stride = 8, c = 128
+        self.conv_3 = nn.Sequential(
+            ConvModule(64, 128, kernel_size=3, padding=1),
+            ConvModule(128, 64, 1),
+            ConvModule(64, 128, kernel_size=3, padding=1),
+            nn.MaxPool2d(kernel_size=(2, 2), stride=2)
+        )
+
+        # output : stride = 8, c = 256
+        self.conv_4 = nn.Sequential(
+            ConvModule(128, 256, kernel_size=3, padding=1),
+            ConvModule(256, 128, 1),
+            ConvModule(128, 256, kernel_size=3, padding=1),
+        )
+
+        # output : stride = 16, c = 512
+        self.maxpool_4 = nn.MaxPool2d((2, 2), 2)
+        self.conv_5 = nn.Sequential(
+            ConvModule(256, 512, kernel_size=3, padding=1),
+            ConvModule(512, 256, 1),
+            ConvModule(256, 512, kernel_size=3, padding=1),
+            ConvModule(512, 256, 1),
+            ConvModule(256, 512, kernel_size=3, padding=1),
+        )
+        
+        # output : stride = 32, c = 1024
+        self.maxpool_5 = nn.MaxPool2d((2, 2), 2)
+        self.conv_6 = nn.Sequential(
+            ConvModule(512, 1024, kernel_size=3, padding=1),
+            ConvModule(1024, 512, 1),
+            ConvModule(512, 1024, kernel_size=3, padding=1),
+            ConvModule(1024, 512, 1),
+            ConvModule(512, 1024, kernel_size=3, padding=1)
+        )
+
+        if use_pretrained:
+            self.load_pretrained()
+
+    def load_pretrained(self):
+        url = pretrained_urls["darknet19"]
+        if url is not None:
+            print(' - Loading backbone pretrained weight from : {}'.format(url))
+            # checkpoint state dict
+            checkpoint_state_dict = torch.hub.load_state_dict_from_url(
+                url=url, map_location="cpu", check_hash=True)
+
+            # model state dict
+            model_state_dict = self.state_dict()
+
+            # check
+            for k in list(checkpoint_state_dict.keys()):
+                if k in model_state_dict:
+                    shape_model = tuple(model_state_dict[k].shape)
+                    shape_checkpoint = tuple(checkpoint_state_dict[k].shape)
+                    if shape_model != shape_checkpoint:
+                        checkpoint_state_dict.pop(k)
+                else:
+                    checkpoint_state_dict.pop(k)
+                    print('Unused key: ', k)
+            # load the weight
+            self.load_state_dict(checkpoint_state_dict)
+        else:
+            print(' - No pretrained weight for darknet-19.')
+
+    def forward(self, x):
+        c1 = self.conv_1(x)                    # c1
+        c2 = self.conv_2(c1)                   # c2
+        c3 = self.conv_3(c2)                   # c3
+        c3 = self.conv_4(c3)                   # c3
+        c4 = self.conv_5(self.maxpool_4(c3))   # c4
+        c5 = self.conv_6(self.maxpool_5(c4))   # c5
+
+        return c5
+
+
+# Self-test: build the backbone, run one forward pass and report model cost.
+if __name__ == '__main__':
+    # thop is imported here so that it is needed only when running this file
+    # directly, not when importing the module.
+    from thop import profile
+
+    # Build model (use_pretrained=True makes the constructor call
+    # load_pretrained(), which fetches the checkpoint via torch.hub)
+    model = DarkNet19(use_pretrained=True)
+
+    # Randomly generate an input tensor
+    x = torch.randn(2, 3, 640, 640)
+
+    # Inference
+    output = model(x)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of output : ', output.shape)
+
+    # Profile with a batch of one
+    x = torch.randn(1, 3, 640, 640)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    # NOTE(review): the count returned by profile() is doubled before printing,
+    # presumably to convert multiply-accumulates into FLOPs; confirm against thop.
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

+ 12 - 12
yolo/models/yolov2/yolov2_backbone.py

@@ -20,9 +20,9 @@ class Yolov2Backbone(nn.Module):
 
 
 if __name__=='__main__':
-    import time
     from thop import profile
-    # YOLOv8-Base config
+
+    # YOLOv2 configuration
     class Yolov2BaseConfig(object):
         def __init__(self) -> None:
             # ---------------- Model config ----------------
@@ -31,21 +31,21 @@ if __name__=='__main__':
             ## Backbone
             self.backbone       = 'resnet18'
             self.use_pretrained = True
-
     cfg = Yolov2BaseConfig()
+
     # Build backbone
     model = Yolov2Backbone(cfg)
 
+    # Randomly generate an input tensor
+    x = torch.randn(2, 3, 640, 640)
+
     # Inference
-    x = torch.randn(1, 3, 640, 640)
-    t0 = time.time()
     output = model(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print(output.shape)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of output : ', output.shape)
 
-    print('==============================')
+    x = torch.randn(1, 3, 640, 640)
     flops, params = profile(model, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))    
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

+ 16 - 21
yolo/models/yolov2/yolov2_head.py

@@ -54,39 +54,34 @@ class Yolov2DetHead(nn.Module):
 
 
 if __name__=='__main__':
-    import time
     from thop import profile
-    # Model config
     
-    # YOLOv8-Base config
+    # YOLOv2 configuration
     class Yolov2BaseConfig(object):
         def __init__(self) -> None:
             # ---------------- Model config ----------------
             self.out_stride = 32
             self.max_stride = 32
             ## Head
-            self.head_act  = 'lrelu'
-            self.head_norm = 'BN'
-            self.head_depthwise = False
             self.head_dim  = 256
-            self.num_cls_head   = 2
-            self.num_reg_head   = 2
-
+            self.num_cls_head = 2
+            self.num_reg_head = 2
     cfg = Yolov2BaseConfig()
+
     # Build a head
-    head = Yolov2DetHead(cfg, 512)
+    model = Yolov2DetHead(cfg, 512)
 
+    # Randomly generate an input tensor
+    x = torch.randn(2, 512, 20, 20)
 
     # Inference
-    x = torch.randn(1, 512, 20, 20)
-    t0 = time.time()
-    cls_feat, reg_feat = head(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print(cls_feat.shape, reg_feat.shape)
+    cls_feats, reg_feats = model(x)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of cls feats : ', cls_feats.shape)
+    print(' - the shape of reg feats : ', reg_feats.shape)
 
-    print('==============================')
-    flops, params = profile(head, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))    
+    x = torch.randn(1, 512, 20, 20)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

+ 15 - 19
yolo/models/yolov2/yolov2_neck.py

@@ -40,38 +40,34 @@ class SPPF(nn.Module):
 
 
 if __name__=='__main__':
-    import time
     from thop import profile
-    # Model config
     
-    # YOLOv2-Base config
+    # YOLOv2 configuration
     class Yolov2BaseConfig(object):
         def __init__(self) -> None:
             # ---------------- Model config ----------------
             self.out_stride = 32
             self.max_stride = 32
             ## Neck
-            self.neck_act       = 'lrelu'
-            self.neck_norm      = 'BN'
-            self.neck_depthwise = False
             self.neck_expand_ratio = 0.5
             self.spp_pooling_size  = 5
-
     cfg = Yolov2BaseConfig()
-    # Build a head
+
+    # Build a neck
     in_dim  = 512
     out_dim = 512
-    neck = SPPF(cfg, in_dim, out_dim)
+    model = SPPF(cfg, 512, 512)
+
+    # Randomly generate an input tensor
+    x = torch.randn(2, in_dim, 20, 20)
 
     # Inference
-    x = torch.randn(1, in_dim, 20, 20)
-    t0 = time.time()
-    output = neck(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print('Neck output: ', output.shape)
+    output = model(x)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of output : ', output.shape)
 
-    flops, params = profile(neck, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
+    x = torch.randn(1, in_dim, 20, 20)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))