Browse Source

modify some notes in yolov1

yjh0410 1 year ago
parent
commit
9d7e807b67
3 changed files with 79 additions and 90 deletions
  1. 52 65
      yolo/models/yolov1/yolov1.py
  2. 12 11
      yolo/models/yolov1/yolov1_backbone.py
  3. 15 14
      yolo/models/yolov1/yolov1_neck.py

+ 52 - 65
yolo/models/yolov1/yolov1.py

@@ -38,71 +38,58 @@ class Yolov1(nn.Module):
         """
         We process predictions at each scale hierarchically
         Input:
-            obj_preds: List[torch.Tensor] -> [[B, M, 1], ...], B=1
-            cls_preds: List[torch.Tensor] -> [[B, M, C], ...], B=1
-            box_preds: List[torch.Tensor] -> [[B, M, 4], ...], B=1
+            obj_preds: torch.Tensor -> [B, M, 1], B=1
+            cls_preds: torch.Tensor -> [B, M, C], B=1
+            box_preds: torch.Tensor -> [B, M, 4], B=1
         Output:
             bboxes: np.array -> [N, 4]
             scores: np.array -> [N,]
             labels: np.array -> [N,]
-        """
-        all_scores = []
-        all_labels = []
-        all_bboxes = []
-        
-        for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
-            obj_pred_i = obj_pred_i[0]
-            cls_pred_i = cls_pred_i[0]
-            box_pred_i = box_pred_i[0]
-            if self.no_multi_labels:
-                # [M,]
-                scores, labels = torch.max(
-                    torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
-
-                # Keep top k top scoring indices only.
-                num_topk = min(self.topk_candidates, box_pred_i.size(0))
-
-                # topk candidates
-                predicted_prob, topk_idxs = scores.sort(descending=True)
-                topk_scores = predicted_prob[:num_topk]
-                topk_idxs = topk_idxs[:num_topk]
-
-                # filter out the proposals with low confidence score
-                keep_idxs = topk_scores > self.conf_thresh
-                scores = topk_scores[keep_idxs]
-                topk_idxs = topk_idxs[keep_idxs]
-
-                labels = labels[topk_idxs]
-                bboxes = box_pred_i[topk_idxs]
-            else:
-                # [M, C] -> [MC,]
-                scores_i = torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()).flatten()
-
-                # Keep top k top scoring indices only.
-                num_topk = min(self.topk_candidates, box_pred_i.size(0))
-
-                # torch.sort is actually faster than .topk (at least on GPUs)
-                predicted_prob, topk_idxs = scores_i.sort(descending=True)
-                topk_scores = predicted_prob[:num_topk]
-                topk_idxs = topk_idxs[:num_topk]
-
-                # filter out the proposals with low confidence score
-                keep_idxs = topk_scores > self.conf_thresh
-                scores = topk_scores[keep_idxs]
-                topk_idxs = topk_idxs[keep_idxs]
-
-                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-                labels = topk_idxs % self.num_classes
-
-                bboxes = box_pred_i[anchor_idxs]
-
-            all_scores.append(scores)
-            all_labels.append(labels)
-            all_bboxes.append(bboxes)
-
-        scores = torch.cat(all_scores, dim=0)
-        labels = torch.cat(all_labels, dim=0)
-        bboxes = torch.cat(all_bboxes, dim=0)
+        """        
+        obj_preds = obj_preds[0]
+        cls_preds = cls_preds[0]
+        box_preds = box_preds[0]
+        if self.no_multi_labels:
+            # [M,]
+            scores, labels = torch.max(
+                torch.sqrt(obj_preds.sigmoid() * cls_preds.sigmoid()), dim=1)
+
+            # Keep top k top scoring indices only.
+            num_topk = min(self.topk_candidates, box_preds.size(0))
+
+            # topk candidates
+            predicted_prob, topk_idxs = scores.sort(descending=True)
+            topk_scores = predicted_prob[:num_topk]
+            topk_idxs = topk_idxs[:num_topk]
+
+            # filter out the proposals with low confidence score
+            keep_idxs = topk_scores > self.conf_thresh
+            scores = topk_scores[keep_idxs]
+            topk_idxs = topk_idxs[keep_idxs]
+
+            labels = labels[topk_idxs]
+            bboxes = box_preds[topk_idxs]
+        else:
+            # [M, C] -> [MC,]
+            scores = torch.sqrt(obj_preds.sigmoid() * cls_preds.sigmoid()).flatten()
+
+            # Keep top k top scoring indices only.
+            num_topk = min(self.topk_candidates, box_preds.size(0))
+
+            # torch.sort is actually faster than .topk (at least on GPUs)
+            predicted_prob, topk_idxs = scores.sort(descending=True)
+            topk_scores = predicted_prob[:num_topk]
+            topk_idxs = topk_idxs[:num_topk]
+
+            # filter out the proposals with low confidence score
+            keep_idxs = topk_scores > self.conf_thresh
+            scores = topk_scores[keep_idxs]
+            topk_idxs = topk_idxs[keep_idxs]
+
+            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+            labels = topk_idxs % self.num_classes
+
+            bboxes = box_preds[anchor_idxs]
 
         # to cpu & numpy
         scores = scores.cpu().numpy()
@@ -130,13 +117,13 @@ class Yolov1(nn.Module):
         outputs['image_size'] = [x.shape[2], x.shape[3]]
 
         if not self.training:
-            all_obj_preds = [outputs['pred_obj'],]
-            all_cls_preds = [outputs['pred_cls'],]
-            all_box_preds = [outputs['pred_box'],]
+            obj_preds = outputs['pred_obj']
+            cls_preds = outputs['pred_cls']
+            box_preds = outputs['pred_box']
 
             # post process
             bboxes, scores, labels = self.post_process(
-                all_obj_preds, all_cls_preds, all_box_preds)
+                obj_preds, cls_preds, box_preds)
             outputs = {
                 "scores": scores,
                 "labels": labels,

+ 12 - 11
yolo/models/yolov1/yolov1_backbone.py

@@ -20,9 +20,9 @@ class Yolov1Backbone(nn.Module):
 
 
 if __name__=='__main__':
-    import time
     from thop import profile
-    # YOLOv8-Base config
+
+    # YOLOv1 configuration
     class Yolov1BaseConfig(object):
         def __init__(self) -> None:
             # ---------------- Model config ----------------
@@ -31,20 +31,21 @@ if __name__=='__main__':
             ## Backbone
             self.backbone       = 'resnet18'
             self.use_pretrained = True
-
     cfg = Yolov1BaseConfig()
+
     # Build backbone
     model = Yolov1Backbone(cfg)
 
+    # Randomly generate input data
+    x = torch.randn(2, 3, 640, 640)
+
     # Inference
-    x = torch.randn(1, 3, 640, 640)
-    t0 = time.time()
     output = model(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print(output.shape)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of output : ', output.shape)
 
+    x = torch.randn(1, 3, 640, 640)
     flops, params = profile(model, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))    
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))

+ 15 - 14
yolo/models/yolov1/yolov1_neck.py

@@ -52,7 +52,7 @@ if __name__=='__main__':
     from thop import profile
     # Model config
     
-    # YOLOv8-Base config
+    # YOLOv1 configuration
     class Yolov1BaseConfig(object):
         def __init__(self) -> None:
             # ---------------- Model config ----------------
@@ -64,22 +64,23 @@ if __name__=='__main__':
             self.neck_depthwise = False
             self.neck_expand_ratio = 0.5
             self.spp_pooling_size  = 5
-
     cfg = Yolov1BaseConfig()
-    # Build a head
+
+    # Build a neck
     in_dim  = 512
     out_dim = 512
-    neck = SPPF(cfg, 512, 512)
+    model = SPPF(cfg, 512, 512)
+
+    # Randomly generate input data
+    x = torch.randn(2, in_dim, 20, 20)
 
     # Inference
-    x = torch.randn(1, in_dim, 20, 20)
-    t0 = time.time()
-    output = neck(x)
-    t1 = time.time()
-    print('Time: ', t1 - t0)
-    print('Neck output: ', output.shape)
+    output = model(x)
+    print(' - the shape of input :  ', x.shape)
+    print(' - the shape of output : ', output.shape)
 
-    flops, params = profile(neck, inputs=(x, ), verbose=False)
-    print('==============================')
-    print('GFLOPs : {:.2f}'.format(flops / 1e9 * 2))
-    print('Params : {:.2f} M'.format(params / 1e6))
+    x = torch.randn(1, in_dim, 20, 20)
+    flops, params = profile(model, inputs=(x, ), verbose=False)
+    print('============== FLOPs & Params ================')
+    print(' - FLOPs  : {:.2f} G'.format(flops / 1e9 * 2))
+    print(' - Params : {:.2f} M'.format(params / 1e6))