yjh0410 11 months ago
parent
commit
80efe8c210
2 changed files with 55 additions and 68 deletions
  1. 4 4
      yolo/models/yolov2/matcher.py
  2. 51 64
      yolo/models/yolov2/yolov2.py

+ 4 - 4
yolo/models/yolov2/matcher.py

@@ -12,21 +12,21 @@ class Yolov2Matcher(object):
         self.anchor_boxes = np.array(
             [[0., 0., anchor[0], anchor[1]]
             for anchor in anchor_size]
-            )  # [KA, 4]
+            )  # [K, 4]
 
 
     def compute_iou(self, anchor_boxes, gt_box):
         """
-            anchor_boxes : ndarray -> [KA, 4] (cx, cy, bw, bh).
+            anchor_boxes : ndarray -> [K, 4] (cx, cy, bw, bh).
             gt_box : ndarray -> [1, 4] (cx, cy, bw, bh).
         """
-        # anchors: [KA, 4]
+        # anchors: [K, 4]
         anchors = np.zeros_like(anchor_boxes)
         anchors[..., :2] = anchor_boxes[..., :2] - anchor_boxes[..., 2:] * 0.5  # x1y1
         anchors[..., 2:] = anchor_boxes[..., :2] + anchor_boxes[..., 2:] * 0.5  # x2y2
         anchors_area = anchor_boxes[..., 2] * anchor_boxes[..., 3]
         
-        # gt_box: [1, 4] -> [KA, 4]
+        # gt_box: [1, 4] -> [K, 4]
         gt_box = np.array(gt_box).reshape(-1, 4)
         gt_box = np.repeat(gt_box, anchors.shape[0], axis=0)
         gt_box_ = np.zeros_like(gt_box)

+ 51 - 64
yolo/models/yolov2/yolov2.py

@@ -38,71 +38,58 @@ class Yolov2(nn.Module):
         """
         We process predictions at each scale hierarchically
         Input:
-            obj_preds: List[torch.Tensor] -> [[B, M, 1], ...], B=1
-            cls_preds: List[torch.Tensor] -> [[B, M, C], ...], B=1
-            box_preds: List[torch.Tensor] -> [[B, M, 4], ...], B=1
+            obj_preds: torch.Tensor -> [B, M, 1], B=1
+            cls_preds: torch.Tensor -> [B, M, C], B=1
+            box_preds: torch.Tensor -> [B, M, 4], B=1
         Output:
             bboxes: np.array -> [N, 4]
             scores: np.array -> [N,]
             labels: np.array -> [N,]
-        """
-        all_scores = []
-        all_labels = []
-        all_bboxes = []
-        
-        for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
-            obj_pred_i = obj_pred_i[0]
-            cls_pred_i = cls_pred_i[0]
-            box_pred_i = box_pred_i[0]
-            if self.no_multi_labels:
-                # [M,]
-                scores, labels = torch.max(
-                    torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
-
-                # Keep top k top scoring indices only.
-                num_topk = min(self.topk_candidates, box_pred_i.size(0))
-
-                # topk candidates
-                predicted_prob, topk_idxs = scores.sort(descending=True)
-                topk_scores = predicted_prob[:num_topk]
-                topk_idxs = topk_idxs[:num_topk]
-
-                # filter out the proposals with low confidence score
-                keep_idxs = topk_scores > self.conf_thresh
-                scores = topk_scores[keep_idxs]
-                topk_idxs = topk_idxs[keep_idxs]
-
-                labels = labels[topk_idxs]
-                bboxes = box_pred_i[topk_idxs]
-            else:
-                # [M, C] -> [MC,]
-                scores_i = torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()).flatten()
-
-                # Keep top k top scoring indices only.
-                num_topk = min(self.topk_candidates, box_pred_i.size(0))
-
-                # torch.sort is actually faster than .topk (at least on GPUs)
-                predicted_prob, topk_idxs = scores_i.sort(descending=True)
-                topk_scores = predicted_prob[:num_topk]
-                topk_idxs = topk_idxs[:num_topk]
-
-                # filter out the proposals with low confidence score
-                keep_idxs = topk_scores > self.conf_thresh
-                scores = topk_scores[keep_idxs]
-                topk_idxs = topk_idxs[keep_idxs]
-
-                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-                labels = topk_idxs % self.num_classes
-
-                bboxes = box_pred_i[anchor_idxs]
-
-            all_scores.append(scores)
-            all_labels.append(labels)
-            all_bboxes.append(bboxes)
-
-        scores = torch.cat(all_scores, dim=0)
-        labels = torch.cat(all_labels, dim=0)
-        bboxes = torch.cat(all_bboxes, dim=0)
+        """        
+        obj_preds = obj_preds[0]
+        cls_preds = cls_preds[0]
+        box_preds = box_preds[0]
+        if self.no_multi_labels:
+            # [M,]
+            scores, labels = torch.max(
+                torch.sqrt(obj_preds.sigmoid() * cls_preds.sigmoid()), dim=1)
+
+            # Keep top k top scoring indices only.
+            num_topk = min(self.topk_candidates, box_preds.size(0))
+
+            # topk candidates
+            predicted_prob, topk_idxs = scores.sort(descending=True)
+            topk_scores = predicted_prob[:num_topk]
+            topk_idxs = topk_idxs[:num_topk]
+
+            # filter out the proposals with low confidence score
+            keep_idxs = topk_scores > self.conf_thresh
+            scores = topk_scores[keep_idxs]
+            topk_idxs = topk_idxs[keep_idxs]
+
+            labels = labels[topk_idxs]
+            bboxes = box_preds[topk_idxs]
+        else:
+            # [M, C] -> [MC,]
+            scores = torch.sqrt(obj_preds.sigmoid() * cls_preds.sigmoid()).flatten()
+
+            # Keep top k top scoring indices only.
+            num_topk = min(self.topk_candidates, box_preds.size(0))
+
+            # torch.sort is actually faster than .topk (at least on GPUs)
+            predicted_prob, topk_idxs = scores.sort(descending=True)
+            topk_scores = predicted_prob[:num_topk]
+            topk_idxs = topk_idxs[:num_topk]
+
+            # filter out the proposals with low confidence score
+            keep_idxs = topk_scores > self.conf_thresh
+            scores = topk_scores[keep_idxs]
+            topk_idxs = topk_idxs[keep_idxs]
+
+            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+            labels = topk_idxs % self.num_classes
+
+            bboxes = box_preds[anchor_idxs]
 
         # to cpu & numpy
         scores = scores.cpu().numpy()
@@ -130,9 +117,9 @@ class Yolov2(nn.Module):
         outputs['image_size'] = [x.shape[2], x.shape[3]]
 
         if not self.training:
-            all_obj_preds = [outputs['pred_obj'],]
-            all_cls_preds = [outputs['pred_cls'],]
-            all_box_preds = [outputs['pred_box'],]
+            all_obj_preds = outputs['pred_obj']
+            all_cls_preds = outputs['pred_cls']
+            all_box_preds = outputs['pred_box']
 
             # post process
             bboxes, scores, labels = self.post_process(