Selaa lähdekoodia

modify post-process

yjh0410 1 vuosi sitten
vanhempi
sitoutus
f73a52f516

+ 1 - 0
models/detectors/yolov2/build.py

@@ -25,6 +25,7 @@ def build_yolov2(args, cfg, device, num_classes=80, trainable=False, deploy=Fals
                    nms_thresh         = args.nms_thresh,
                    topk               = args.topk,
                    deploy             = deploy,
+                   no_multi_labels    = args.no_multi_labels,
                    nms_class_agnostic = args.nms_class_agnostic
                    )
 

+ 40 - 18
models/detectors/yolov2/yolov2.py

@@ -20,6 +20,7 @@ class YOLOv2(nn.Module):
                  topk=100,
                  trainable=False,
                  deploy=False,
+                 no_multi_labels=False,
                  nms_class_agnostic=False):
         super(YOLOv2, self).__init__()
         # ------------------- Basic parameters -------------------
@@ -29,9 +30,10 @@ class YOLOv2(nn.Module):
         self.trainable = trainable                     # 训练的标记
         self.conf_thresh = conf_thresh                 # 得分阈值
         self.nms_thresh = nms_thresh                   # NMS阈值
-        self.topk = topk                               # topk
+        self.topk_candidates = topk                    # topk
         self.stride = 32                               # 网络的最大步长
         self.deploy = deploy
+        self.no_multi_labels = no_multi_labels
         self.nms_class_agnostic = nms_class_agnostic
         # ------------------- Anchor box -------------------
         self.anchor_size = torch.as_tensor(cfg['anchor_size']).float().view(-1, 2) # [A, 2]
@@ -113,30 +115,50 @@ class YOLOv2(nn.Module):
             cls_pred: (Tensor) [H*W*A, C]
             reg_pred: (Tensor) [H*W*A, 4]
         """
+        if self.no_multi_labels:
+            # [M,]
+            scores, labels = torch.max(torch.sqrt(obj_pred.sigmoid() * cls_pred.sigmoid()), dim=1)
+
+            # Keep top k top scoring indices only.
+            num_topk = min(self.topk_candidates, reg_pred.size(0))
+
+            # topk candidates
+            predicted_prob, topk_idxs = scores.sort(descending=True)
+            topk_scores = predicted_prob[:num_topk]
+            topk_idxs = topk_idxs[:num_topk]
+
+            # filter out the proposals with low confidence score
+            keep_idxs = topk_scores > self.conf_thresh
+            scores = topk_scores[keep_idxs]
+            topk_idxs = topk_idxs[keep_idxs]
+
+            labels = labels[topk_idxs]
+            bboxes = self.decode_boxes(anchors[topk_idxs], reg_pred[topk_idxs])
+        else:
         # (H x W x A x C,)
-        scores = torch.sqrt(obj_pred.sigmoid() * cls_pred.sigmoid()).flatten()
+            scores = torch.sqrt(obj_pred.sigmoid() * cls_pred.sigmoid()).flatten()
 
-        # Keep top k top scoring indices only.
-        num_topk = min(self.topk, reg_pred.size(0))
+            # Keep top k top scoring indices only.
+            num_topk = min(self.topk_candidates, reg_pred.size(0))
 
-        # torch.sort is actually faster than .topk (at least on GPUs)
-        predicted_prob, topk_idxs = scores.sort(descending=True)
-        topk_scores = predicted_prob[:num_topk]
-        topk_idxs = topk_idxs[:num_topk]
+            # torch.sort is actually faster than .topk (at least on GPUs)
+            predicted_prob, topk_idxs = scores.sort(descending=True)
+            topk_scores = predicted_prob[:num_topk]
+            topk_idxs = topk_idxs[:num_topk]
 
-        # filter out the proposals with low confidence score
-        keep_idxs = topk_scores > self.conf_thresh
-        scores = topk_scores[keep_idxs]
-        topk_idxs = topk_idxs[keep_idxs]
+            # filter out the proposals with low confidence score
+            keep_idxs = topk_scores > self.conf_thresh
+            scores = topk_scores[keep_idxs]
+            topk_idxs = topk_idxs[keep_idxs]
 
-        anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-        labels = topk_idxs % self.num_classes
+            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+            labels = topk_idxs % self.num_classes
 
-        reg_pred = reg_pred[anchor_idxs]
-        anchors = anchors[anchor_idxs]
+            reg_pred = reg_pred[anchor_idxs]
+            anchors = anchors[anchor_idxs]
 
-        # 解算边界框, 并归一化边界框: [H*W*A, 4]
-        bboxes = self.decode_boxes(anchors, reg_pred)
+            # 解算边界框, 并归一化边界框: [H*W*A, 4]
+            bboxes = self.decode_boxes(anchors, reg_pred)
 
         # to cpu & numpy
         scores = scores.cpu().numpy()

+ 1 - 0
models/detectors/yolov3/build.py

@@ -25,6 +25,7 @@ def build_yolov3(args, cfg, device, num_classes=80, trainable=False, deploy=Fals
                    nms_thresh         = args.nms_thresh,
                    topk               = args.topk,
                    deploy             = deploy,
+                   no_multi_labels    = args.no_multi_labels,
                    nms_class_agnostic = args.nms_class_agnostic
                    )
 

+ 47 - 21
models/detectors/yolov3/yolov3.py

@@ -20,6 +20,7 @@ class YOLOv3(nn.Module):
                  nms_thresh=0.5,
                  trainable=False,
                  deploy=False,
+                 no_multi_labels=False,
                  nms_class_agnostic=False):
         super(YOLOv3, self).__init__()
         # ------------------- Basic parameters -------------------
@@ -29,9 +30,10 @@ class YOLOv3(nn.Module):
         self.trainable = trainable                     # 训练的标记
         self.conf_thresh = conf_thresh                 # 得分阈值
         self.nms_thresh = nms_thresh                   # NMS阈值
-        self.topk = topk                               # topk
+        self.topk_candidates = topk                    # topk
         self.stride = [8, 16, 32]                      # 网络的输出步长
         self.deploy = deploy
+        self.no_multi_labels = no_multi_labels
         self.nms_class_agnostic = nms_class_agnostic
         # ------------------- Anchor box -------------------
         self.num_levels = 3
@@ -103,36 +105,60 @@ class YOLOv3(nn.Module):
     def post_process(self, obj_preds, cls_preds, box_preds):
         """
         Input:
-            obj_preds: List(Tensor) [[H x W x A, 1], ...]
-            cls_preds: List(Tensor) [[H x W x A, C], ...]
-            box_preds: List(Tensor) [[H x W x A, 4], ...]
-            anchors:   List(Tensor) [[H x W x A, 2], ...]
+            cls_preds: List[np.array] -> [[M, C], ...]
+            box_preds: List[np.array] -> [[M, 4], ...]
+            obj_preds: List[np.array] -> [[M, 1], ...] or None
+        Output:
+            bboxes: np.array -> [N, 4]
+            scores: np.array -> [N,]
+            labels: np.array -> [N,]
         """
+        assert len(cls_preds) == self.num_levels
         all_scores = []
         all_labels = []
         all_bboxes = []
         
         for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
-            # (H x W x KA x C,)
-            scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
+            if self.no_multi_labels:
+                # [M,]
+                scores, labels = torch.max(torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
 
-            # Keep top k top scoring indices only.
-            num_topk = min(self.topk, box_pred_i.size(0))
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
 
-            # torch.sort is actually faster than .topk (at least on GPUs)
-            predicted_prob, topk_idxs = scores_i.sort(descending=True)
-            topk_scores = predicted_prob[:num_topk]
-            topk_idxs = topk_idxs[:num_topk]
+                # topk candidates
+                predicted_prob, topk_idxs = scores.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
 
-            # filter out the proposals with low confidence score
-            keep_idxs = topk_scores > self.conf_thresh
-            scores = topk_scores[keep_idxs]
-            topk_idxs = topk_idxs[keep_idxs]
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-            labels = topk_idxs % self.num_classes
+                labels = labels[topk_idxs]
+                bboxes = box_pred_i[topk_idxs]
+            else:
+                # [M, C] -> [MC,]
+                scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
 
-            bboxes = box_pred_i[anchor_idxs]
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
+
+                # torch.sort is actually faster than .topk (at least on GPUs)
+                predicted_prob, topk_idxs = scores_i.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
+
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
+
+                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+                labels = topk_idxs % self.num_classes
+
+                bboxes = box_pred_i[anchor_idxs]
 
             all_scores.append(scores)
             all_labels.append(labels)
@@ -152,7 +178,7 @@ class YOLOv3(nn.Module):
             scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)
 
         return bboxes, scores, labels
-
+    
 
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()

+ 1 - 0
models/detectors/yolov4/build.py

@@ -25,6 +25,7 @@ def build_yolov4(args, cfg, device, num_classes=80, trainable=False, deploy=Fals
                    nms_thresh         = args.nms_thresh,
                    topk               = args.topk,
                    deploy             = deploy,
+                   no_multi_labels    = args.no_multi_labels,
                    nms_class_agnostic = args.nms_class_agnostic
                    )
 

+ 49 - 22
models/detectors/yolov4/yolov4.py

@@ -20,6 +20,7 @@ class YOLOv4(nn.Module):
                  topk=100,
                  trainable=False,
                  deploy=False,
+                 no_multi_labels=False,
                  nms_class_agnostic=False):
         super(YOLOv4, self).__init__()
         # ------------------- Basic parameters -------------------
@@ -29,9 +30,10 @@ class YOLOv4(nn.Module):
         self.trainable = trainable                     # 训练的标记
         self.conf_thresh = conf_thresh                 # 得分阈值
         self.nms_thresh = nms_thresh                   # NMS阈值
-        self.topk = topk                               # topk
+        self.topk_candidates = topk                    # topk
         self.stride = [8, 16, 32]                      # 网络的输出步长
         self.deploy = deploy
+        self.no_multi_labels = no_multi_labels
         self.nms_class_agnostic = nms_class_agnostic
         # ------------------- Anchor box -------------------
         self.num_levels = 3
@@ -103,36 +105,61 @@ class YOLOv4(nn.Module):
     def post_process(self, obj_preds, cls_preds, box_preds):
         """
         Input:
-            obj_preds: List(Tensor) [[H x W x A, 1], ...]
-            cls_preds: List(Tensor) [[H x W x A, C], ...]
-            box_preds: List(Tensor) [[H x W x A, 4], ...]
-            anchors:   List(Tensor) [[H x W x A, 2], ...]
+            cls_preds: List[np.array] -> [[M, C], ...]
+            box_preds: List[np.array] -> [[M, 4], ...]
+            obj_preds: List[np.array] -> [[M, 1], ...] or None
+        Output:
+            bboxes: np.array -> [N, 4]
+            scores: np.array -> [N,]
+            labels: np.array -> [N,]
         """
+        assert len(cls_preds) == self.num_levels
         all_scores = []
         all_labels = []
         all_bboxes = []
         
         for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
-            # (H x W x KA x C,)
-            scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
+            if self.no_multi_labels:
+                # [M,]
+                scores, labels = torch.max(torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
 
-            # Keep top k top scoring indices only.
-            num_topk = min(self.topk, box_pred_i.size(0))
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
 
-            # torch.sort is actually faster than .topk (at least on GPUs)
-            predicted_prob, topk_idxs = scores_i.sort(descending=True)
-            topk_scores = predicted_prob[:num_topk]
-            topk_idxs = topk_idxs[:num_topk]
+                # topk candidates
+                predicted_prob, topk_idxs = scores.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
 
-            # filter out the proposals with low confidence score
-            keep_idxs = topk_scores > self.conf_thresh
-            scores = topk_scores[keep_idxs]
-            topk_idxs = topk_idxs[keep_idxs]
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-            labels = topk_idxs % self.num_classes
+                labels = labels[topk_idxs]
+                bboxes = box_pred_i[topk_idxs]
 
-            bboxes = box_pred_i[anchor_idxs]
+            else:
+                # [M, C] -> [MC,]
+                scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
+
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
+
+                # torch.sort is actually faster than .topk (at least on GPUs)
+                predicted_prob, topk_idxs = scores_i.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
+
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
+
+                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+                labels = topk_idxs % self.num_classes
+
+                bboxes = box_pred_i[anchor_idxs]
 
             all_scores.append(scores)
             all_labels.append(labels)
@@ -149,10 +176,10 @@ class YOLOv4(nn.Module):
 
         # nms
         scores, labels, bboxes = multiclass_nms(
-            scores, labels, bboxes, self.nms_thresh, self.num_classes, False)
+            scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)
 
         return bboxes, scores, labels
-
+    
 
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()

+ 46 - 36
models/detectors/yolov5/yolov5.py

@@ -95,7 +95,7 @@ class YOLOv5(nn.Module):
         return anchors
         
     ## post-process
-    def post_process(self, cls_preds, box_preds, obj_preds=None):
+    def post_process(self, obj_preds, cls_preds, box_preds):
         """
         Input:
             cls_preds: List[np.array] -> [[M, C], ...]
@@ -110,56 +110,69 @@ class YOLOv5(nn.Module):
         all_scores = []
         all_labels = []
         all_bboxes = []
-
-        for level in range(self.num_levels):
-            cls_pred_i = cls_preds[level]
-            box_pred_i = box_preds[level]
-            num_topk = min(self.topk_candidates, box_pred_i.shape[0])
-
-            # filter out by objectness
-            obj_preds_i = obj_preds[level]
-            keep_idxs = obj_preds_i[..., 0] > self.conf_thresh
-            cls_pred_i = obj_preds_i[keep_idxs] * cls_pred_i[keep_idxs]
-            box_pred_i = box_pred_i[keep_idxs]
-
+        
+        for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
             if self.no_multi_labels:
                 # [M,]
-                scores_i, labels_i = np.max(cls_pred_i, axis=1), np.argmax(cls_pred_i, axis=1)
+                scores, labels = torch.max(torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
+
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
 
                 # topk candidates
-                topk_idxs = np.argsort(-scores_i)
+                predicted_prob, topk_idxs = scores.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
                 topk_idxs = topk_idxs[:num_topk]
-                scores_i = scores_i[topk_idxs]
-                labels_i = labels_i[topk_idxs]
-                bboxes_i = box_pred_i[topk_idxs]
+
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
+
+                labels = labels[topk_idxs]
+                bboxes = box_pred_i[topk_idxs]
+
             else:
                 # [M, C] -> [MC,]
-                scores_i = cls_pred_i.flatten()
+                scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
 
-                # topk candidates
-                predicted_prob, topk_idxs = np.sort(scores_i)[::-1], np.argsort(-scores_i)
-                scores_i = predicted_prob[:num_topk]
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
+
+                # torch.sort is actually faster than .topk (at least on GPUs)
+                predicted_prob, topk_idxs = scores_i.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
                 topk_idxs = topk_idxs[:num_topk]
 
-                anchor_idxs = topk_idxs // self.num_classes
-                bboxes_i = box_pred_i[anchor_idxs]
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-                labels_i = topk_idxs % self.num_classes
+                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+                labels = topk_idxs % self.num_classes
 
-            all_scores.append(scores_i)
-            all_labels.append(labels_i)
-            all_bboxes.append(bboxes_i)
+                bboxes = box_pred_i[anchor_idxs]
 
-        scores = np.concatenate(all_scores, axis=0)
-        labels = np.concatenate(all_labels, axis=0)
-        bboxes = np.concatenate(all_bboxes, axis=0)
+            all_scores.append(scores)
+            all_labels.append(labels)
+            all_bboxes.append(bboxes)
+
+        scores = torch.cat(all_scores)
+        labels = torch.cat(all_labels)
+        bboxes = torch.cat(all_bboxes)
+
+        # to cpu & numpy
+        scores = scores.cpu().numpy()
+        labels = labels.cpu().numpy()
+        bboxes = bboxes.cpu().numpy()
 
         # nms
         scores, labels, bboxes = multiclass_nms(
             scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)
 
         return bboxes, scores, labels
-
+    
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()
     def inference_single_image(self, x):
@@ -215,10 +228,7 @@ class YOLOv5(nn.Module):
             return outputs
         else:
             # post process
-            obj_preds = [obj_pred_i.sigmoid().cpu().numpy() for obj_pred_i in all_obj_preds]
-            cls_preds = [cls_pred_i.sigmoid().cpu().numpy() for cls_pred_i in all_cls_preds]
-            box_preds = [box_pred_i.cpu().numpy()           for box_pred_i in all_box_preds]
-            bboxes, scores, labels = self.post_process(cls_preds, box_preds, obj_preds)
+            bboxes, scores, labels = self.post_process(all_obj_preds, all_cls_preds, all_box_preds)
         
             return bboxes, scores, labels
 

+ 1 - 0
models/detectors/yolov7/build.py

@@ -25,6 +25,7 @@ def build_yolov7(args, cfg, device, num_classes=80, trainable=False, deploy=Fals
                    nms_thresh         = args.nms_thresh,
                    topk               = args.topk,
                    deploy             = deploy,
+                   no_multi_labels    = args.no_multi_labels,
                    nms_class_agnostic = args.nms_class_agnostic
                    )
 

+ 50 - 22
models/detectors/yolov7/yolov7.py

@@ -20,6 +20,7 @@ class YOLOv7(nn.Module):
                  nms_thresh=0.5,
                  trainable=False,
                  deploy = False,
+                 no_multi_labels = False,
                  nms_class_agnostic = False):
         super(YOLOv7, self).__init__()
         # ------------------- Basic parameters -------------------
@@ -29,9 +30,11 @@ class YOLOv7(nn.Module):
         self.trainable = trainable                     # 训练的标记
         self.conf_thresh = conf_thresh                 # 得分阈值
         self.nms_thresh = nms_thresh                   # NMS阈值
-        self.topk = topk                               # topk
-        self.stride = [8, 16, 32]                      # 网络的输出步长        
+        self.topk_candidates = topk                    # topk
+        self.stride = [8, 16, 32]                      # 网络的输出步长
+        self.num_levels = 3
         self.deploy = deploy
+        self.no_multi_labels = no_multi_labels
         self.nms_class_agnostic = nms_class_agnostic
         # ------------------- Network Structure -------------------
         ## 主干网络
@@ -87,36 +90,61 @@ class YOLOv7(nn.Module):
     def post_process(self, obj_preds, cls_preds, box_preds):
         """
         Input:
-            obj_preds: List(Tensor) [[H x W, 1], ...]
-            cls_preds: List(Tensor) [[H x W, C], ...]
-            box_preds: List(Tensor) [[H x W, 4], ...]
-            anchors:   List(Tensor) [[H x W, 2], ...]
+            cls_preds: List[np.array] -> [[M, C], ...]
+            box_preds: List[np.array] -> [[M, 4], ...]
+            obj_preds: List[np.array] -> [[M, 1], ...] or None
+        Output:
+            bboxes: np.array -> [N, 4]
+            scores: np.array -> [N,]
+            labels: np.array -> [N,]
         """
+        assert len(cls_preds) == self.num_levels
         all_scores = []
         all_labels = []
         all_bboxes = []
         
         for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
-            # (H x W x KA x C,)
-            scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
+            if self.no_multi_labels:
+                # [M,]
+                scores, labels = torch.max(torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
 
-            # Keep top k top scoring indices only.
-            num_topk = min(self.topk, box_pred_i.size(0))
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
 
-            # torch.sort is actually faster than .topk (at least on GPUs)
-            predicted_prob, topk_idxs = scores_i.sort(descending=True)
-            topk_scores = predicted_prob[:num_topk]
-            topk_idxs = topk_idxs[:num_topk]
+                # topk candidates
+                predicted_prob, topk_idxs = scores.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
 
-            # filter out the proposals with low confidence score
-            keep_idxs = topk_scores > self.conf_thresh
-            scores = topk_scores[keep_idxs]
-            topk_idxs = topk_idxs[keep_idxs]
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-            labels = topk_idxs % self.num_classes
+                labels = labels[topk_idxs]
+                bboxes = box_pred_i[topk_idxs]
 
-            bboxes = box_pred_i[anchor_idxs]
+            else:
+                # [M, C] -> [MC,]
+                scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
+
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
+
+                # torch.sort is actually faster than .topk (at least on GPUs)
+                predicted_prob, topk_idxs = scores_i.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
+
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
+
+                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+                labels = topk_idxs % self.num_classes
+
+                bboxes = box_pred_i[anchor_idxs]
 
             all_scores.append(scores)
             all_labels.append(labels)
@@ -136,7 +164,7 @@ class YOLOv7(nn.Module):
             scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)
 
         return bboxes, scores, labels
-
+    
 
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()

+ 1 - 0
models/detectors/yolov8/build.py

@@ -25,6 +25,7 @@ def build_yolov8(args, cfg, device, num_classes=80, trainable=False, deploy=Fals
                    nms_thresh         = args.nms_thresh,
                    topk               = args.topk,
                    deploy             = deploy,
+                   no_multi_labels    = args.no_multi_labels,
                    nms_class_agnostic = args.nms_class_agnostic
                    )
 

+ 50 - 24
models/detectors/yolov8/yolov8.py

@@ -24,6 +24,7 @@ class YOLOv8(nn.Module):
                  topk        = 1000,
                  trainable   = False,
                  deploy      = False,
+                 no_multi_labels = False,
                  nms_class_agnostic = False):
         super(YOLOv8, self).__init__()
         # ---------------------- Basic Parameters ----------------------
@@ -37,8 +38,9 @@ class YOLOv8(nn.Module):
         self.nms_thresh = nms_thresh
         self.num_levels = len(self.strides)
         self.num_classes = num_classes
-        self.topk = topk
+        self.topk_candidates = topk
         self.deploy = deploy
+        self.no_multi_labels = no_multi_labels
         self.nms_class_agnostic = nms_class_agnostic
         
         # ---------------------- Network Parameters ----------------------
@@ -69,9 +71,14 @@ class YOLOv8(nn.Module):
     def post_process(self, cls_preds, box_preds):
         """
         Input:
-            cls_preds: List(Tensor) [[H x W, C], ...]
-            box_preds: List(Tensor) [[H x W, 4], ...]
+            cls_preds: List[np.array] -> [[M, C], ...]
+            box_preds: List[np.array] -> [[M, 4], ...]
+        Output:
+            bboxes: np.array -> [N, 4]
+            scores: np.array -> [N,]
+            labels: np.array -> [N,]
         """
+        assert len(cls_preds) == self.num_levels
         all_scores = []
         all_labels = []
         all_bboxes = []
@@ -79,35 +86,54 @@ class YOLOv8(nn.Module):
         for cls_pred_i, box_pred_i in zip(cls_preds, box_preds):
             cls_pred_i = cls_pred_i[0]
             box_pred_i = box_pred_i[0]
-            
-            # (H x W x C,)
-            scores_i = cls_pred_i.sigmoid().flatten()
+            if self.no_multi_labels:
+                # [M,]
+                scores, labels = torch.max(cls_pred_i.sigmoid(), dim=1)
+
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
+
+                # topk candidates
+                predicted_prob, topk_idxs = scores.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
+
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            # Keep top k top scoring indices only.
-            num_topk = min(self.topk, box_pred_i.size(0))
+                labels = labels[topk_idxs]
+                bboxes = box_pred_i[topk_idxs]
+            else:
+                # [M, C] -> [MC,]
+                scores_i = cls_pred_i.sigmoid().flatten()
 
-            # torch.sort is actually faster than .topk (at least on GPUs)
-            predicted_prob, topk_idxs = scores_i.sort(descending=True)
-            topk_scores = predicted_prob[:num_topk]
-            topk_idxs = topk_idxs[:num_topk]
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
 
-            # filter out the proposals with low confidence score
-            keep_idxs = topk_scores > self.conf_thresh
-            scores = topk_scores[keep_idxs]
-            topk_idxs = topk_idxs[keep_idxs]
+                # torch.sort is actually faster than .topk (at least on GPUs)
+                predicted_prob, topk_idxs = scores_i.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
 
-            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-            labels = topk_idxs % self.num_classes
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            bboxes = box_pred_i[anchor_idxs]
+                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+                labels = topk_idxs % self.num_classes
+
+                bboxes = box_pred_i[anchor_idxs]
 
             all_scores.append(scores)
             all_labels.append(labels)
             all_bboxes.append(bboxes)
 
-        scores = torch.cat(all_scores)
-        labels = torch.cat(all_labels)
-        bboxes = torch.cat(all_bboxes)
+        scores = torch.cat(all_scores, dim=0)
+        labels = torch.cat(all_labels, dim=0)
+        bboxes = torch.cat(all_bboxes, dim=0)
 
         # to cpu & numpy
         scores = scores.cpu().numpy()
@@ -117,9 +143,9 @@ class YOLOv8(nn.Module):
         # nms
         scores, labels, bboxes = multiclass_nms(
             scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)
-        
-        return bboxes, scores, labels
 
+        return bboxes, scores, labels
+    
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()
     def inference_single_image(self, x):

+ 1 - 0
models/detectors/yolox/build.py

@@ -25,6 +25,7 @@ def build_yolox(args, cfg, device, num_classes=80, trainable=False, deploy=False
                    nms_thresh         = args.nms_thresh,
                    topk               = args.topk,
                    deploy             = deploy,
+                   no_multi_labels    = args.no_multi_labels,
                    nms_class_agnostic = args.nms_class_agnostic
                    )
 

+ 49 - 21
models/detectors/yolox/yolox.py

@@ -18,6 +18,7 @@ class YOLOX(nn.Module):
                  trainable = False, 
                  topk = 1000,
                  deploy = False,
+                 no_multi_labels = False,
                  nms_class_agnostic = False):
         super(YOLOX, self).__init__()
         # ---------------------- Basic Parameters ----------------------
@@ -28,8 +29,10 @@ class YOLOX(nn.Module):
         self.trainable = trainable
         self.conf_thresh = conf_thresh
         self.nms_thresh = nms_thresh
-        self.topk = topk
+        self.num_levels = 3
+        self.topk_candidates = topk
         self.deploy = deploy
+        self.no_multi_labels = no_multi_labels
         self.nms_class_agnostic = nms_class_agnostic
                 
         # ------------------- Network Structure -------------------
@@ -82,36 +85,61 @@ class YOLOX(nn.Module):
     def post_process(self, obj_preds, cls_preds, box_preds):
         """
         Input:
-            obj_preds: List(Tensor) [[H x W, 1], ...]
-            cls_preds: List(Tensor) [[H x W, C], ...]
-            box_preds: List(Tensor) [[H x W, 4], ...]
-            anchors:   List(Tensor) [[H x W, 2], ...]
+            cls_preds: List[Tensor] -> [[M, C], ...]
+            box_preds: List[Tensor] -> [[M, 4], ...]
+            obj_preds: List[Tensor] -> [[M, 1], ...] or None
+        Output:
+            bboxes: np.array -> [N, 4]
+            scores: np.array -> [N,]
+            labels: np.array -> [N,]
         """
+        assert len(cls_preds) == self.num_levels
         all_scores = []
         all_labels = []
         all_bboxes = []
         
         for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
-            # (H x W x KA x C,)
-            scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
+            if self.no_multi_labels:
+                # [M,]
+                scores, labels = torch.max(torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid()), dim=1)
 
-            # Keep top k top scoring indices only.
-            num_topk = min(self.topk, box_pred_i.size(0))
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
 
-            # torch.sort is actually faster than .topk (at least on GPUs)
-            predicted_prob, topk_idxs = scores_i.sort(descending=True)
-            topk_scores = predicted_prob[:num_topk]
-            topk_idxs = topk_idxs[:num_topk]
+                # topk candidates
+                predicted_prob, topk_idxs = scores.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
 
-            # filter out the proposals with low confidence score
-            keep_idxs = topk_scores > self.conf_thresh
-            scores = topk_scores[keep_idxs]
-            topk_idxs = topk_idxs[keep_idxs]
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-            labels = topk_idxs % self.num_classes
+                labels = labels[topk_idxs]
+                bboxes = box_pred_i[topk_idxs]
 
-            bboxes = box_pred_i[anchor_idxs]
+            else:
+                # [M, C] -> [MC,]
+                scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
+
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
+
+                # torch.sort is actually faster than .topk (at least on GPUs)
+                predicted_prob, topk_idxs = scores_i.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
+
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
+
+                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+                labels = topk_idxs % self.num_classes
+
+                bboxes = box_pred_i[anchor_idxs]
 
             all_scores.append(scores)
             all_labels.append(labels)
@@ -131,7 +159,7 @@ class YOLOX(nn.Module):
             scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)
 
         return bboxes, scores, labels
-
+    
 
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()

+ 1 - 0
models/detectors/yolox2/build.py

@@ -25,6 +25,7 @@ def build_yolox2(args, cfg, device, num_classes=80, trainable=False, deploy=Fals
                    nms_thresh         = args.nms_thresh,
                    topk               = args.topk,
                    deploy             = deploy,
+                   no_multi_labels    = args.no_multi_labels,
                    nms_class_agnostic = args.nms_class_agnostic
                    )
 

+ 50 - 24
models/detectors/yolox2/yolox2.py

@@ -24,6 +24,7 @@ class YOLOX2(nn.Module):
                  topk        = 1000,
                  trainable   = False,
                  deploy      = False,
+                 no_multi_labels = False,
                  nms_class_agnostic = False):
         super(YOLOX2, self).__init__()
         # ---------------------- Basic Parameters ----------------------
@@ -36,8 +37,9 @@ class YOLOX2(nn.Module):
         self.nms_thresh = nms_thresh
         self.num_levels = len(self.strides)
         self.num_classes = num_classes
-        self.topk = topk
+        self.topk_candidates = topk
         self.deploy = deploy
+        self.no_multi_labels = no_multi_labels
         self.nms_class_agnostic = nms_class_agnostic
         self.head_dim = round(256 * cfg['width'])
         
@@ -68,9 +70,14 @@ class YOLOX2(nn.Module):
     def post_process(self, cls_preds, box_preds):
         """
         Input:
-            cls_preds: List(Tensor) [[H x W, C], ...]
-            box_preds: List(Tensor) [[H x W, 4], ...]
+            cls_preds: List[Tensor] -> [[M, C], ...]
+            box_preds: List[Tensor] -> [[M, 4], ...]
+        Output:
+            bboxes: np.array -> [N, 4]
+            scores: np.array -> [N,]
+            labels: np.array -> [N,]
         """
+        assert len(cls_preds) == self.num_levels
         all_scores = []
         all_labels = []
         all_bboxes = []
@@ -78,35 +85,54 @@ class YOLOX2(nn.Module):
         for cls_pred_i, box_pred_i in zip(cls_preds, box_preds):
             cls_pred_i = cls_pred_i[0]
             box_pred_i = box_pred_i[0]
-            
-            # (H x W x C,)
-            scores_i = cls_pred_i.sigmoid().flatten()
+            if self.no_multi_labels:
+                # [M,]
+                scores, labels = torch.max(cls_pred_i.sigmoid(), dim=1)
+
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
+
+                # topk candidates
+                predicted_prob, topk_idxs = scores.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
+
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            # Keep top k top scoring indices only.
-            num_topk = min(self.topk, box_pred_i.size(0))
+                labels = labels[topk_idxs]
+                bboxes = box_pred_i[topk_idxs]
+            else:
+                # [M, C] -> [MC,]
+                scores_i = cls_pred_i.sigmoid().flatten()
 
-            # torch.sort is actually faster than .topk (at least on GPUs)
-            predicted_prob, topk_idxs = scores_i.sort(descending=True)
-            topk_scores = predicted_prob[:num_topk]
-            topk_idxs = topk_idxs[:num_topk]
+                # Keep top k top scoring indices only.
+                num_topk = min(self.topk_candidates, box_pred_i.size(0))
 
-            # filter out the proposals with low confidence score
-            keep_idxs = topk_scores > self.conf_thresh
-            scores = topk_scores[keep_idxs]
-            topk_idxs = topk_idxs[keep_idxs]
+                # torch.sort is actually faster than .topk (at least on GPUs)
+                predicted_prob, topk_idxs = scores_i.sort(descending=True)
+                topk_scores = predicted_prob[:num_topk]
+                topk_idxs = topk_idxs[:num_topk]
 
-            anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
-            labels = topk_idxs % self.num_classes
+                # filter out the proposals with low confidence score
+                keep_idxs = topk_scores > self.conf_thresh
+                scores = topk_scores[keep_idxs]
+                topk_idxs = topk_idxs[keep_idxs]
 
-            bboxes = box_pred_i[anchor_idxs]
+                anchor_idxs = torch.div(topk_idxs, self.num_classes, rounding_mode='floor')
+                labels = topk_idxs % self.num_classes
+
+                bboxes = box_pred_i[anchor_idxs]
 
             all_scores.append(scores)
             all_labels.append(labels)
             all_bboxes.append(bboxes)
 
-        scores = torch.cat(all_scores)
-        labels = torch.cat(all_labels)
-        bboxes = torch.cat(all_bboxes)
+        scores = torch.cat(all_scores, dim=0)
+        labels = torch.cat(all_labels, dim=0)
+        bboxes = torch.cat(all_bboxes, dim=0)
 
         # to cpu & numpy
         scores = scores.cpu().numpy()
@@ -116,9 +142,9 @@ class YOLOX2(nn.Module):
         # nms
         scores, labels, bboxes = multiclass_nms(
             scores, labels, bboxes, self.nms_thresh, self.num_classes, self.nms_class_agnostic)
-        
-        return bboxes, scores, labels
 
+        return bboxes, scores, labels
+    
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()
     def inference_single_image(self, x):