Browse Source

modify YOLOvx

yjh0410 2 years ago
parent
commit
4f4412f1bc

+ 1 - 22
models/detectors/yolovx/build.py

@@ -30,28 +30,7 @@ def build_yolovx(args, cfg, device, num_classes=80, trainable=False, deploy=Fals
         if isinstance(m, nn.BatchNorm2d):
             m.eps = 1e-3
             m.momentum = 0.03    
-    # Init bias
-    init_prob = 0.01
-    bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
-    # obj pred
-    for obj_pred in model.obj_preds:
-        b = obj_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-    # cls pred
-    for cls_pred in model.cls_preds:
-        b = cls_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-    # reg pred
-    for reg_pred in model.reg_preds:
-        b = reg_pred.bias.view(-1, )
-        b.data.fill_(1.0)
-        reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        w = reg_pred.weight
-        w.data.fill_(0.)
-        reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
-
+            
     # -------------- Build criterion --------------
     criterion = None
     if trainable:
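
The deleted bias initialization is not dropped from the project: it moves into the new SingleLevelPredLayer in models/detectors/yolovx/yolovx_pred.py (see below), so each prediction layer now initializes itself. The value follows the prior-probability trick from RetinaNet: pick the pre-sigmoid bias b so that sigmoid(b) equals a small prior p, which keeps early training from being swamped by the many easy negatives. A minimal sketch of the math:

import torch

# Choose b with sigmoid(b) = p, i.e. b = -log((1 - p) / p).
# With p = 0.01 this gives b ≈ -4.595.
p = 0.01
b = -torch.log(torch.tensor((1.0 - p) / p))
assert torch.allclose(torch.sigmoid(b), torch.tensor(p))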

+ 38 - 115
models/detectors/yolovx/yolovx.py

@@ -7,7 +7,8 @@ import torch.nn.functional as F
 from .yolovx_backbone import build_backbone
 from .yolovx_neck import build_neck
 from .yolovx_pafpn import build_fpn
-from .yolovx_head import build_head
+from .yolovx_head import build_det_head
+from .yolovx_pred import build_pred_layer
 
 # --------------- External components ---------------
 from utils.misc import multiclass_nms
@@ -50,43 +51,13 @@ class YOLOvx(nn.Module):
         self.fpn_dims = self.fpn.out_dim
 
         ## ----------- Heads -----------
-        self.heads = nn.ModuleList(
-            [build_head(cfg, fpn_dim, self.head_dim, num_classes) 
-            for fpn_dim in self.fpn_dims
-            ])
+        self.det_heads = build_det_head(cfg, self.fpn_dims, self.head_dim, num_classes)
 
         ## ----------- Preds -----------
-        self.obj_preds = nn.ModuleList(
-                            [nn.Conv2d(head.reg_out_dim, 1, kernel_size=1) 
-                                for head in self.heads
-                              ]) 
-        self.cls_preds = nn.ModuleList(
-                            [nn.Conv2d(head.cls_out_dim, self.num_classes, kernel_size=1) 
-                                for head in self.heads
-                              ]) 
-        self.reg_preds = nn.ModuleList(
-                            [nn.Conv2d(head.reg_out_dim, 4, kernel_size=1) 
-                                for head in self.heads
-                              ])                 
-
-
-    # ---------------------- Basic Functions ----------------------
-    ## generate anchor points
-    def generate_anchors(self, level, fmp_size):
-        """
-            fmp_size: (List) [H, W]
-        """
-        # generate grid cells
-        fmp_h, fmp_w = fmp_size
-        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
-        # [H, W, 2] -> [HW, 2]
-        anchor_xy = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
-        anchor_xy += 0.5  # add center offset
-        anchor_xy *= self.stride[level]
-        anchors = anchor_xy.to(self.device)
-
-        return anchors
-        
+        self.pred_layers = build_pred_layer(
+            self.head_dim, self.head_dim, self.stride, num_classes, num_coords=4, num_levels=len(self.stride))
+
+
     ## post-process
     def post_process(self, obj_preds, cls_preds, box_preds):
         """
@@ -101,6 +72,10 @@ class YOLOvx(nn.Module):
         all_bboxes = []
         
         for obj_pred_i, cls_pred_i, box_pred_i in zip(obj_preds, cls_preds, box_preds):
+            obj_pred_i = obj_pred_i[0]
+            cls_pred_i = cls_pred_i[0]
+            box_pred_i = box_pred_i[0]
+            
             # (H x W x KA x C,)
             scores_i = (torch.sqrt(obj_pred_i.sigmoid() * cls_pred_i.sigmoid())).flatten()
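
The only behavioral change in post_process is the added [0] indexing: the new prediction layers keep the batch dimension, so each per-level tensor arrives as [B, M, C] and the single-image path takes batch item 0. The score itself is unchanged: the geometric mean of objectness and class confidence, which stays low unless both branches agree. A small numeric sketch with hypothetical logits:

import torch

obj_logit = torch.tensor(2.0)    # hypothetical objectness logit
cls_logit = torch.tensor(-1.0)   # hypothetical class logit
score = torch.sqrt(obj_logit.sigmoid() * cls_logit.sigmoid())
# sigmoid(2.0) ≈ 0.881, sigmoid(-1.0) ≈ 0.269 -> score ≈ 0.487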
 
@@ -145,48 +120,29 @@ class YOLOvx(nn.Module):
     # ---------------------- Main Process for Inference ----------------------
     @torch.no_grad()
     def inference_single_image(self, x):
-        # backbone
+        # ---------------- Backbone ----------------
         pyramid_feats = self.backbone(x)
 
-        # fpn
+        # ---------------- Neck: SPP ----------------
+        pyramid_feats[-1] = self.neck(pyramid_feats[-1])
+
+        # ---------------- Neck: PaFPN ----------------
         pyramid_feats = self.fpn(pyramid_feats)
 
-        # non-shared heads
-        all_obj_preds = []
-        all_cls_preds = []
-        all_box_preds = []
-        for level, (feat, head) in enumerate(zip(pyramid_feats, self.heads)):
-            cls_feat, reg_feat = head(feat)
-
-            # [1, C, H, W]
-            obj_pred = self.obj_preds[level](reg_feat)
-            cls_pred = self.cls_preds[level](cls_feat)
-            reg_pred = self.reg_preds[level](reg_feat)
-
-            # anchors: [M, 2]
-            fmp_size = cls_pred.shape[-2:]
-            anchors = self.generate_anchors(level, fmp_size)
-
-            # [1, C, H, W] -> [H, W, C] -> [M, C]
-            obj_pred = obj_pred[0].permute(1, 2, 0).contiguous().view(-1, 1)
-            cls_pred = cls_pred[0].permute(1, 2, 0).contiguous().view(-1, self.num_classes)
-            reg_pred = reg_pred[0].permute(1, 2, 0).contiguous().view(-1, 4)
-
-            # decode bbox
-            ctr_pred = reg_pred[..., :2] * self.stride[level] + anchors[..., :2]
-            wh_pred = torch.exp(reg_pred[..., 2:]) * self.stride[level]
-            pred_x1y1 = ctr_pred - wh_pred * 0.5
-            pred_x2y2 = ctr_pred + wh_pred * 0.5
-            box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
-
-            all_obj_preds.append(obj_pred)
-            all_cls_preds.append(cls_pred)
-            all_box_preds.append(box_pred)
+        # ---------------- Heads ----------------
+        cls_feats, reg_feats = self.det_heads(pyramid_feats)
+
+        # ---------------- Preds ----------------
+        outputs = self.pred_layers(cls_feats, reg_feats)
+
+        all_obj_preds = outputs['pred_obj']
+        all_cls_preds = outputs['pred_cls']
+        all_box_preds = outputs['pred_box']
 
         if self.deploy:
-            obj_preds = torch.cat(all_obj_preds, dim=0)
-            cls_preds = torch.cat(all_cls_preds, dim=0)
-            box_preds = torch.cat(all_box_preds, dim=0)
+            obj_preds = torch.cat(all_obj_preds, dim=1)[0]
+            cls_preds = torch.cat(all_cls_preds, dim=1)[0]
+            box_preds = torch.cat(all_box_preds, dim=1)[0]
             scores = torch.sqrt(obj_preds.sigmoid() * cls_preds.sigmoid())
             bboxes = box_preds
             # [n_anchors_all, 4 + C]
@@ -205,52 +161,19 @@ class YOLOvx(nn.Module):
         if not self.trainable:
             return self.inference_single_image(x)
         else:
-            # backbone
+            # ---------------- Backbone ----------------
             pyramid_feats = self.backbone(x)
 
-            # fpn
+            # ---------------- Neck: SPP ----------------
+            pyramid_feats[-1] = self.neck(pyramid_feats[-1])
+
+            # ---------------- Neck: PaFPN ----------------
             pyramid_feats = self.fpn(pyramid_feats)
 
-            # non-shared heads
-            all_anchors = []
-            all_obj_preds = []
-            all_cls_preds = []
-            all_box_preds = []
-            for level, (feat, head) in enumerate(zip(pyramid_feats, self.heads)):
-                cls_feat, reg_feat = head(feat)
-
-                # [B, C, H, W]
-                obj_pred = self.obj_preds[level](reg_feat)
-                cls_pred = self.cls_preds[level](cls_feat)
-                reg_pred = self.reg_preds[level](reg_feat)
-
-                B, _, H, W = cls_pred.size()
-                fmp_size = [H, W]
-                # generate anchor boxes: [M, 4]
-                anchors = self.generate_anchors(level, fmp_size)
-                
-                # [B, C, H, W] -> [B, H, W, C] -> [B, M, C]
-                obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
-                cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
-                reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
-
-                # decode bbox
-                ctr_pred = reg_pred[..., :2] * self.stride[level] + anchors[..., :2]
-                wh_pred = torch.exp(reg_pred[..., 2:]) * self.stride[level]
-                pred_x1y1 = ctr_pred - wh_pred * 0.5
-                pred_x2y2 = ctr_pred + wh_pred * 0.5
-                box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
-
-                all_obj_preds.append(obj_pred)
-                all_cls_preds.append(cls_pred)
-                all_box_preds.append(box_pred)
-                all_anchors.append(anchors)
-            
-            # output dict
-            outputs = {"pred_obj": all_obj_preds,        # List(Tensor) [B, M, 1]
-                       "pred_cls": all_cls_preds,        # List(Tensor) [B, M, C]
-                       "pred_box": all_box_preds,        # List(Tensor) [B, M, 4]
-                       "anchors": all_anchors,           # List(Tensor) [B, M, 2]
-                       'strides': self.stride}           # List(Int) [8, 16, 32]
+            # ---------------- Heads ----------------
+            cls_feats, reg_feats = self.det_heads(pyramid_feats)
 
+            # ---------------- Preds ----------------
+            outputs = self.pred_layers(cls_feats, reg_feats)
+            
             return outputs 
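
With the per-level loop gone, training and inference now share the same five-stage flow: backbone -> SPP neck on the last pyramid level -> PaFPN -> decoupled heads -> prediction layers. A shape sketch, assuming a 640x640 input and the strides [8, 16, 32] noted elsewhere in this diff:

# Hypothetical shapes; M_l = H_l * W_l anchors per level.
strides = [8, 16, 32]
fmp_sizes = [(640 // s, 640 // s) for s in strides]  # (80, 80), (40, 40), (20, 20)
num_anchors = [h * w for h, w in fmp_sizes]          # 6400, 1600, 400

# outputs['pred_obj'][l]: [B, M_l, 1]
# outputs['pred_cls'][l]: [B, M_l, num_classes]
# outputs['pred_box'][l]: [B, M_l, 4]
# Deploy mode concatenates the levels along dim=1 (the anchor dimension)
# and strips the batch dim, giving [sum(M_l), 4 + C] = [8400, 4 + C] here.
print(sum(num_anchors))  # 8400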

+ 70 - 31
models/detectors/yolovx/yolovx_head.py

@@ -4,55 +4,55 @@ import torch.nn as nn
 from .yolovx_basic import Conv
 
 
-class DecoupledHead(nn.Module):
-    def __init__(self, cfg, in_dim, out_dim, num_classes=80):
+class SingleLevelHead(nn.Module):
+    def __init__(self, in_dim, out_dim, num_classes, num_cls_head, num_reg_head, act_type, norm_type, depthwise):
         super().__init__()
-        print('==============================')
-        print('Head: Decoupled Head')
+        # --------- Basic Parameters ----------
         self.in_dim = in_dim
-        self.num_cls_head=cfg['num_cls_head']
-        self.num_reg_head=cfg['num_reg_head']
-        self.act_type=cfg['head_act']
-        self.norm_type=cfg['head_norm']
-
-        # cls head
+        self.num_classes = num_classes
+        self.num_cls_head = num_cls_head
+        self.num_reg_head = num_reg_head
+        self.act_type = act_type
+        self.norm_type = norm_type
+        self.depthwise = depthwise
+        
+        # --------- Network Parameters ----------
+        ## cls head
         cls_feats = []
         self.cls_out_dim = out_dim
-        for i in range(cfg['num_cls_head']):
+        for i in range(num_cls_head):
             if i == 0:
                 cls_feats.append(
                     Conv(in_dim, self.cls_out_dim, k=3, p=1, s=1, 
-                        act_type=self.act_type,
-                        norm_type=self.norm_type,
-                        depthwise=cfg['head_depthwise'])
+                         act_type=act_type,
+                         norm_type=norm_type,
+                         depthwise=depthwise)
                         )
             else:
                 cls_feats.append(
                     Conv(self.cls_out_dim, self.cls_out_dim, k=3, p=1, s=1, 
-                        act_type=self.act_type,
-                        norm_type=self.norm_type,
-                        depthwise=cfg['head_depthwise'])
-                        )
-                
-        # reg head
+                        act_type=act_type,
+                        norm_type=norm_type,
+                        depthwise=depthwise)
+                        )      
+        ## reg head
         reg_feats = []
         self.reg_out_dim = out_dim
-        for i in range(cfg['num_reg_head']):
+        for i in range(num_reg_head):
             if i == 0:
                 reg_feats.append(
                     Conv(in_dim, self.reg_out_dim, k=3, p=1, s=1, 
-                        act_type=self.act_type,
-                        norm_type=self.norm_type,
-                        depthwise=cfg['head_depthwise'])
+                         act_type=act_type,
+                         norm_type=norm_type,
+                         depthwise=depthwise)
                         )
             else:
                 reg_feats.append(
                     Conv(self.reg_out_dim, self.reg_out_dim, k=3, p=1, s=1, 
-                        act_type=self.act_type,
-                        norm_type=self.norm_type,
-                        depthwise=cfg['head_depthwise'])
+                         act_type=act_type,
+                         norm_type=norm_type,
+                         depthwise=depthwise)
                         )
-
         self.cls_feats = nn.Sequential(*cls_feats)
         self.reg_feats = nn.Sequential(*reg_feats)
 
@@ -67,8 +67,47 @@ class DecoupledHead(nn.Module):
         return cls_feats, reg_feats
     
 
+class MultiLevelHead(nn.Module):
+    def __init__(self, cfg, in_dims, out_dim, num_classes=80):
+        super().__init__()
+        # --------- Basic Parameters ----------
+        self.in_dims = in_dims
+        self.num_classes = num_classes
+
+        ## ----------- Network Parameters -----------
+        self.det_heads = nn.ModuleList(
+            [SingleLevelHead(
+                in_dim,
+                out_dim,
+                num_classes,
+                cfg['num_cls_head'],
+                cfg['num_reg_head'],
+                cfg['head_act'],
+                cfg['head_norm'],
+                cfg['head_depthwise'])
+                for in_dim in in_dims
+            ])
+
+
+    def forward(self, feats):
+        """
+            feats: List[(Tensor)] [[B, C, H, W], ...]
+        """
+        cls_feats = []
+        reg_feats = []
+        for feat, head in zip(feats, self.det_heads):
+            # ---------------- Pred ----------------
+            cls_feat, reg_feat = head(feat)
+
+            cls_feats.append(cls_feat)
+            reg_feats.append(reg_feat)
+
+        return cls_feats, reg_feats
+    
+
 # build detection head
-def build_head(cfg, in_dim, out_dim, num_classes=80):
-    head = DecoupledHead(cfg, in_dim, out_dim, num_classes) 
+def build_det_head(cfg, in_dims, out_dim, num_classes=80):
+    if cfg['head'] == 'decoupled_head':
+        head = MultiLevelHead(cfg, in_dims, out_dim, num_classes)
+    else:
+        raise NotImplementedError('unknown head: {}'.format(cfg['head']))
 
-    return head
+    return head
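
A minimal usage sketch for the refactored head. The cfg keys match the lookups above; the channel widths and level count are hypothetical, and the calls are commented out because they need the repo's Conv block to be importable:

import torch
from models.detectors.yolovx.yolovx_head import build_det_head

cfg = {
    'head': 'decoupled_head',   # the only value build_det_head handles
    'num_cls_head': 2,
    'num_reg_head': 2,
    'head_act': 'silu',
    'head_norm': 'BN',
    'head_depthwise': False,
}
# det_heads = build_det_head(cfg, [256, 512, 1024], 256, num_classes=80)
# feats = [torch.randn(2, c, s, s) for c, s in zip([256, 512, 1024], [80, 40, 20])]
# cls_feats, reg_feats = det_heads(feats)  # two lists of [2, 256, H, W] tensors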

+ 143 - 0
models/detectors/yolovx/yolovx_pred.py

@@ -0,0 +1,143 @@
+import torch
+import torch.nn as nn
+
+
+class SingleLevelPredLayer(nn.Module):
+    def __init__(self, cls_dim, reg_dim, num_classes, num_coords=4):
+        super().__init__()
+        # --------- Basic Parameters ----------
+        self.cls_dim = cls_dim
+        self.reg_dim = reg_dim
+        self.num_classes = num_classes
+        self.num_coords = num_coords
+
+        # --------- Network Parameters ----------
+        self.obj_pred = nn.Conv2d(reg_dim, 1, kernel_size=1)
+        self.cls_pred = nn.Conv2d(cls_dim, num_classes, kernel_size=1)
+        self.reg_pred = nn.Conv2d(reg_dim, num_coords, kernel_size=1)                
+
+        self.init_bias()
+        
+
+    def init_bias(self):
+        # Init bias
+        init_prob = 0.01
+        bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
+        # obj pred
+        b = self.obj_pred.bias.view(1, -1)
+        b.data.fill_(bias_value.item())
+        self.obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # cls pred
+        b = self.cls_pred.bias.view(1, -1)
+        b.data.fill_(bias_value.item())
+        self.cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        # reg pred
+        b = self.reg_pred.bias.view(-1, )
+        b.data.fill_(1.0)
+        self.reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+        w = self.reg_pred.weight
+        w.data.fill_(0.)
+        self.reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
+
+
+    def forward(self, cls_feat, reg_feat):
+        """
+            cls_feat, reg_feat: (Tensor) [B, C, H, W]
+        """
+        obj_pred = self.obj_pred(reg_feat)
+        cls_pred = self.cls_pred(cls_feat)
+        reg_pred = self.reg_pred(reg_feat)
+
+        return obj_pred, cls_pred, reg_pred
+    
+
+class MultiLevelPredLayer(nn.Module):
+    def __init__(self, cls_dim, reg_dim, strides, num_classes, num_coords=4, num_levels=3):
+        super().__init__()
+        # --------- Basic Parameters ----------
+        self.cls_dim = cls_dim
+        self.reg_dim = reg_dim
+        self.strides = strides
+        self.num_classes = num_classes
+        self.num_coords = num_coords
+        self.num_levels = num_levels
+
+        ## ----------- Network Parameters -----------
+        self.pred_layers = nn.ModuleList(
+            [SingleLevelPredLayer(
+                cls_dim,
+                reg_dim,
+                num_classes,
+                num_coords)
+                for _ in range(num_levels)
+            ])
+
+    def generate_anchors(self, level, fmp_size):
+        """
+            fmp_size: (List) [H, W]
+        """
+        # generate grid cells
+        fmp_h, fmp_w = fmp_size
+        anchor_y, anchor_x = torch.meshgrid([torch.arange(fmp_h), torch.arange(fmp_w)])
+        # [H, W, 2] -> [HW, 2]
+        anchors = torch.stack([anchor_x, anchor_y], dim=-1).float().view(-1, 2)
+        anchors += 0.5  # add center offset
+        anchors *= self.strides[level]
+
+        return anchors
+        
+
+    def decode_bbox(self, reg_pred, anchors, stride):
+        ctr_pred = reg_pred[..., :2] * stride + anchors[..., :2]
+        wh_pred = torch.exp(reg_pred[..., 2:]) * stride
+        pred_x1y1 = ctr_pred - wh_pred * 0.5
+        pred_x2y2 = ctr_pred + wh_pred * 0.5
+        box_pred = torch.cat([pred_x1y1, pred_x2y2], dim=-1)
+
+        return box_pred
+    
+
+    def forward(self, cls_feats, reg_feats):
+        """
+            cls_feats, reg_feats: List[(Tensor)] [[B, C, H, W], ...]
+        """
+        all_anchors = []
+        all_obj_preds = []
+        all_cls_preds = []
+        all_box_preds = []
+        for level in range(self.num_levels):
+            obj_pred, cls_pred, reg_pred = self.pred_layers[level](
+                cls_feats[level], reg_feats[level])
+
+            B, _, H, W = cls_pred.size()
+            fmp_size = [H, W]
+            # generate anchor points: [M, 2]
+            anchors = self.generate_anchors(level, fmp_size)
+            anchors = anchors.to(cls_pred.device)
+            
+            # [B, C, H, W] -> [B, H, W, C] -> [B, M, C]
+            obj_pred = obj_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 1)
+            cls_pred = cls_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, self.num_classes)
+            reg_pred = reg_pred.permute(0, 2, 3, 1).contiguous().view(B, -1, 4)
+            box_pred = self.decode_bbox(reg_pred, anchors, self.strides[level])
+
+            all_obj_preds.append(obj_pred)
+            all_cls_preds.append(cls_pred)
+            all_box_preds.append(box_pred)
+            all_anchors.append(anchors)
+
+        # output dict
+        outputs = {"pred_obj": all_obj_preds,        # List(Tensor) [B, M, 1]
+                   "pred_cls": all_cls_preds,        # List(Tensor) [B, M, C]
+                   "pred_box": all_box_preds,        # List(Tensor) [B, M, 4]
+                   "anchors": all_anchors,           # List(Tensor) [M, 2]
+                   "strides": self.strides}          # List(Int) [8, 16, 32]
+
+        return outputs
+    
+
+# build prediction layers
+def build_pred_layer(cls_dim, reg_dim, strides, num_classes, num_coords=4, num_levels=3):
+    pred_layers = MultiLevelPredLayer(cls_dim, reg_dim, strides, num_classes, num_coords, num_levels)
+
+    return pred_layers
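
decode_bbox reproduces what inference_single_image used to do inline: reg_pred[..., :2] is a center offset measured in stride units from the shifted grid point, and reg_pred[..., 2:] is a log-scale width/height. A worked example at stride 32, with hypothetical raw outputs:

import torch

stride = 32
anchor = torch.tensor([80.0, 48.0])              # grid cell (2, 1) + 0.5 offset, times stride
reg_pred = torch.tensor([0.25, -0.5, 0.0, 1.0])  # hypothetical [dx, dy, dw, dh]

ctr = reg_pred[:2] * stride + anchor             # (88.0, 32.0)
wh = torch.exp(reg_pred[2:]) * stride            # (32.0, ~87.0)
x1y1 = ctr - wh * 0.5
x2y2 = ctr + wh * 0.5
# box ≈ [72.0, -11.5, 104.0, 75.5] in input-image pixels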

+ 0 - 22
models/detectors/yolox/build.py

@@ -33,28 +33,6 @@ def build_yolox(args, cfg, device, num_classes=80, trainable=False, deploy=False
         if isinstance(m, nn.BatchNorm2d):
             m.eps = 1e-3
             m.momentum = 0.03    
-    # Init bias
-    init_prob = 0.01
-    bias_value = -torch.log(torch.tensor((1. - init_prob) / init_prob))
-    # obj pred
-    for obj_pred in model.obj_preds:
-        b = obj_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        obj_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-    # cls pred
-    for cls_pred in model.cls_preds:
-        b = cls_pred.bias.view(1, -1)
-        b.data.fill_(bias_value.item())
-        cls_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-    # reg pred
-    for reg_pred in model.reg_preds:
-        b = reg_pred.bias.view(-1, )
-        b.data.fill_(1.0)
-        reg_pred.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
-        w = reg_pred.weight
-        w.data.fill_(0.)
-        reg_pred.weight = torch.nn.Parameter(w, requires_grad=True)
-
 
     # -------------- Build criterion --------------
     criterion = None