
modify inference code

yjh0410 · 1 year ago
commit 3b05261a01

+ 8 - 3
dataset/data_augment/ssd_augment.py

@@ -346,7 +346,7 @@ class SSDAugmentation(object):
     def __init__(self, img_size=640):
         self.img_size = img_size
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
         self.augment = Compose([
            ConvertFromInts(),                         # convert the image from int to float32
@@ -370,7 +370,9 @@ class SSDAugmentation(object):
         img_tensor = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
         target['boxes'] = torch.from_numpy(boxes).float()
         target['labels'] = torch.from_numpy(labels).float()
-        
+
+        # normalize image
+        img_tensor /= 255.
 
         return img_tensor, target, ratio
     
@@ -380,7 +382,7 @@ class SSDBaseTransform(object):
     def __init__(self, img_size):
         self.img_size = img_size
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
 
     def __call__(self, image, target=None, mosaic=False):
@@ -404,4 +406,7 @@ class SSDBaseTransform(object):
             target['boxes'] = torch.from_numpy(boxes).float()
             target['labels'] = torch.from_numpy(labels).float()
             
+        # normalize image
+        img_tensor /= 255.
+
         return img_tensor, target, ratio
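Net effect of this hunk: both SSDAugmentation and SSDBaseTransform now hand back an image tensor already scaled to [0, 1] (with pixel_std bumped to 255 to match), so downstream callers must drop their own `/ 255.`. A minimal standalone sketch of the new preprocessing contract; the `preprocess` helper here is illustrative, not the repo's API:

```python
import numpy as np
import torch

def preprocess(image: np.ndarray) -> torch.Tensor:
    """Illustrative stand-in for the transforms above: HWC uint8 -> CHW float in [0, 1]."""
    img_tensor = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
    # normalize image (this division now lives inside the transform)
    img_tensor /= 255.
    return img_tensor

img = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
x = preprocess(img)
assert 0.0 <= x.min().item() and x.max().item() <= 1.0
```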

+ 8 - 2
dataset/data_augment/yolov5_augment.py

@@ -337,7 +337,7 @@ class YOLOv5Augmentation(object):
         # Basic parameters
         self.img_size = img_size
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
         self.trans_config = trans_config
         # Albumentations
@@ -413,6 +413,9 @@ class YOLOv5Augmentation(object):
         dh = self.img_size - img_h0
         dw = self.img_size - img_w0
 
+        # normalize image
+        pad_image /= 255.
+
         return pad_image, target, ratio #[dw, dh]
 
 ## YOLOv5-style Transform for Eval
@@ -421,7 +424,7 @@ class YOLOv5BaseTransform(object):
         self.img_size = img_size
         self.max_stride = max_stride
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
 
     def __call__(self, image, target=None, mosaic=False):
@@ -461,4 +464,7 @@ class YOLOv5BaseTransform(object):
         pad_image = torch.ones([img_tensor.size(0), pad_img_h, pad_img_w]).float() * 114.
         pad_image[:, :img_h0, :img_w0] = img_tensor
 
+        # normalize image
+        pad_image /= 255.
+
         return pad_image, target, ratio #[dw, dh]
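One subtlety in the YOLOv5-style transforms: they pad with the gray value 114 before the new division, so padded pixels end up at 114/255 ≈ 0.447 rather than 114. A self-contained sketch of that ordering (the shapes are made up for illustration):

```python
import torch

img_tensor = torch.rand(3, 480, 640) * 255.   # stand-in for the resized image
pad_img_h, pad_img_w = 640, 640
pad_image = torch.ones([img_tensor.size(0), pad_img_h, pad_img_w]).float() * 114.
pad_image[:, :480, :640] = img_tensor
# normalize image: the gray padding becomes 114 / 255 ≈ 0.447
pad_image /= 255.
```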

+ 5 - 5
engine.py

@@ -216,7 +216,7 @@ class Yolov8Trainer(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # to device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:
@@ -584,7 +584,7 @@ class YoloxTrainer(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale and ni % 10 == 0:
@@ -954,7 +954,7 @@ class RTCTrainer(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:
@@ -1677,7 +1677,7 @@ class RTCTrainerDS(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:
@@ -2020,7 +2020,7 @@ class RTCTrainerDSP(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:

+ 1 - 1
evaluator/coco_evaluator.py

@@ -69,7 +69,7 @@ class COCOAPIEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             id_ = int(id_)
             ids.append(id_)

+ 1 - 1
evaluator/crowdhuman_evaluator.py

@@ -76,7 +76,7 @@ class CrowdHumanEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             # inference
             outputs = model(x)

+ 1 - 1
evaluator/customed_evaluator.py

@@ -52,7 +52,7 @@ class CustomedEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             id_ = int(id_)
             ids.append(id_)

+ 1 - 1
evaluator/voc_evaluator.py

@@ -67,7 +67,7 @@ class VOCAPIEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
 
             # forward
             t0 = time.time()

+ 1 - 1
evaluator/widerface_evaluator.py

@@ -64,7 +64,7 @@ class WiderFaceEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             id_ = int(id_)
             ids.append(id_)
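All five evaluators share the same preprocessing stanza, and the fix is identical in each: the transform already returns a [0, 1] tensor, so only the batch dimension and the device move remain (test.py below gets the same one-line change). A self-contained sketch, with `fake_transform` standing in for the real BaseTransforms:

```python
import numpy as np
import torch

def fake_transform(img: np.ndarray):
    # stand-in for SSDBaseTransform / YOLOv5BaseTransform:
    # returns a CHW float tensor already scaled to [0, 1]
    x = torch.from_numpy(img).permute(2, 0, 1).contiguous().float() / 255.
    return x, None, 1.0

img = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
x, _, ratio = fake_transform(img)
x = x.unsqueeze(0)   # add the batch dim; note there is no extra `/ 255.` anymore
```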

+ 8 - 4
models/detectors/rtdetr/rtdetr.py

@@ -64,7 +64,6 @@ class RT_DETR(nn.Module):
             topk_labels = labels[topk_idxs]
             topk_bboxes = box_pred[topk_idxs]
 
-            return topk_bboxes, topk_scores, topk_labels
         else:
             # Top-k select
             cls_pred = cls_pred.flatten().sigmoid_()
@@ -101,12 +100,17 @@ class RT_DETR(nn.Module):
         if self.training:
             return transformer_outputs
         else:
+            img_h, img_w = x.shape[2:]
             pred_boxes, pred_logits = transformer_outputs[0], transformer_outputs[1]
-            box_preds = pred_boxes[-1]
-            cls_preds = pred_logits[-1]
+            box_pred = pred_boxes[-1]
+            cls_pred = pred_logits[-1]
+
+            # rescale bbox
+            box_pred[..., [0, 2]] *= img_w
+            box_pred[..., [1, 3]] *= img_h
             
             # post-process
-            bboxes, scores, labels = self.post_process(box_preds, cls_preds)
+            bboxes, scores, labels = self.post_process(box_pred, cls_pred)
 
             outputs = {
                 "scores": scores.cpu().numpy(),

+ 1 - 1
test.py

@@ -98,7 +98,7 @@ def test_det(args,
 
         # prepare
         x, _, ratio = transform(image)
-        x = x.unsqueeze(0).to(device) / 255.
+        x = x.unsqueeze(0).to(device)
 
         t0 = time.time()
         # inference

+ 2 - 0
utils/box_ops.py

@@ -25,6 +25,8 @@ def rescale_bboxes(bboxes, origin_size, ratio):
     elif isinstance(ratio, List):
         bboxes[..., [0, 2]] /= ratio[0]
         bboxes[..., [1, 3]] /= ratio[1]
+    else:
+        raise NotImplementedError("ratio should be an int or a List[int, int].")
 
     # clip bboxes
     bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_size[0])
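The new else branch turns a silent no-op into a loud failure when `ratio` has an unexpected type. A hedged, self-contained mirror of how the function now reads; the first branch is assumed to accept plain scalars, matching the `elif` shown in the hunk:

```python
import numpy as np

def rescale_bboxes(bboxes, origin_size, ratio):
    # undo the resize ratio (scalar, or per-axis [rw, rh]), then clip to the image
    if isinstance(ratio, (int, float)):
        bboxes /= ratio
    elif isinstance(ratio, list):
        bboxes[..., [0, 2]] /= ratio[0]
        bboxes[..., [1, 3]] /= ratio[1]
    else:
        raise NotImplementedError("ratio should be an int or a List[int, int].")
    bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_size[0])
    bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_size[1])
    return bboxes

boxes = np.array([[100., 50., 300., 200.]])
print(rescale_bboxes(boxes.copy(), origin_size=[640, 480], ratio=0.5))
print(rescale_bboxes(boxes.copy(), origin_size=[640, 480], ratio=[0.5, 0.75]))
```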