
modify inference code

yjh0410 · 1 year ago
commit 3b05261a01

+ 8 - 3
dataset/data_augment/ssd_augment.py

@@ -346,7 +346,7 @@ class SSDAugmentation(object):
     def __init__(self, img_size=640):
         self.img_size = img_size
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
         self.augment = Compose([
            ConvertFromInts(),                         # convert the image from int to float32
@@ -370,7 +370,9 @@ class SSDAugmentation(object):
         img_tensor = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
         target['boxes'] = torch.from_numpy(boxes).float()
         target['labels'] = torch.from_numpy(labels).float()
-        
+
+        # normalize image
+        img_tensor /= 255.
 
         return img_tensor, target, ratio
     
@@ -380,7 +382,7 @@ class SSDBaseTransform(object):
     def __init__(self, img_size):
         self.img_size = img_size
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
 
     def __call__(self, image, target=None, mosaic=False):
@@ -404,4 +406,7 @@ class SSDBaseTransform(object):
             target['boxes'] = torch.from_numpy(boxes).float()
             target['labels'] = torch.from_numpy(labels).float()
             
+        # normalize image
+        img_tensor /= 255.
+
         return img_tensor, target, ratio
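Net effect of this hunk: both SSDAugmentation and SSDBaseTransform now hand back an image tensor already scaled to [0, 1] (with pixel_std bumped to 255 to match), so downstream callers must drop their own `/ 255.`. A minimal standalone sketch of the new preprocessing contract; the `preprocess` helper here is illustrative, not the repo's API:

```python
import numpy as np
import torch

def preprocess(image: np.ndarray) -> torch.Tensor:
    """Illustrative stand-in for the transforms above: HWC uint8 -> CHW float in [0, 1]."""
    img_tensor = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
    # normalize image (this division now lives inside the transform)
    img_tensor /= 255.
    return img_tensor

img = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
x = preprocess(img)
assert 0.0 <= x.min().item() and x.max().item() <= 1.0
```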

+ 8 - 2
dataset/data_augment/yolov5_augment.py

@@ -337,7 +337,7 @@ class YOLOv5Augmentation(object):
         # Basic parameters
         self.img_size = img_size
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
         self.trans_config = trans_config
         # Albumentations
@@ -413,6 +413,9 @@ class YOLOv5Augmentation(object):
         dh = self.img_size - img_h0
         dw = self.img_size - img_w0
 
+        # normalize image
+        pad_image /= 255.
+
         return pad_image, target, ratio #[dw, dh]
 
 ## YOLOv5-style Transform for Eval
@@ -421,7 +424,7 @@ class YOLOv5BaseTransform(object):
         self.img_size = img_size
         self.max_stride = max_stride
         self.pixel_mean = [0., 0., 0.]
-        self.pixel_std  = [1., 1., 1.]
+        self.pixel_std  = [255., 255., 255.]
         self.color_format = 'bgr'
 
     def __call__(self, image, target=None, mosaic=False):
@@ -461,4 +464,7 @@ class YOLOv5BaseTransform(object):
         pad_image = torch.ones([img_tensor.size(0), pad_img_h, pad_img_w]).float() * 114.
         pad_image[:, :img_h0, :img_w0] = img_tensor
 
+        # normalize image
+        pad_image /= 255.
+
         return pad_image, target, ratio #[dw, dh]
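One subtlety in the YOLOv5-style transforms: they pad with the gray value 114 before the new division, so padded pixels end up at 114/255 ≈ 0.447 rather than 114. A self-contained sketch of that ordering (the shapes are made up for illustration):

```python
import torch

img_tensor = torch.rand(3, 480, 640) * 255.   # stand-in for the resized image
pad_img_h, pad_img_w = 640, 640
pad_image = torch.ones([img_tensor.size(0), pad_img_h, pad_img_w]).float() * 114.
pad_image[:, :480, :640] = img_tensor
# normalize image: the gray padding becomes 114 / 255 ≈ 0.447
pad_image /= 255.
```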

+ 5 - 5
engine.py

@@ -216,7 +216,7 @@ class Yolov8Trainer(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # to device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:
@@ -584,7 +584,7 @@ class YoloxTrainer(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale and ni % 10 == 0:
@@ -954,7 +954,7 @@ class RTCTrainer(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:
@@ -1677,7 +1677,7 @@ class RTCTrainerDS(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:
@@ -2020,7 +2020,7 @@ class RTCTrainerDSP(object):
                         x['momentum'] = np.interp(ni, xi, [self.warmup_dict['warmup_momentum'], self.optimizer_dict['momentum']])
                                 
             # To device
-            images = images.to(self.device, non_blocking=True).float() / 255.
+            images = images.to(self.device, non_blocking=True).float()
 
             # Multi scale
             if self.args.multi_scale:

+ 1 - 1
evaluator/coco_evaluator.py

@@ -69,7 +69,7 @@ class COCOAPIEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             id_ = int(id_)
             ids.append(id_)

+ 1 - 1
evaluator/crowdhuman_evaluator.py

@@ -76,7 +76,7 @@ class CrowdHumanEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             # inference
             outputs = model(x)

+ 1 - 1
evaluator/customed_evaluator.py

@@ -52,7 +52,7 @@ class CustomedEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             id_ = int(id_)
             ids.append(id_)

+ 1 - 1
evaluator/voc_evaluator.py

@@ -67,7 +67,7 @@ class VOCAPIEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
 
             # forward
             t0 = time.time()

+ 1 - 1
evaluator/widerface_evaluator.py

@@ -64,7 +64,7 @@ class WiderFaceEvaluator():
 
             # preprocess
             x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device) / 255.
+            x = x.unsqueeze(0).to(self.device)
             
             id_ = int(id_)
             ids.append(id_)
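All five evaluators share the same preprocessing stanza, and the fix is identical in each: the transform already returns a [0, 1] tensor, so only the batch dimension and the device move remain (test.py below gets the same one-line change). A self-contained sketch, with `fake_transform` standing in for the real BaseTransforms:

```python
import numpy as np
import torch

def fake_transform(img: np.ndarray):
    # stand-in for SSDBaseTransform / YOLOv5BaseTransform:
    # returns a CHW float tensor already scaled to [0, 1]
    x = torch.from_numpy(img).permute(2, 0, 1).contiguous().float() / 255.
    return x, None, 1.0

img = np.random.randint(0, 256, (640, 640, 3), dtype=np.uint8)
x, _, ratio = fake_transform(img)
x = x.unsqueeze(0)   # add the batch dim; note there is no extra `/ 255.` anymore
```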

+ 8 - 4
models/detectors/rtdetr/rtdetr.py

@@ -64,7 +64,6 @@ class RT_DETR(nn.Module):
             topk_labels = labels[topk_idxs]
             topk_bboxes = box_pred[topk_idxs]
 
-            return topk_bboxes, topk_scores, topk_labels
         else:
             # Top-k select
             cls_pred = cls_pred.flatten().sigmoid_()
@@ -101,12 +100,17 @@ class RT_DETR(nn.Module):
         if self.training:
             return transformer_outputs
         else:
+            img_h, img_w = x.shape[2:]
             pred_boxes, pred_logits = transformer_outputs[0], transformer_outputs[1]
-            box_preds = pred_boxes[-1]
-            cls_preds = pred_logits[-1]
+            box_pred = pred_boxes[-1]
+            cls_pred = pred_logits[-1]
+
+            # rescale bbox
+            box_pred[..., [0, 2]] *= img_w
+            box_pred[..., [1, 3]] *= img_h
             
             # post-process
-            bboxes, scores, labels = self.post_process(box_preds, cls_preds)
+            bboxes, scores, labels = self.post_process(box_pred, cls_pred)
 
             outputs = {
                 "scores": scores.cpu().numpy(),

+ 1 - 1
test.py

@@ -98,7 +98,7 @@ def test_det(args,
 
         # prepare
         x, _, ratio = transform(image)
-        x = x.unsqueeze(0).to(device) / 255.
+        x = x.unsqueeze(0).to(device)
 
         t0 = time.time()
         # inference

+ 2 - 0
utils/box_ops.py

@@ -25,6 +25,8 @@ def rescale_bboxes(bboxes, origin_size, ratio):
     elif isinstance(ratio, List):
         bboxes[..., [0, 2]] /= ratio[0]
         bboxes[..., [1, 3]] /= ratio[1]
+    else:
+        raise NotImplementedError("ratio should be an int or a List[int, int].")
 
     # clip bboxes
     bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_size[0])
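The new else branch turns a silent no-op into a loud failure when `ratio` has an unexpected type. A hedged, self-contained mirror of how the function now reads; the first branch is assumed to accept plain scalars, matching the `elif` shown in the hunk:

```python
import numpy as np

def rescale_bboxes(bboxes, origin_size, ratio):
    # undo the resize ratio (scalar, or per-axis [rw, rh]), then clip to the image
    if isinstance(ratio, (int, float)):
        bboxes /= ratio
    elif isinstance(ratio, list):
        bboxes[..., [0, 2]] /= ratio[0]
        bboxes[..., [1, 3]] /= ratio[1]
    else:
        raise NotImplementedError("ratio should be an int or a List[int, int].")
    bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_size[0])
    bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_size[1])
    return bboxes

boxes = np.array([[100., 50., 300., 200.]])
print(rescale_bboxes(boxes.copy(), origin_size=[640, 480], ratio=0.5))
print(rescale_bboxes(boxes.copy(), origin_size=[640, 480], ratio=[0.5, 0.75]))
```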