Browse source

modify transforms

yjh0410 1 year ago
parent
commit
7a392783f8

+ 7 - 5
dataset/data_augment/ssd_augment.py

@@ -363,10 +363,12 @@ class SSDAugmentation(object):
         ])
 
     def __call__(self, image, target, mosaic=False):
+        orig_h, orig_w = image.shape[:2]
+        ratio = [self.img_size / orig_w, self.img_size / orig_h]
+
+        # augment
         boxes = target['boxes'].copy()
         labels = target['labels'].copy()
-        deltas = None
-        # augment
         image, boxes, labels = self.augment(image, boxes, labels)
 
         # to tensor
@@ -375,7 +377,7 @@ class SSDAugmentation(object):
         target['labels'] = torch.from_numpy(labels).float()
         
 
-        return img_tensor, target, deltas
+        return img_tensor, target, ratio
     
 
 ## SSD-style valTransform
@@ -384,9 +386,9 @@ class SSDBaseTransform(object):
         self.img_size = img_size
 
     def __call__(self, image, target=None, mosaic=False):
-        deltas = None
         # resize
         orig_h, orig_w = image.shape[:2]
+        ratio = [self.img_size / orig_w, self.img_size / orig_h]
         image = cv2.resize(image, (self.img_size, self.img_size)).astype(np.float32)
         
         # scale targets
@@ -404,4 +406,4 @@ class SSDBaseTransform(object):
             target['boxes'] = torch.from_numpy(boxes).float()
             target['labels'] = torch.from_numpy(labels).float()
             
-        return img_tensor, target, deltas
+        return img_tensor, target, ratio
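
The SSD-style transforms now stretch the image to an img_size x img_size square and return the per-axis scale pair ratio = [img_size / orig_w, img_size / orig_h] instead of the old deltas placeholder. A minimal sketch of how such a pair maps predicted xyxy boxes back to the original image; the helper name undo_ssd_resize is illustrative, not part of the repo, and note that the x and y columns each need their own scale:

    import numpy as np

    def undo_ssd_resize(bboxes, ratio, orig_w, orig_h):
        """Map xyxy boxes from the stretched (img_size x img_size) input back
        to the original image, given ratio = [w_scale, h_scale]."""
        bboxes = bboxes.copy()
        bboxes[..., [0, 2]] /= ratio[0]   # undo the width scale on x1, x2
        bboxes[..., [1, 3]] /= ratio[1]   # undo the height scale on y1, y2
        bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], 0., orig_w)
        bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], 0., orig_h)
        return bboxes

    # e.g. a 640x480 image stretched to 416x416: ratio = [0.65, 0.8667...],
    # so a predicted x = 208 maps back to x = 320 on the original image.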

+ 8 - 8
dataset/data_augment/yolov5_augment.py

@@ -339,10 +339,10 @@ class YOLOv5Augmentation(object):
     def __call__(self, image, target, mosaic=False):
         # --------------- Keep ratio Resize ---------------
         img_h0, img_w0 = image.shape[:2]
-        r = self.img_size / max(img_h0, img_w0)
-        if r != 1: 
+        ratio = self.img_size / max(img_h0, img_w0)
+        if ratio != 1: 
             interp = cv2.INTER_LINEAR
-            new_shape = (int(round(img_w0 * r)), int(round(img_h0 * r)))
+            new_shape = (int(round(img_w0 * ratio)), int(round(img_h0 * ratio)))
             img = cv2.resize(image, new_shape, interpolation=interp)
         else:
             img = image
@@ -406,7 +406,7 @@ class YOLOv5Augmentation(object):
         dh = self.img_size - img_h0
         dw = self.img_size - img_w0
 
-        return pad_image, target, [dw, dh]
+        return pad_image, target, ratio #[dw, dh]
 
 ## YOLOv5-style Transform for Eval
 class YOLOv5BaseTransform(object):
@@ -419,9 +419,9 @@ class YOLOv5BaseTransform(object):
         # --------------- Keep ratio Resize ---------------
         ## Resize image
         img_h0, img_w0 = image.shape[:2]
-        r = self.img_size / max(img_h0, img_w0)
-        if r != 1: 
-            new_shape = (int(round(img_w0 * r)), int(round(img_h0 * r)))
+        ratio = self.img_size / max(img_h0, img_w0)
+        if ratio != 1: 
+            new_shape = (int(round(img_w0 * ratio)), int(round(img_h0 * ratio)))
             img = cv2.resize(image, new_shape, interpolation=cv2.INTER_LINEAR)
         else:
             img = image
@@ -452,4 +452,4 @@ class YOLOv5BaseTransform(object):
         pad_image = torch.ones([img_tensor.size(0), pad_img_h, pad_img_w]).float() * 114.
         pad_image[:, :img_h0, :img_w0] = img_tensor
 
-        return pad_image, target, [dw, dh]
+        return pad_image, target, ratio #[dw, dh]
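
The YOLOv5-style transforms keep the aspect ratio: the long side is resized to img_size with a single scalar ratio = img_size / max(h0, w0), and the remainder is padded with the value 114 at the bottom/right. A minimal sketch of that resize-and-pad step, assuming a 3-channel uint8 input (the real classes also convert the result to a torch tensor):

    import cv2
    import numpy as np

    def letterbox_bottom_right(image, img_size=640):
        """Keep-ratio resize, then pad the bottom/right to a square canvas
        filled with 114, mirroring YOLOv5BaseTransform (illustrative sketch)."""
        h0, w0 = image.shape[:2]
        ratio = img_size / max(h0, w0)
        if ratio != 1:
            new_shape = (int(round(w0 * ratio)), int(round(h0 * ratio)))
            image = cv2.resize(image, new_shape, interpolation=cv2.INTER_LINEAR)
        h, w = image.shape[:2]
        canvas = np.full((img_size, img_size, 3), 114, dtype=image.dtype)
        canvas[:h, :w] = image
        return canvas, ratio

    # Because all padding sits at the bottom/right, a prediction at (x, y) on
    # the padded input maps back to (x / ratio, y / ratio) on the original
    # image, which is what the new scalar return value enables.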

+ 2 - 4
evaluator/coco_evaluator.py

@@ -68,7 +68,7 @@ class COCOAPIEvaluator():
             orig_h, orig_w, _ = img.shape
 
             # preprocess
-            x, _, deltas = self.transform(img)
+            x, _, ratio = self.transform(img)
             x = x.unsqueeze(0).to(self.device) / 255.
             
             id_ = int(id_)
@@ -79,9 +79,7 @@ class COCOAPIEvaluator():
             bboxes, scores, cls_inds = outputs
 
             # rescale bboxes
-            origin_img_size = [orig_h, orig_w]
-            cur_img_size = [*x.shape[-2:]]
-            bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+            bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
 
             # process outputs
             for i, box in enumerate(bboxes):
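
For the YOLOv5-style letterbox, dividing by the scalar ratio is equivalent to the old deltas-based rescale, since the current size minus the padding is just the resized size. A small sanity check with made-up values, ignoring sub-pixel rounding of the resized shape:

    orig_w, orig_h, img_size = 1280, 720, 640
    ratio = img_size / max(orig_h, orig_w)   # 0.5
    dh = img_size - round(orig_h * ratio)    # 280 px of bottom padding
    y_pred = 100.0                           # a y coordinate on the padded input

    old = y_pred / (img_size - dh) * orig_h  # previous (cur_img_size, deltas) rescale
    new = y_pred / ratio                     # new scalar-ratio rescale
    assert abs(old - new) < 1e-6             # both give 200.0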

+ 3 - 5
test.py

@@ -59,7 +59,7 @@ def parse_args():
                         help='Perform NMS operations regardless of category.')
 
     # dataset
-    parser.add_argument('--root', default='/mnt/share/ssd2/dataset',
+    parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/',
                         help='data root')
     parser.add_argument('-d', '--dataset', default='coco',
                         help='coco, voc.')
@@ -136,7 +136,7 @@ def test(args,
         orig_h, orig_w, _ = image.shape
 
         # prepare
-        x, _, deltas = transform(image)
+        x, _, ratio = transform(image)
         x = x.unsqueeze(0).to(device) / 255.
 
         t0 = time.time()
@@ -145,9 +145,7 @@ def test(args,
         print("detection time used ", time.time() - t0, "s")
         
         # rescale bboxes
-        origin_img_size = [orig_h, orig_w]
-        cur_img_size = [*x.shape[-2:]]
-        bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+        bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
 
         # vis detection
         img_processed = visualize(

+ 7 - 19
utils/box_ops.py

@@ -17,25 +17,13 @@ def box_xyxy_to_cxcywh(x):
          (x1 - x0), (y1 - y0)]
     return torch.stack(b, dim=-1)
 
-def rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas=None):
-    origin_h, origin_w = origin_img_size
-    cur_img_h, cur_img_w = cur_img_size
-    if deltas is None:
-        # rescale
-        bboxes[..., [0, 2]] = bboxes[..., [0, 2]] / cur_img_w * origin_w
-        bboxes[..., [1, 3]] = bboxes[..., [1, 3]] / cur_img_h * origin_h
-
-        # clip bboxes
-        bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_w)
-        bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_h)
-    else:
-        # rescale
-        bboxes[..., [0, 2]] = bboxes[..., [0, 2]] / (cur_img_w - deltas[0]) * origin_w
-        bboxes[..., [1, 3]] = bboxes[..., [1, 3]] / (cur_img_h - deltas[1]) * origin_h
-        
-        # clip bboxes
-        bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_w)
-        bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_h)
+def rescale_bboxes(bboxes, origin_size, ratio):
+    # rescale bboxes
+    bboxes /= ratio
+
+    # clip bboxes
+    bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_size[0])
+    bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_size[1])
 
     return bboxes
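
A minimal usage sketch of the new rescale_bboxes with a scalar ratio from the YOLOv5-style transform; the numbers are illustrative, and origin_size is passed width-first as [orig_w, orig_h], matching the clipping above:

    import numpy as np
    from utils.box_ops import rescale_bboxes

    # 1280x720 image, img_size = 640 -> ratio = 0.5, padding only at bottom/right
    bboxes = np.array([[100., 50., 300., 200.]])    # xyxy on the 640x640 padded input
    out = rescale_bboxes(bboxes, [1280, 720], 0.5)  # -> [[200., 100., 600., 400.]]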