Browse source

modify transforms

yjh0410 1 year ago
parent
commit
7a392783f8

+ 7 - 5
dataset/data_augment/ssd_augment.py

@@ -363,10 +363,12 @@ class SSDAugmentation(object):
         ])
 
     def __call__(self, image, target, mosaic=False):
+        orig_h, orig_w = image.shape[:2]
+        ratio = [self.img_size / orig_w, self.img_size / orig_h]
+
+        # augment
         boxes = target['boxes'].copy()
         labels = target['labels'].copy()
-        deltas = None
-        # augment
         image, boxes, labels = self.augment(image, boxes, labels)
 
         # to tensor
@@ -375,7 +377,7 @@ class SSDAugmentation(object):
         target['labels'] = torch.from_numpy(labels).float()
         
 
-        return img_tensor, target, deltas
+        return img_tensor, target, ratio
     
 
 ## SSD-style valTransform
@@ -384,9 +386,9 @@ class SSDBaseTransform(object):
         self.img_size = img_size
 
     def __call__(self, image, target=None, mosaic=False):
-        deltas = None
         # resize
         orig_h, orig_w = image.shape[:2]
+        ratio = [self.img_size / orig_w, self.img_size / orig_h]
         image = cv2.resize(image, (self.img_size, self.img_size)).astype(np.float32)
         
         # scale targets
@@ -404,4 +406,4 @@ class SSDBaseTransform(object):
             target['boxes'] = torch.from_numpy(boxes).float()
             target['labels'] = torch.from_numpy(labels).float()
             
-        return img_tensor, target, deltas
+        return img_tensor, target, ratio
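
The SSD-style transforms now stretch the image to an img_size x img_size square and return the per-axis scale pair ratio = [img_size / orig_w, img_size / orig_h] instead of the old deltas placeholder. A minimal sketch of how such a pair maps predicted xyxy boxes back to the original image; the helper name undo_ssd_resize is illustrative, not part of the repo, and note that the x and y columns each need their own scale:

    import numpy as np

    def undo_ssd_resize(bboxes, ratio, orig_w, orig_h):
        """Map xyxy boxes from the stretched (img_size x img_size) input back
        to the original image, given ratio = [w_scale, h_scale]."""
        bboxes = bboxes.copy()
        bboxes[..., [0, 2]] /= ratio[0]   # undo the width scale on x1, x2
        bboxes[..., [1, 3]] /= ratio[1]   # undo the height scale on y1, y2
        bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], 0., orig_w)
        bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], 0., orig_h)
        return bboxes

    # e.g. a 640x480 image stretched to 416x416: ratio = [0.65, 0.8667...],
    # so a predicted x = 208 maps back to x = 320 on the original image.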

+ 8 - 8
dataset/data_augment/yolov5_augment.py

@@ -339,10 +339,10 @@ class YOLOv5Augmentation(object):
     def __call__(self, image, target, mosaic=False):
         # --------------- Keep ratio Resize ---------------
         img_h0, img_w0 = image.shape[:2]
-        r = self.img_size / max(img_h0, img_w0)
-        if r != 1: 
+        ratio = self.img_size / max(img_h0, img_w0)
+        if ratio != 1: 
             interp = cv2.INTER_LINEAR
-            new_shape = (int(round(img_w0 * r)), int(round(img_h0 * r)))
+            new_shape = (int(round(img_w0 * ratio)), int(round(img_h0 * ratio)))
             img = cv2.resize(image, new_shape, interpolation=interp)
         else:
             img = image
@@ -406,7 +406,7 @@ class YOLOv5Augmentation(object):
         dh = self.img_size - img_h0
         dw = self.img_size - img_w0
 
-        return pad_image, target, [dw, dh]
+        return pad_image, target, ratio #[dw, dh]
 
 ## YOLOv5-style Transform for Eval
 class YOLOv5BaseTransform(object):
@@ -419,9 +419,9 @@ class YOLOv5BaseTransform(object):
         # --------------- Keep ratio Resize ---------------
         ## Resize image
         img_h0, img_w0 = image.shape[:2]
-        r = self.img_size / max(img_h0, img_w0)
-        if r != 1: 
-            new_shape = (int(round(img_w0 * r)), int(round(img_h0 * r)))
+        ratio = self.img_size / max(img_h0, img_w0)
+        if ratio != 1: 
+            new_shape = (int(round(img_w0 * ratio)), int(round(img_h0 * ratio)))
             img = cv2.resize(image, new_shape, interpolation=cv2.INTER_LINEAR)
         else:
             img = image
@@ -452,4 +452,4 @@ class YOLOv5BaseTransform(object):
         pad_image = torch.ones([img_tensor.size(0), pad_img_h, pad_img_w]).float() * 114.
         pad_image[:, :img_h0, :img_w0] = img_tensor
 
-        return pad_image, target, [dw, dh]
+        return pad_image, target, ratio #[dw, dh]
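
The YOLOv5-style transforms keep the aspect ratio: the long side is resized to img_size with a single scalar ratio = img_size / max(h0, w0), and the remainder is padded with the value 114 at the bottom/right. A minimal sketch of that resize-and-pad step, assuming a 3-channel uint8 input (the real classes also convert the result to a torch tensor):

    import cv2
    import numpy as np

    def letterbox_bottom_right(image, img_size=640):
        """Keep-ratio resize, then pad the bottom/right to a square canvas
        filled with 114, mirroring YOLOv5BaseTransform (illustrative sketch)."""
        h0, w0 = image.shape[:2]
        ratio = img_size / max(h0, w0)
        if ratio != 1:
            new_shape = (int(round(w0 * ratio)), int(round(h0 * ratio)))
            image = cv2.resize(image, new_shape, interpolation=cv2.INTER_LINEAR)
        h, w = image.shape[:2]
        canvas = np.full((img_size, img_size, 3), 114, dtype=image.dtype)
        canvas[:h, :w] = image
        return canvas, ratio

    # Because all padding sits at the bottom/right, a prediction at (x, y) on
    # the padded input maps back to (x / ratio, y / ratio) on the original
    # image, which is what the new scalar return value enables.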

+ 2 - 4
evaluator/coco_evaluator.py

@@ -68,7 +68,7 @@ class COCOAPIEvaluator():
             orig_h, orig_w, _ = img.shape
 
             # preprocess
-            x, _, deltas = self.transform(img)
+            x, _, ratio = self.transform(img)
             x = x.unsqueeze(0).to(self.device) / 255.
             
             id_ = int(id_)
@@ -79,9 +79,7 @@ class COCOAPIEvaluator():
             bboxes, scores, cls_inds = outputs
 
             # rescale bboxes
-            origin_img_size = [orig_h, orig_w]
-            cur_img_size = [*x.shape[-2:]]
-            bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+            bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
 
             # process outputs
             for i, box in enumerate(bboxes):
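
For the YOLOv5-style letterbox, dividing by the scalar ratio is equivalent to the old deltas-based rescale, since the current size minus the padding is just the resized size. A small sanity check with made-up values, ignoring sub-pixel rounding of the resized shape:

    orig_w, orig_h, img_size = 1280, 720, 640
    ratio = img_size / max(orig_h, orig_w)   # 0.5
    dh = img_size - round(orig_h * ratio)    # 280 px of bottom padding
    y_pred = 100.0                           # a y coordinate on the padded input

    old = y_pred / (img_size - dh) * orig_h  # previous (cur_img_size, deltas) rescale
    new = y_pred / ratio                     # new scalar-ratio rescale
    assert abs(old - new) < 1e-6             # both give 200.0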

+ 3 - 5
test.py

@@ -59,7 +59,7 @@ def parse_args():
                         help='Perform NMS operations regardless of category.')
 
     # dataset
-    parser.add_argument('--root', default='/mnt/share/ssd2/dataset',
+    parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/',
                         help='data root')
     parser.add_argument('-d', '--dataset', default='coco',
                         help='coco, voc.')
@@ -136,7 +136,7 @@ def test(args,
         orig_h, orig_w, _ = image.shape
 
         # prepare
-        x, _, deltas = transform(image)
+        x, _, ratio = transform(image)
         x = x.unsqueeze(0).to(device) / 255.
 
         t0 = time.time()
@@ -145,9 +145,7 @@ def test(args,
         print("detection time used ", time.time() - t0, "s")
         
         # rescale bboxes
-        origin_img_size = [orig_h, orig_w]
-        cur_img_size = [*x.shape[-2:]]
-        bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+        bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
 
         # vis detection
         img_processed = visualize(

+ 7 - 19
utils/box_ops.py

@@ -17,25 +17,13 @@ def box_xyxy_to_cxcywh(x):
          (x1 - x0), (y1 - y0)]
     return torch.stack(b, dim=-1)
 
-def rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas=None):
-    origin_h, origin_w = origin_img_size
-    cur_img_h, cur_img_w = cur_img_size
-    if deltas is None:
-        # rescale
-        bboxes[..., [0, 2]] = bboxes[..., [0, 2]] / cur_img_w * origin_w
-        bboxes[..., [1, 3]] = bboxes[..., [1, 3]] / cur_img_h * origin_h
-
-        # clip bboxes
-        bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_w)
-        bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_h)
-    else:
-        # rescale
-        bboxes[..., [0, 2]] = bboxes[..., [0, 2]] / (cur_img_w - deltas[0]) * origin_w
-        bboxes[..., [1, 3]] = bboxes[..., [1, 3]] / (cur_img_h - deltas[1]) * origin_h
-        
-        # clip bboxes
-        bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_w)
-        bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_h)
+def rescale_bboxes(bboxes, origin_size, ratio):
+    # rescale bboxes
+    bboxes /= ratio
+
+    # clip bboxes
+    bboxes[..., [0, 2]] = np.clip(bboxes[..., [0, 2]], a_min=0., a_max=origin_size[0])
+    bboxes[..., [1, 3]] = np.clip(bboxes[..., [1, 3]], a_min=0., a_max=origin_size[1])
 
     return bboxes
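
A minimal usage sketch of the new rescale_bboxes with a scalar ratio from the YOLOv5-style transform; the numbers are illustrative, and origin_size is passed width-first as [orig_w, orig_h], matching the clipping above:

    import numpy as np
    from utils.box_ops import rescale_bboxes

    # 1280x720 image, img_size = 640 -> ratio = 0.5, padding only at bottom/right
    bboxes = np.array([[100., 50., 300., 200.]])    # xyxy on the 640x640 padded input
    out = rescale_bboxes(bboxes, [1280, 720], 0.5)  # -> [[200., 100., 600., 400.]]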