yjh0410 1 year ago
parent
commit
0b1bdcf7f6

+ 2 - 8
yolo/dataset/coco.py

@@ -46,15 +46,9 @@ class COCODataset(VOCDataset):
         # ----------- Transform parameters -----------
         self.transform = transform
         if is_train:
-            if cfg.mosaic_prob == 0.:
-                self.mosaic_augment = None
-            else:
-                self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
+            self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
+            self.mixup_augment = MixupAugment(cfg.train_img_size)
             self.mosaic_prob = cfg.mosaic_prob
-            if cfg.mixup_prob == 0.:
-                self.mixup_augment = None
-            else:
-                self.mixup_augment = MixupAugment(cfg.train_img_size)
             self.mixup_prob  = cfg.mixup_prob
             self.copy_paste  = cfg.copy_paste
         else:

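Review note: with this change (mirrored in yolo/dataset/voc.py below), MosaicAugment and MixupAugment are constructed unconditionally whenever is_train is set, and mosaic_prob / mixup_prob alone decide whether the augmentations fire for a given sample. A minimal sketch of that gating; the helper names load_single_sample and load_mosaic_sample and the call signatures are placeholders, not functions taken from this commit:

```python
import random

# Sketch of the per-sample gating implied by the change above: the augment
# objects always exist, and the probabilities decide whether they are used.
def pull_train_sample(dataset, index, load_single_sample, load_mosaic_sample):
    # Mosaic fires with probability mosaic_prob; with mosaic_prob == 0. this
    # branch is simply never taken, so the always-constructed augment is unused.
    if random.random() < dataset.mosaic_prob:
        image, target = load_mosaic_sample(index)
    else:
        image, target = load_single_sample(index)

    # Mixup is layered on top with its own probability gate
    # (the mixup_augment call signature here is illustrative only).
    if random.random() < dataset.mixup_prob:
        image, target = dataset.mixup_augment(image, target)

    return image, target
```

The trade-off is that setting mosaic_prob or mixup_prob to 0. now keeps the augment object allocated but never invokes it, instead of skipping construction entirely.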
+ 39 - 0
yolo/dataset/data_augment/ssd_augment.py

@@ -553,3 +553,42 @@ class SSDBaseTransform(object):
         image, target = self.transform(image, target)

         return image, target, ratio
+
+
+if __name__ == "__main__":
+    image_path = "voc_image.jpg"
+    is_train = True
+
+    if is_train:
+        ssd_augment = SSDAugmentation(img_size=512,
+                                      pixel_mean=[0., 0., 0.],
+                                      pixel_std=[255., 255., 255.],
+                                      box_format="xyxy",
+                                      normalize_coords=False,
+                                      )
+    else:
+        ssd_augment = SSDBaseTransform(img_size=512,
+                                       pixel_mean=[0., 0., 0.],
+                                       pixel_std=[255., 255., 255.],
+                                       box_format="xyxy",
+                                       normalize_coords=False,
+                                       )
+        
+    image = cv2.imread(image_path)
+    cv2.imshow("original image", image)
+    cv2.waitKey(0)
+
+    target = {
+        "boxes": np.array([[86, 96, 256, 425], [132, 71, 243, 282]], dtype=np.float32),
+        "labels": np.array([12, 14], dtype=np.int32),
+    }
+
+    image, target, _ = ssd_augment(image, target)
+    # [c, h, w] -> [h, w, c]
+    image = image.permute(1, 2, 0).contiguous().numpy()
+    image = np.clip(image * 255, 0, 255).astype(np.uint8)
+
+    # to bgr
+    image = image[:, :, (2, 1, 0)]
+    cv2.imshow("processed image", image)
+    cv2.waitKey(0)

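Review suggestion: the new __main__ check displays the image before and after the transform but not the transformed boxes. A small sketch for overlaying them as well, assuming target["boxes"] comes back in pixel xyxy coordinates given the box_format="xyxy" and normalize_coords=False flags used above (an assumption from those flags, not something verified here):

```python
import cv2
import numpy as np

def draw_boxes(image_bgr, boxes, color=(0, 255, 0)):
    # Draw xyxy pixel-coordinate boxes on a copy of a BGR uint8 image.
    canvas = image_bgr.copy()
    for x1, y1, x2, y2 in boxes:
        cv2.rectangle(canvas, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
    return canvas

# e.g. just before the final cv2.imshow("processed image", ...):
# image = draw_boxes(image, target["boxes"])
```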
BIN
yolo/dataset/data_augment/voc_image.jpg


+ 4 - 11
yolo/dataset/voc.py

@@ -39,15 +39,9 @@ class VOCDataset(torch.utils.data.Dataset):
         # ----------- Transform parameters -----------
         self.transform = transform
         if is_train:
-            if cfg.mosaic_prob == 0.:
-                self.mosaic_augment = None
-            else:
-                self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
+            self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
+            self.mixup_augment = MixupAugment(cfg.train_img_size)
             self.mosaic_prob = cfg.mosaic_prob
-            if cfg.mixup_prob == 0.:
-                self.mixup_augment = None
-            else:
-                self.mixup_augment = MixupAugment(cfg.train_img_size)
             self.mixup_prob  = cfg.mixup_prob
             self.copy_paste  = cfg.copy_paste
         else:
@@ -170,7 +164,6 @@ class VOCDataset(torch.utils.data.Dataset):
         anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=None)
         annotations = self.coco.loadAnns(anno_ids)

-        
         #load a target
         bboxes = []
         labels = []
@@ -192,7 +185,7 @@ class VOCDataset(torch.utils.data.Dataset):
         # guard against no boxes via resizing
         bboxes = np.array(bboxes).reshape(-1, 4)
         labels = np.array(labels).reshape(-1)
-        
+                
         return bboxes, labels


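Side note on the context line "guard against no boxes via resizing": the reshape(-1, 4) is what keeps an image with zero annotations from producing a shape-(0,) array where downstream code expects (N, 4). A quick self-contained check:

```python
import numpy as np

# With no annotations, np.array([]) has shape (0,); the reshape restores the
# expected 2-D box layout so downstream code sees an empty (0, 4) array.
bboxes = np.array([]).reshape(-1, 4)
labels = np.array([]).reshape(-1)
print(bboxes.shape, labels.shape)  # -> (0, 4) (0,)
```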
@@ -204,7 +197,7 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='COCO-Dataset')

     # opt
-    parser.add_argument('--root', default="D:/python_work/dataset/VOCdevkit/",
+    parser.add_argument('--root', default="D:/python_work/dataset/VOC0712/",
                         help='data root')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')