1 vuosi sitten · 1aa5d71b3b
--- a/yolo/dataset/coco.py
+++ b/yolo/dataset/coco.py
@@ -1,15 +1,15 @@
 
				 import os
			
 
				 import cv2
			
 
				 import time
			
 
				-import random
			
 
				 import numpy as np
			
 
				-from torch.utils.data import Dataset
			
 
				 from pycocotools.coco import COCO
			
 
				 
			
 
				 try:
			
 
				     from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				+    from .voc import VOCDataset
			
 
				 except:
			
 
				     from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				+    from  voc import VOCDataset
			
 
				 
			
 
				 
			
 
				 coco_class_indexs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
			
@@ -21,7 +21,7 @@ coco_json_files = {
 
				 }
			
 
				 
			
 
				 
			
 
				-class COCODataset(Dataset):
			
 
				+class COCODataset(VOCDataset):
			
 
				     def __init__(self, 
			
 
				                  cfg,
			
 
				                  data_dir  :str = None, 
			
@@ -46,106 +46,29 @@ class COCODataset(Dataset):
 
				         # ----------- Transform parameters -----------
			
 
				         self.transform = transform
			
 
				         if is_train:
			
 
				+            if cfg.mosaic_prob == 0.:
			
 
				+                self.mosaic_augment = None
			
 
				+            else:
			
 
				+                self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
			
 
				             self.mosaic_prob = cfg.mosaic_prob
			
 
				+            if cfg.mixup_prob == 0. and cfg.copy_paste == 0.:  # copy-paste also runs through MixupAugment
			
 
				+                self.mixup_augment = None
			
 
				+            else:
			
 
				+                self.mixup_augment = MixupAugment(cfg.train_img_size)
			
 
				             self.mixup_prob  = cfg.mixup_prob
			
 
				             self.copy_paste  = cfg.copy_paste
			
 
				-            self.mosaic_augment = None if cfg.mosaic_prob == 0. else MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
			
 
				-            self.mixup_augment  = None if cfg.mixup_prob == 0. and cfg.copy_paste == 0.  else MixupAugment(cfg.train_img_size)
			
 
				         else:
			
 
				             self.mosaic_prob = 0.0
			
 
				             self.mixup_prob  = 0.0
			
 
				             self.copy_paste  = 0.0
			
 
				             self.mosaic_augment = None
			
 
				             self.mixup_augment  = None
			
 
				+
			
 
				         print(' ============ Strong augmentation info. ============ ')
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
 
				         print('use Copy-paste Augmentation: {}'.format(self.copy_paste))
			
 
				 
			
 
				-    # ------------ Basic dataset function ------------
			
 
				-    def __len__(self):
			
 
				-        return len(self.ids)
			
 
				-
			
 
				-    def __getitem__(self, index):
			
 
				-        return self.pull_item(index)
			
 
				-
			
 
				-    # ------------ Mosaic & Mixup ------------
			
 
				-    def load_mosaic(self, index):
			
 
				-        # ------------ Prepare 4 indexes of images ------------
			
 
				-        ## Load 4x mosaic image
			
 
				-        index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
			
 
				-        id1 = index
			
 
				-        id2, id3, id4 = random.sample(index_list, 3)
			
 
				-        indexs = [id1, id2, id3, id4]
			
 
				-
			
 
				-        ## Load images and targets
			
 
				-        image_list = []
			
 
				-        target_list = []
			
 
				-        for index in indexs:
			
 
				-            img_i, target_i = self.load_image_target(index)
			
 
				-            image_list.append(img_i)
			
 
				-            target_list.append(target_i)
			
 
				-
			
 
				-        # ------------ Mosaic augmentation ------------
			
 
				-        image, target = self.mosaic_augment(image_list, target_list)
			
 
				-
			
 
				-        return image, target
			
 
				-
			
 
				-    def load_mixup(self, origin_image, origin_target, yolox_style=False):
			
 
				-        # ------------ Load a new image & target ------------
			
 
				-        if yolox_style:
			
 
				-            new_index = np.random.randint(0, len(self.ids))
			
 
				-            new_image, new_target = self.load_image_target(new_index)
			
 
				-        else:
			
 
				-            new_index = np.random.randint(0, len(self.ids))
			
 
				-            new_image, new_target = self.load_mosaic(new_index)
			
 
				-            
			
 
				-        # ------------ Mixup augmentation ------------
			
 
				-        image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target, yolox_style)
			
 
				-
			
 
				-        return image, target
			
 
				-    
			
 
				-    # ------------ Load data function ------------
			
 
				-    def load_image_target(self, index):
			
 
				-        # load an image
			
 
				-        image, _ = self.pull_image(index)
			
 
				-        height, width, channels = image.shape
			
 
				-
			
 
				-        # load a target
			
 
				-        bboxes, labels = self.pull_anno(index)
			
 
				-        target = {
			
 
				-            "boxes": bboxes,
			
 
				-            "labels": labels,
			
 
				-            "orig_size": [height, width]
			
 
				-        }
			
 
				-
			
 
				-        return image, target
			
 
				-
			
 
				-    def pull_item(self, index):
			
 
				-        if random.random() < self.mosaic_prob:
			
 
				-            # load a mosaic image
			
 
				-            mosaic = True
			
 
				-            image, target = self.load_mosaic(index)
			
 
				-        else:
			
 
				-            mosaic = False
			
 
				-            # load an image and target
			
 
				-            image, target = self.load_image_target(index)
			
 
				-
			
 
				-        # Yolov5-MixUp
			
 
				-        mixup = False
			
 
				-        if random.random() < self.mixup_prob:
			
 
				-            mixup = True
			
 
				-            image, target = self.load_mixup(image, target)
			
 
				-
			
 
				-        # Copy-paste (use Yolox-Mixup to approximate copy-paste)
			
 
				-        if not mixup and random.random() < self.copy_paste:
			
 
				-            image, target = self.load_mixup(image, target, yolox_style=True)
			
 
				-
			
 
				-        # augment
			
 
				-        image, target, deltas = self.transform(image, target, mosaic)
			
 
				-
			
 
				-        return image, target, deltas
			
 
				-
			
 
				     def pull_image(self, index):
			
 
				         # get the image file name
			
 
				         image_dict = self.coco.dataset['images'][index]
			
--- a/yolo/dataset/custom.py
+++ b/yolo/dataset/custom.py
@@ -1,22 +1,22 @@
 
				 import os
			
 
				 import cv2
			
 
				 import time
			
 
				-import random
			
 
				 import numpy as np
			
 
				-from torch.utils.data import Dataset
			
 
				 from pycocotools.coco import COCO
			
 
				 
			
 
				 try:
			
 
				     from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				+    from .coco import COCODataset
			
 
				 except:
			
 
				     from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				+    from  coco import COCODataset
			
 
				 
			
 
				 
			
 
				 custom_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8]
			
 
				 custom_class_labels = ('bird', 'butterfly', 'cat', 'cow', 'dog', 'lion', 'person', 'pig', 'tiger', )
			
 
				 
			
 
				 
			
 
				-class CustomDataset(Dataset):
			
 
				+class CustomDataset(COCODataset):
			
 
				     def __init__(self, 
			
 
				                  cfg,
			
 
				                  data_dir     :str = None, 
			
@@ -40,106 +40,29 @@ class CustomDataset(Dataset):
 
				         # ----------- Transform parameters -----------
			
 
				         self.transform = transform
			
 
				         if is_train:
			
 
				+            if cfg.mosaic_prob == 0.:
			
 
				+                self.mosaic_augment = None
			
 
				+            else:
			
 
				+                self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
			
 
				             self.mosaic_prob = cfg.mosaic_prob
			
 
				+            if cfg.mixup_prob == 0. and cfg.copy_paste == 0.:  # copy-paste also runs through MixupAugment
			
 
				+                self.mixup_augment = None
			
 
				+            else:
			
 
				+                self.mixup_augment = MixupAugment(cfg.train_img_size)
			
 
				             self.mixup_prob  = cfg.mixup_prob
			
 
				             self.copy_paste  = cfg.copy_paste
			
 
				-            self.mosaic_augment = None if cfg.mosaic_prob == 0. else MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
			
 
				-            self.mixup_augment  = None if cfg.mixup_prob == 0. and cfg.copy_paste == 0.  else MixupAugment(cfg.train_img_size)
			
 
				         else:
			
 
				             self.mosaic_prob = 0.0
			
 
				             self.mixup_prob  = 0.0
			
 
				             self.copy_paste  = 0.0
			
 
				             self.mosaic_augment = None
			
 
				             self.mixup_augment  = None
			
 
				+
			
 
				         print(' ============ Strong augmentation info. ============ ')
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
 
				         print('use Copy-paste Augmentation: {}'.format(self.copy_paste))
			
 
				 
			
 
				-    # ------------ Basic dataset function ------------
			
 
				-    def __len__(self):
			
 
				-        return len(self.ids)
			
 
				-
			
 
				-    def __getitem__(self, index):
			
 
				-        return self.pull_item(index)
			
 
				-
			
 
				-    # ------------ Mosaic & Mixup ------------
			
 
				-    def load_mosaic(self, index):
			
 
				-        # ------------ Prepare 4 indexes of images ------------
			
 
				-        ## Load 4x mosaic image
			
 
				-        index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
			
 
				-        id1 = index
			
 
				-        id2, id3, id4 = random.sample(index_list, 3)
			
 
				-        indexs = [id1, id2, id3, id4]
			
 
				-
			
 
				-        ## Load images and targets
			
 
				-        image_list = []
			
 
				-        target_list = []
			
 
				-        for index in indexs:
			
 
				-            img_i, target_i = self.load_image_target(index)
			
 
				-            image_list.append(img_i)
			
 
				-            target_list.append(target_i)
			
 
				-
			
 
				-        # ------------ Mosaic augmentation ------------
			
 
				-        image, target = self.mosaic_augment(image_list, target_list)
			
 
				-
			
 
				-        return image, target
			
 
				-
			
 
				-    def load_mixup(self, origin_image, origin_target, yolox_style=False):
			
 
				-        # ------------ Load a new image & target ------------
			
 
				-        if yolox_style:
			
 
				-            new_index = np.random.randint(0, len(self.ids))
			
 
				-            new_image, new_target = self.load_image_target(new_index)
			
 
				-        else:
			
 
				-            new_index = np.random.randint(0, len(self.ids))
			
 
				-            new_image, new_target = self.load_mosaic(new_index)
			
 
				-            
			
 
				-        # ------------ Mixup augmentation ------------
			
 
				-        image, target = self.mixup_augment(origin_image, origin_target, new_image, new_target, yolox_style)
			
 
				-
			
 
				-        return image, target
			
 
				-    
			
 
				-    # ------------ Load data function ------------
			
 
				-    def load_image_target(self, index):
			
 
				-        # load an image
			
 
				-        image, _ = self.pull_image(index)
			
 
				-        height, width, channels = image.shape
			
 
				-
			
 
				-        # load a target
			
 
				-        bboxes, labels = self.pull_anno(index)
			
 
				-        target = {
			
 
				-            "boxes": bboxes,
			
 
				-            "labels": labels,
			
 
				-            "orig_size": [height, width]
			
 
				-        }
			
 
				-
			
 
				-        return image, target
			
 
				-
			
 
				-    def pull_item(self, index):
			
 
				-        if random.random() < self.mosaic_prob:
			
 
				-            # load a mosaic image
			
 
				-            mosaic = True
			
 
				-            image, target = self.load_mosaic(index)
			
 
				-        else:
			
 
				-            mosaic = False
			
 
				-            # load an image and target
			
 
				-            image, target = self.load_image_target(index)
			
 
				-
			
 
				-        # Yolov5-MixUp
			
 
				-        mixup = False
			
 
				-        if random.random() < self.mixup_prob:
			
 
				-            mixup = True
			
 
				-            image, target = self.load_mixup(image, target)
			
 
				-
			
 
				-        # Copy-paste (use Yolox-Mixup to approximate copy-paste)
			
 
				-        if not mixup and random.random() < self.copy_paste:
			
 
				-            image, target = self.load_mixup(image, target, yolox_style=True)
			
 
				-
			
 
				-        # augment
			
 
				-        image, target, deltas = self.transform(image, target, mosaic)
			
 
				-
			
 
				-        return image, target, deltas
			
 
				-
			
 
				     def pull_image(self, index):
			
 
				         id_ = self.ids[index]
			
 
				         im_ann = self.coco.loadImgs(id_)[0] 
			
@@ -192,7 +115,7 @@ if __name__ == "__main__":
 
				     parser = argparse.ArgumentParser(description='RT-ODLab')
			
 
				 
			
 
				     # opt
			
 
				-    parser.add_argument('--root', default='D:/python_work/dataset/COCO/',
			
 
				+    parser.add_argument('--root', default='D:/python_work/dataset/AnimalDataset/',
			
 
				                         help='data root')
			
 
				     parser.add_argument('--is_train', action="store_true", default=False,
			
 
				                         help='mixup augmentation.')
			
@@ -262,7 +185,7 @@ if __name__ == "__main__":
 
				 
			
 
				     for i in range(1000):
			
 
				         t0 = time.time()
			
 
				-        image, target = dataset.pull_item(i)
			
 
				+        image, target, deltas = dataset.pull_item(i)
			
 
				         print("Load data: {} s".format(time.time() - t0))
			
 
				 
			
 
				         # to numpy
			
--- a/yolo/dataset/voc.py
+++ b/yolo/dataset/voc.py
@@ -3,7 +3,7 @@ import cv2
 
				 import time
			
 
				 import random
			
 
				 import numpy as np
			
 
				-from torch.utils.data import Dataset
			
 
				+import torch
			
 
				 from pycocotools.coco import COCO
			
 
				 
			
 
				 try:
			
@@ -16,7 +16,7 @@ voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
 
				 voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
			
 
				 
			
 
				 
			
 
				-class VOCDataset(Dataset):
			
 
				+class VOCDataset(torch.utils.data.Dataset):
			
 
				     def __init__(self, 
			
 
				                  cfg,
			
 
				                  data_dir  :str = None, 
			
@@ -39,12 +39,18 @@ class VOCDataset(Dataset):
 
				         # ----------- Transform parameters -----------
			
 
				         self.transform = transform
			
 
				         if is_train:
			
 
				+            if cfg.mosaic_prob == 0.:
			
 
				+                self.mosaic_augment = None
			
 
				+            else:
			
 
				+                self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
			
 
				             self.mosaic_prob = cfg.mosaic_prob
			
 
				+            if cfg.mixup_prob == 0. and cfg.copy_paste == 0.:  # copy-paste also runs through MixupAugment
			
 
				+                self.mixup_augment = None
			
 
				+            else:
			
 
				+                self.mixup_augment = MixupAugment(cfg.train_img_size)
			
 
				+        else:
			
 
				             self.mixup_prob  = cfg.mixup_prob
			
 
				             self.copy_paste  = cfg.copy_paste
			
 
				-            self.mosaic_augment = None if cfg.mosaic_prob == 0. else MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
			
 
				-            self.mixup_augment  = None if cfg.mixup_prob == 0. and cfg.copy_paste == 0.  else MixupAugment(cfg.train_img_size)
			
 
				-        else:
			
 
				             self.mosaic_prob = 0.0
			
 
				             self.mixup_prob  = 0.0
			
 
				             self.copy_paste  = 0.0
			
--- a/yolo/demo.py
+++ b/yolo/demo.py
@@ -16,7 +16,7 @@ from utils.box_ops import rescale_bboxes
 
				 from utils.vis_tools import visualize
			
 
				 
			
 
				 from models import build_model
			
 
				-from config import build_config\
			
 
				+from config import build_config
			
 
				 
			
 
				 from dataset.voc  import voc_class_labels
			
 
				 from dataset.coco import coco_class_labels
			
--- a/yolo/test.py
+++ b/yolo/test.py
@@ -21,7 +21,7 @@ from models import build_model
 
				 def parse_args():
			
 
				     parser = argparse.ArgumentParser(description='Real-time Object Detection LAB')
			
 
				     # Basic setting
			
 
				-    parser.add_argument('-size', '--img_size', default=640, type=int,
			
 
				+    parser.add_argument('--img_size', default=640, type=int,
			
 
				                         help='the max size of input image')
			
 
				     parser.add_argument('--show', action='store_true', default=False,
			
 
				                         help='show the visulization results.')
			
@@ -35,19 +35,17 @@ def parse_args():
 
				                         help='resize window of cv2 for visualization.')
			
 
				 
			
 
				     # Model setting
			
 
				-    parser.add_argument('-m', '--model', default='yolo_n', type=str,
			
 
				+    parser.add_argument('--model', default='yolo_n', type=str,
			
 
				                         help='build yolo')
			
 
				     parser.add_argument('--weight', default=None,
			
 
				                         type=str, help='Trained state_dict file path to open')
			
 
				     parser.add_argument('--fuse_conv_bn', action='store_true', default=False,
			
 
				                         help='fuse Conv & BN')
			
 
				-    parser.add_argument('--rep_conv', action='store_true', default=False,
			
 
				-                        help='fuse Rep VGG block')
			
 
				 
			
 
				     # Data setting
			
 
				     parser.add_argument('--root', default='D:/python_work/dataset/COCO/',
			
 
				                         help='data root')
			
 
				-    parser.add_argument('-d', '--dataset', default='coco',
			
 
				+    parser.add_argument('--dataset', default='coco',
			
 
				                         help='coco, voc.')
			
 
				 
			
 
				     return parser.parse_args()
			
@@ -129,7 +127,7 @@ if __name__ == '__main__':
 
				     model = build_model(args, cfg, is_val=False)
			
 
				 
			
 
				     # Load trained weight
			
 
				-    model = load_weight(model, args.weight, args.fuse_conv_bn, args.rep_conv)
			
 
				+    model = load_weight(model, args.weight, args.fuse_conv_bn)
			
 
				     model.to(device).eval()
			
 
				 
			
 
				     # Compute FLOPs and Params
			
--- a/yolo/utils/misc.py
+++ b/yolo/utils/misc.py
@@ -343,7 +343,7 @@ def compute_flops(model, img_size, device):
 
				     print('Params : {:.2f} M'.format(params / 1e6))
			
 
				 
			
 
				 ## load trained weight
			
 
				-def load_weight(model, path_to_ckpt, fuse_cbn=False, rep_conv=False):
			
 
				+def load_weight(model, path_to_ckpt, fuse_cbn=False):
			
 
				     # Check ckpt file
			
 
				     if path_to_ckpt is None:
			
 
				         print('no weight file ...')
			
@@ -369,11 +369,6 @@ def load_weight(model, path_to_ckpt, fuse_cbn=False, rep_conv=False):
 
				         print('Fusing Conv & BN ...')
			
 
				         model = fuse_conv_bn(model)
			
 
				 
			
 
				-    # Fuse RepConv
			
 
				-    if hasattr(model, "switch_to_deploy") and rep_conv:
			
 
				-        print("Reparam ...")
			
 
				-        model.switch_to_deploy()
			
 
				-
			
 
				     return model
			
 
				 
			
 
				 ## Model EMA