1 rok pred · 55b0dc15b2
--- a/yolo/dataset/build.py
+++ b/yolo/dataset/build.py
@@ -2,18 +2,18 @@ import os
 
				 
			
 
				 try:
			
 
				     # dataset class
			
 
				-    from .voc        import VOCDataset
			
 
				-    from .coco       import COCODataset
			
 
				-    from .custom     import CustomDataset
			
 
				+    from .voc     import VOCDataset
			
 
				+    from .coco    import COCODataset
			
 
				+    from .custom  import CustomDataset
			
 
				     # transform class
			
 
				     from .data_augment.yolo_augment import YOLOAugmentation, YOLOBaseTransform
			
 
				     from .data_augment.ssd_augment  import SSDAugmentation, SSDBaseTransform
			
 
				 
			
 
				 except:
			
 
				     # dataset class
			
 
				-    from voc        import VOCDataset
			
 
				-    from coco       import COCODataset
			
 
				-    from yolo.dataset.custom   import CustomDataset
			
 
				+    from voc     import VOCDataset
			
 
				+    from coco    import COCODataset
			
 
				+    from custom  import CustomDataset
			
 
				     # transform class
			
 
				     from data_augment.yolo_augment import YOLOAugmentation, YOLOBaseTransform
			
 
				     from data_augment.ssd_augment  import SSDAugmentation, SSDBaseTransform
			
@@ -24,19 +24,15 @@ def build_dataset(args, cfg, transform=None, is_train=False):
 
				     # ------------------------- Build dataset -------------------------
			
 
				     ## VOC dataset
			
 
				     if args.dataset == 'voc':
			
 
				-        image_set = [('2007', 'trainval'), ('2012', 'trainval')] if is_train else [('2007', 'test')]
			
 
				         dataset = VOCDataset(cfg       = cfg,
			
 
				                              data_dir  = args.root,
			
 
				-                             image_set = image_set,
			
 
				                              transform = transform,
			
 
				                              is_train  = is_train,
			
 
				                              )
			
 
				     ## COCO dataset
			
 
				     elif args.dataset == 'coco':
			
 
				-        image_set = 'train2017' if is_train else 'val2017'
			
 
				         dataset = COCODataset(cfg       = cfg,
			
 
				                               data_dir  = args.root,
			
 
				-                              image_set = image_set,
			
 
				                               transform = transform,
			
 
				                               is_train  = is_train,
			
 
				                               )
			
--- a/yolo/dataset/coco.py
+++ b/yolo/dataset/coco.py
@@ -25,19 +25,18 @@ class COCODataset(Dataset):
 
				     def __init__(self, 
			
 
				                  cfg,
			
 
				                  data_dir  :str = None, 
			
 
				-                 image_set :str = 'train2017',
			
 
				                  transform = None,
			
 
				                  is_train  :bool = False,
			
 
				                  use_mask  :bool = False,
			
 
				                  ):
			
 
				         # ----------- Basic parameters -----------
			
 
				         self.data_dir  = data_dir
			
 
				-        self.image_set = image_set
			
 
				+        self.image_set = "train2017" if is_train else "val2017"
			
 
				         self.is_train  = is_train
			
 
				         self.use_mask  = use_mask
			
 
				         self.num_classes = 80
			
 
				         # ----------- Data parameters -----------
			
 
				-        self.json_file = coco_json_files['{}'.format(image_set)]
			
 
				+        self.json_file = coco_json_files['{}'.format(self.image_set)]
			
 
				         self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
			
 
				         self.ids = self.coco.getImgIds()
			
 
				         self.class_ids = sorted(self.coco.getCatIds())
			
@@ -148,19 +147,18 @@ class COCODataset(Dataset):
 
				         return image, target, deltas
			
 
				 
			
 
				     def pull_image(self, index):
			
 
				-        img_id = self.ids[index]
			
 
				-        img_file = os.path.join(self.data_dir, self.image_set,
			
 
				-                                '{:012}'.format(img_id) + '.jpg')
			
 
				-        image = cv2.imread(img_file)
			
 
				+        # get the image file name
			
 
				+        image_dict = self.coco.dataset['images'][index]
			
 
				+        image_id = image_dict["id"]
			
 
				+        filename = image_dict["file_name"]
			
 
				 
			
 
				-        if self.json_file == 'instances_val5k.json' and image is None:
			
 
				-            img_file = os.path.join(self.data_dir, 'train2017',
			
 
				-                                    '{:012}'.format(img_id) + '.jpg')
			
 
				-            image = cv2.imread(img_file)
			
 
				+        # load the image
			
 
				+        image_path = os.path.join(self.data_dir, self.image_set, filename)
			
 
				+        image = cv2.imread(image_path)
			
 
				 
			
 
				         assert image is not None
			
 
				 
			
 
				-        return image, img_id
			
 
				+        return image, image_id
			
 
				 
			
 
				     def pull_anno(self, index):
			
 
				         img_id = self.ids[index]
			
@@ -265,7 +263,7 @@ if __name__ == "__main__":
 
				         cfg = SSDBaseConfig()
			
 
				 
			
 
				     transform = build_transform(cfg, args.is_train)
			
 
				-    dataset = COCODataset(cfg, args.root, 'val2017', transform, args.is_train)
			
 
				+    dataset = COCODataset(cfg, args.root, transform, args.is_train)
			
 
				     
			
 
				     np.random.seed(0)
			
 
				     class_colors = [(np.random.randint(255),
			
--- a/yolo/dataset/custom.py
+++ b/yolo/dataset/custom.py
@@ -20,19 +20,18 @@ class CustomDataset(Dataset):
 
				     def __init__(self, 
			
 
				                  cfg,
			
 
				                  data_dir     :str = None, 
			
 
				-                 image_set    :str = 'train2017',
			
 
				                  transform    = None,
			
 
				                  is_train     :bool =False,
			
 
				                  ):
			
 
				         # ----------- Basic parameters -----------
			
 
				-        self.image_set = image_set
			
 
				+        self.image_set = "train" if is_train else "val"
			
 
				         self.is_train  = is_train
			
 
				         self.num_classes = len(custom_class_labels)
			
 
				         # ----------- Path parameters -----------
			
 
				         self.data_dir = data_dir
			
 
				-        self.json_file = '{}.json'.format(image_set)
			
 
				+        self.json_file = '{}.json'.format(self.image_set)
			
 
				         # ----------- Data parameters -----------
			
 
				-        self.coco = COCO(os.path.join(self.data_dir, image_set, 'annotations', self.json_file))
			
 
				+        self.coco = COCO(os.path.join(self.data_dir, self.image_set, 'annotations', self.json_file))
			
 
				         self.ids = self.coco.getImgIds()
			
 
				         self.class_ids = sorted(self.coco.getCatIds())
			
 
				         self.dataset_size = len(self.ids)
			
@@ -53,7 +52,7 @@ class CustomDataset(Dataset):
 
				             self.mosaic_augment = None
			
 
				             self.mixup_augment  = None
			
 
				         print('==============================')
			
 
				-        print('Image Set: {}'.format(image_set))
			
 
				+        print('Image Set: {}'.format(self.image_set))
			
 
				         print('Json file: {}'.format(self.json_file))
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
@@ -255,7 +254,7 @@ if __name__ == "__main__":
 
				         cfg = SSDBaseConfig()
			
 
				 
			
 
				     transform = build_transform(cfg, args.is_train)
			
 
				-    dataset = CustomDataset(cfg, args.root, 'val', transform, args.is_train)
			
 
				+    dataset = CustomDataset(cfg, args.root, transform, args.is_train)
			
 
				     
			
 
				     np.random.seed(0)
			
 
				     class_colors = [(np.random.randint(255),
			
--- a/yolo/dataset/scripts/data_to_h5py.py
+++ b/yolo/dataset/scripts/data_to_h5py.py
@@ -1,70 +0,0 @@
 
				-import cv2
			
 
				-import h5py
			
 
				-import os
			
 
				-import argparse
			
 
				-import numpy as np
			
 
				-import sys
			
 
				-
			
 
				-sys.path.append('..')
			
 
				-from voc import VOCDetection
			
 
				-from coco import COCODataset
			
 
				-
			
 
				-# ---------------------- Opt ----------------------
			
 
				-parser = argparse.ArgumentParser(description='Cache-Dataset')
			
 
				-parser.add_argument('-d', '--dataset', default='voc',
			
 
				-                    help='coco, voc, widerface, crowdhuman')
			
 
				-parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/',
			
 
				-                    help='data root')
			
 
				-parser.add_argument('-size', '--img_size', default=640, type=int,
			
 
				-                    help='input image size.')
			
 
				-parser.add_argument('--mosaic', default=None, type=float,
			
 
				-                    help='mosaic augmentation.')
			
 
				-parser.add_argument('--mixup', default=None, type=float,
			
 
				-                    help='mixup augmentation.')
			
 
				-parser.add_argument('--keep_ratio', action="store_true", default=False,
			
 
				-                    help='keep aspect ratio.')
			
 
				-parser.add_argument('--show', action="store_true", default=False,
			
 
				-                    help='keep aspect ratio.')
			
 
				-
			
 
				-args = parser.parse_args()
			
 
				-
			
 
				-
			
 
				-# ---------------------- Build Dataset ----------------------
			
 
				-if args.dataset == 'voc':
			
 
				-    root = os.path.join(args.root, 'VOCdevkit')
			
 
				-    dataset = VOCDetection(args.img_size, root)
			
 
				-elif args.dataset == 'coco':
			
 
				-    root = os.path.join(args.root, 'COCO')
			
 
				-    dataset = COCODataset(args.img_size, args.root)
			
 
				-print('Data length: ', len(dataset))
			
 
				-
			
 
				-
			
 
				-# ---------------------- Main Process ----------------------
			
 
				-cached_image = []
			
 
				-dataset_size = len(dataset)
			
 
				-for i in range(len(dataset)):
			
 
				-    if i % 5000 == 0:
			
 
				-        print("[{} / {}]".format(i, dataset_size))
			
 
				-    # load an image
			
 
				-    image, image_id = dataset.pull_image(i)
			
 
				-    orig_h, orig_w, _ = image.shape
			
 
				-
			
 
				-    # resize image
			
 
				-    if args.keep_ratio:
			
 
				-        r = args.img_size / max(orig_h, orig_w)
			
 
				-        if r != 1: 
			
 
				-            interp = cv2.INTER_LINEAR
			
 
				-            new_size = (int(orig_w * r), int(orig_h * r))
			
 
				-            image = cv2.resize(image, new_size, interpolation=interp)
			
 
				-    else:
			
 
				-        image = cv2.resize(image, (int(args.img_size), int(args.img_size)))
			
 
				-
			
 
				-    cached_image.append(image)
			
 
				-    if args.show:
			
 
				-        cv2.imshow('image', image)
			
 
				-        # cv2.imwrite(str(i)+'.jpg', img)
			
 
				-        cv2.waitKey(0)
			
 
				-
			
 
				-save_path = "dataset/cache/"
			
 
				-os.makedirs(save_path, exist_ok=True)
			
 
				-np.save(save_path + '{}_train_images.npy'.format(args.dataset), cached_image)
			
--- a/yolo/dataset/voc.py
+++ b/yolo/dataset/voc.py
@@ -1,9 +1,10 @@
 
				+import os
			
 
				 import cv2
			
 
				+import time
			
 
				 import random
			
 
				 import numpy as np
			
 
				-import os.path as osp
			
 
				-import xml.etree.ElementTree as ET
			
 
				-import torch.utils.data as data
			
 
				+from torch.utils.data import Dataset
			
 
				+from pycocotools.coco import COCO
			
 
				 
			
 
				 try:
			
 
				     from .data_augment.strong_augment import MosaicAugment, MixupAugment
			
@@ -11,65 +12,31 @@ except:
 
				     from  data_augment.strong_augment import MosaicAugment, MixupAugment
			
 
				 
			
 
				 
			
 
				-VOC_CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
			
 
				 voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
			
 
				 voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
			
 
				 
			
 
				 
			
 
				-class VOCAnnotationTransform(object):
			
 
				-    def __init__(self, class_to_ind=None, keep_difficult=False):
			
 
				-        self.class_to_ind = class_to_ind or dict(
			
 
				-            zip(VOC_CLASSES, range(len(VOC_CLASSES))))
			
 
				-        self.keep_difficult = keep_difficult
			
 
				-
			
 
				-    def __call__(self, target):
			
 
				-        res = []
			
 
				-        for obj in target.iter('object'):
			
 
				-            difficult = int(obj.find('difficult').text) == 1
			
 
				-            if not self.keep_difficult and difficult:
			
 
				-                continue
			
 
				-            name = obj.find('name').text.lower().strip()
			
 
				-            bbox = obj.find('bndbox')
			
 
				-
			
 
				-            pts = ['xmin', 'ymin', 'xmax', 'ymax']
			
 
				-            bndbox = []
			
 
				-            for i, pt in enumerate(pts):
			
 
				-                cur_pt = int(bbox.find(pt).text) - 1
			
 
				-                bndbox.append(cur_pt)
			
 
				-            label_idx = self.class_to_ind[name]
			
 
				-            bndbox.append(label_idx)
			
 
				-            res += [bndbox]  # [x1, y1, x2, y2, label_ind]
			
 
				-
			
 
				-        return res  # [[x1, y1, x2, y2, label_ind], ... ]
			
 
				-
			
 
				-
			
 
				-class VOCDataset(data.Dataset):
			
 
				+class VOCDataset(Dataset):
			
 
				     def __init__(self, 
			
 
				                  cfg,
			
 
				-                 data_dir   :str = None, 
			
 
				-                 image_set  = [('2007', 'trainval'), ('2012', 'trainval')],
			
 
				-                 transform  = None,
			
 
				-                 is_train   :bool =False,
			
 
				+                 data_dir  :str = None, 
			
 
				+                 transform = None,
			
 
				+                 is_train  :bool = False,
			
 
				                  ):
			
 
				         # ----------- Basic parameters -----------
			
 
				-        self.image_set = image_set
			
 
				+        self.data_dir  = data_dir
			
 
				+        self.image_set = "train" if is_train else "val"
			
 
				         self.is_train  = is_train
			
 
				         self.num_classes = 20
			
 
				-        # ----------- Path parameters -----------
			
 
				-        self.root = data_dir
			
 
				-        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
			
 
				-        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
			
 
				         # ----------- Data parameters -----------
			
 
				-        self.ids = list()
			
 
				-        for (year, name) in image_set:
			
 
				-            rootpath = osp.join(self.root, 'VOC' + year)
			
 
				-            for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
			
 
				-                self.ids.append((rootpath, line.strip()))
			
 
				+        self.json_file = "instances_{}.json".format(self.image_set)
			
 
				+        self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
			
 
				+        self.ids = self.coco.getImgIds()
			
 
				+        self.class_ids = sorted(self.coco.getCatIds())
			
 
				         self.dataset_size = len(self.ids)
			
 
				         self.class_labels = voc_class_labels
			
 
				         self.class_indexs = voc_class_indexs
			
 
				         # ----------- Transform parameters -----------
			
 
				-        self.target_transform = VOCAnnotationTransform()
			
 
				         self.transform = transform
			
 
				         if is_train:
			
 
				             self.mosaic_prob = cfg.mosaic_prob
			
@@ -85,16 +52,15 @@ class VOCDataset(data.Dataset):
 
				             self.mixup_augment  = None
			
 
				         print('==============================')
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				-        print('use Mixup Augmentation:  {}'.format(self.mixup_prob))
			
 
				+        print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
 
				         print('use Copy-paste Augmentation: {}'.format(self.copy_paste))
			
 
				 
			
 
				     # ------------ Basic dataset function ------------
			
 
				-    def __getitem__(self, index):
			
 
				-        image, target, deltas = self.pull_item(index)
			
 
				-        return image, target, deltas
			
 
				-
			
 
				     def __len__(self):
			
 
				-        return self.dataset_size
			
 
				+        return len(self.ids)
			
 
				+
			
 
				+    def __getitem__(self, index):
			
 
				+        return self.pull_item(index)
			
 
				 
			
 
				     # ------------ Mosaic & Mixup ------------
			
 
				     def load_mosaic(self, index):
			
@@ -138,17 +104,14 @@ class VOCDataset(data.Dataset):
 
				         image, _ = self.pull_image(index)
			
 
				         height, width, channels = image.shape
			
 
				 
			
 
				-        # laod an annotation
			
 
				-        anno, _ = self.pull_anno(index)
			
 
				-
			
 
				-        # guard against no boxes via resizing
			
 
				-        anno = np.array(anno).reshape(-1, 5)
			
 
				+        # load a target
			
 
				+        bboxes, labels = self.pull_anno(index)
			
 
				         target = {
			
 
				-            "boxes": anno[:, :4],
			
 
				-            "labels": anno[:, 4],
			
 
				+            "boxes": bboxes,
			
 
				+            "labels": labels,
			
 
				             "orig_size": [height, width]
			
 
				         }
			
 
				-        
			
 
				+
			
 
				         return image, target
			
 
				 
			
 
				     def pull_item(self, index):
			
@@ -177,17 +140,54 @@ class VOCDataset(data.Dataset):
 
				         return image, target, deltas
			
 
				 
			
 
				     def pull_image(self, index):
			
 
				-        img_id = self.ids[index]
			
 
				-        image = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
			
 
				+        # get the image file name
			
 
				+        image_dict = self.coco.dataset['images'][index]
			
 
				+        image_id = image_dict["id"]
			
 
				+        filename = image_dict["file_name"]
			
 
				+
			
 
				+        # load the image
			
 
				+        image_path = os.path.join(self.data_dir, "images", filename)
			
 
				+        image = cv2.imread(image_path)
			
 
				 
			
 
				-        return image, img_id
			
 
				+        assert image is not None
			
 
				+
			
 
				+        return image, image_id
			
 
				 
			
 
				     def pull_anno(self, index):
			
 
				-        img_id = self.ids[index]
			
 
				-        anno = ET.parse(self._annopath % img_id).getroot()
			
 
				-        anno = self.target_transform(anno)
			
 
				+        img_id = self.ids[index]        
			
 
				+        # image infor
			
 
				+        im_ann = self.coco.loadImgs(img_id)[0]
			
 
				+        width = im_ann['width']
			
 
				+        height = im_ann['height']
			
 
				 
			
 
				-        return anno, img_id
			
 
				+        # annotation infor
			
 
				+        anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=None)
			
 
				+        annotations = self.coco.loadAnns(anno_ids)
			
 
				+
			
 
				+        
			
 
				+        #load a target
			
 
				+        bboxes = []
			
 
				+        labels = []
			
 
				+        for anno in annotations:
			
 
				+            if 'bbox' in anno and anno['area'] > 0:
			
 
				+                # bbox
			
 
				+                x1 = np.max((0, anno['bbox'][0]))
			
 
				+                y1 = np.max((0, anno['bbox'][1]))
			
 
				+                x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
			
 
				+                y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
			
 
				+                if x2 < x1 or y2 < y1:
			
 
				+                    continue
			
 
				+                # class label
			
 
				+                cls_id = self.class_ids.index(anno['category_id'])
			
 
				+                
			
 
				+                bboxes.append([x1, y1, x2, y2])
			
 
				+                labels.append(cls_id)
			
 
				+
			
 
				+        # guard against no boxes via resizing
			
 
				+        bboxes = np.array(bboxes).reshape(-1, 4)
			
 
				+        labels = np.array(labels).reshape(-1)
			
 
				+        
			
 
				+        return bboxes, labels
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
@@ -195,16 +195,16 @@ if __name__ == "__main__":
 
				     import argparse
			
 
				     from build import build_transform
			
 
				     
			
 
				-    parser = argparse.ArgumentParser(description='VOC-Dataset')
			
 
				+    parser = argparse.ArgumentParser(description='COCO-Dataset')
			
 
				 
			
 
				     # opt
			
 
				-    parser.add_argument('--root', default='D:/python_work/dataset/VOCdevkit/',
			
 
				+    parser.add_argument('--root', default="D:/python_work/dataset/VOCdevkit/",
			
 
				                         help='data root')
			
 
				     parser.add_argument('--is_train', action="store_true", default=False,
			
 
				-                        help='train or not.')
			
 
				+                        help='mixup augmentation.')
			
 
				     parser.add_argument('--aug_type', default="yolo", type=str, choices=["yolo", "ssd"],
			
 
				                         help='yolo, ssd.')
			
 
				-    
			
 
				+
			
 
				     args = parser.parse_args()
			
 
				 
			
 
				     class YoloBaseConfig(object):
			
@@ -258,7 +258,7 @@ if __name__ == "__main__":
 
				         cfg = SSDBaseConfig()
			
 
				 
			
 
				     transform = build_transform(cfg, args.is_train)
			
 
				-    dataset = VOCDataset(cfg, args.root, [('2007', 'test')], transform, args.is_train)
			
 
				+    dataset = VOCDataset(cfg, args.root, transform, args.is_train)
			
 
				     
			
 
				     np.random.seed(0)
			
 
				     class_colors = [(np.random.randint(255),
			
@@ -314,4 +314,4 @@ if __name__ == "__main__":
 
				             cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
			
 
				         cv2.imshow('gt', image)
			
 
				         # cv2.imwrite(str(i)+'.jpg', img)
			
 
				-        cv2.waitKey(0)
			
 
				+        cv2.waitKey(0)
			
--- a/yolo/eval.py
+++ b/yolo/eval.py
@@ -2,9 +2,7 @@ import argparse
 
				 import torch
			
 
				 
			
 
				 # evaluators
			
 
				-from evaluator.voc_evaluator    import VOCAPIEvaluator
			
 
				-from evaluator.coco_evaluator   import COCOAPIEvaluator
			
 
				-from evaluator.custom_evaluator import CustomEvaluator
			
 
				+from evaluator.map_evaluator import MapEvaluator
			
 
				 
			
 
				 # load transform
			
 
				 from dataset.build import build_dataset, build_transform
			
@@ -47,40 +45,6 @@ def parse_args():
 
				     return parser.parse_args()
			
 
				 
			
 
				 
			
 
				-
			
 
				-def voc_test(cfg, model, data_dir, device, transform):
			
 
				-    evaluator = VOCAPIEvaluator(cfg=cfg,
			
 
				-                                data_dir=data_dir,
			
 
				-                                device=device,
			
 
				-                                transform=transform,
			
 
				-                                display=True)
			
 
				-
			
 
				-    # VOC evaluation
			
 
				-    evaluator.evaluate(model)
			
 
				-
			
 
				-def coco_test(cfg, model, data_dir, device, transform):
			
 
				-    # eval
			
 
				-    evaluator = COCOAPIEvaluator(
			
 
				-                    cfg=cfg,
			
 
				-                    data_dir=data_dir,
			
 
				-                    device=device,
			
 
				-                    transform=transform)
			
 
				-
			
 
				-    # COCO evaluation
			
 
				-    evaluator.evaluate(model)
			
 
				-
			
 
				-def custom_test(cfg, model, data_dir, device, transform):
			
 
				-    evaluator = CustomEvaluator(
			
 
				-        cfg=cfg,
			
 
				-        data_dir=data_dir,
			
 
				-        device=device,
			
 
				-        image_set='val',
			
 
				-        transform=transform)
			
 
				-
			
 
				-    # WiderFace evaluation
			
 
				-    evaluator.evaluate(model)
			
 
				-
			
 
				-
			
 
				 if __name__ == '__main__':
			
 
				     args = parse_args()
			
 
				     # cuda
			
@@ -107,10 +71,10 @@ if __name__ == '__main__':
 
				     model.to(device).eval()
			
 
				 
			
 
				     # evaluation
			
 
				-    with torch.no_grad():
			
 
				-        if args.dataset == 'voc':
			
 
				-            voc_test(cfg, model, args.root, device, transform)
			
 
				-        elif args.dataset == 'coco':
			
 
				-            coco_test(cfg, model, args.root, device, transform)
			
 
				-        elif args.dataset == 'custom':
			
 
				-            custom_test(cfg, model, args.root, device, transform)
			
 
				+    evaluator = MapEvaluator(cfg = cfg,
			
 
				+                             dataset_name = args.dataset,
			
 
				+                             data_dir  = args.root,
			
 
				+                             device    = device,
			
 
				+                             transform = transform
			
 
				+                             )
			
 
				+    evaluator.evaluate(model)
			
--- a/yolo/evaluator/build.py
+++ b/yolo/evaluator/build.py
@@ -1,33 +0,0 @@
 
				-import os
			
 
				-
			
 
				-from evaluator.coco_evaluator import COCOAPIEvaluator
			
 
				-from evaluator.voc_evaluator import VOCAPIEvaluator
			
 
				-from evaluator.custom_evaluator import CustomEvaluator
			
 
				-
			
 
				-
			
 
				-
			
 
				-def build_evluator(args, cfg, transform, device):
			
 
				-    # Evaluator
			
 
				-    ## VOC Evaluator
			
 
				-    if args.dataset == 'voc':
			
 
				-        evaluator = VOCAPIEvaluator(cfg       = cfg,
			
 
				-                                    data_dir  = args.root,
			
 
				-                                    device    = device,
			
 
				-                                    transform = transform
			
 
				-                                    )
			
 
				-    ## COCO Evaluator
			
 
				-    elif args.dataset == 'coco':
			
 
				-        evaluator = COCOAPIEvaluator(cfg       = cfg,
			
 
				-                                     data_dir  = args.root,
			
 
				-                                     device    = device,
			
 
				-                                     transform = transform
			
 
				-                                     )
			
 
				-    ## Custom dataset Evaluator
			
 
				-    elif args.dataset == 'ourdataset':
			
 
				-        evaluator = CustomEvaluator(cfg       = cfg,
			
 
				-                                      data_dir  = args.root,
			
 
				-                                      device    = device,
			
 
				-                                      transform = transform
			
 
				-                                      )
			
 
				-
			
 
				-    return evaluator
			
--- a/yolo/evaluator/custom_evaluator.py
+++ b/yolo/evaluator/custom_evaluator.py
@@ -1,111 +0,0 @@
 
				-import json
			
 
				-import tempfile
			
 
				-import torch
			
 
				-from dataset.custom import CustomDataset
			
 
				-from utils.box_ops import rescale_bboxes
			
 
				-
			
 
				-try:
			
 
				-    from pycocotools.cocoeval import COCOeval
			
 
				-except:
			
 
				-    print("It seems that the COCOAPI is not installed.")
			
 
				-
			
 
				-
			
 
				-class CustomEvaluator():
			
 
				-    def __init__(self, cfg, data_dir, device, image_set='val', transform=None):
			
 
				-        # ----------------- Basic parameters -----------------
			
 
				-        self.image_set = image_set
			
 
				-        self.transform = transform
			
 
				-        self.device = device
			
 
				-        # ----------------- Metrics -----------------
			
 
				-        self.map = 0.
			
 
				-        self.ap50_95 = 0.
			
 
				-        self.ap50 = 0.
			
 
				-        # ----------------- Dataset -----------------
			
 
				-        self.dataset = CustomDataset(cfg, data_dir=data_dir, image_set=image_set, transform=None, is_train=False)
			
 
				-
			
 
				-    @torch.no_grad()
			
 
				-    def evaluate(self, model):
			
 
				-        """
			
 
				-        COCO average precision (AP) Evaluation. Iterate inference on the test dataset
			
 
				-        and the results are evaluated by COCO API.
			
 
				-        Args:
			
 
				-            model : model object
			
 
				-        Returns:
			
 
				-            ap50_95 (float) : calculated COCO AP for IoU=50:95
			
 
				-            ap50 (float) : calculated COCO AP for IoU=50
			
 
				-        """
			
 
				-        model.eval()
			
 
				-        ids = []
			
 
				-        data_dict = []
			
 
				-        num_images = len(self.dataset)
			
 
				-        print('total number of images: %d' % (num_images))
			
 
				-
			
 
				-        # --------------- COCO-style evaluation ---------------
			
 
				-        for index in range(num_images):
			
 
				-            if index % 500 == 0:
			
 
				-                print('[Eval: %d / %d]'%(index, num_images))
			
 
				-
			
 
				-            # ----------- Load an image -----------
			
 
				-            img, img_id = self.dataset.pull_image(index)
			
 
				-            orig_h, orig_w, _ = img.shape
			
 
				-
			
 
				-            # ----------- Data preprocess -----------
			
 
				-            x, _, ratio = self.transform(img)
			
 
				-            x = x.unsqueeze(0).to(self.device)
			
 
				-            
			
 
				-            img_id = int(img_id)
			
 
				-            ids.append(img_id)
			
 
				-
			
 
				-            # ----------- Model inference -----------
			
 
				-            outputs = model(x)
			
 
				-            scores = outputs['scores']
			
 
				-            labels = outputs['labels']
			
 
				-            bboxes = outputs['bboxes']
			
 
				-
			
 
				-            # ----------- Rescale bboxes -----------
			
 
				-            bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
			
 
				-
			
 
				-            # ----------- Process results -----------
			
 
				-            for i, box in enumerate(bboxes):
			
 
				-                x1 = float(box[0])
			
 
				-                y1 = float(box[1])
			
 
				-                x2 = float(box[2])
			
 
				-                y2 = float(box[3])
			
 
				-                label = self.dataset.class_ids[int(labels[i])]
			
 
				-                
			
 
				-                # COCO box format: x1, y1, bw, bh
			
 
				-                bbox = [x1, y1, x2 - x1, y2 - y1]
			
 
				-                score = float(scores[i])
			
 
				-                 # COCO json format
			
 
				-                A = {"image_id":    img_id,
			
 
				-                     "category_id": label,
			
 
				-                     "bbox":        bbox,
			
 
				-                     "score":       score}
			
 
				-                data_dict.append(A)
			
 
				-
			
 
				-        annType = ['segm', 'bbox', 'keypoints']
			
 
				-
			
 
				-        # ------------- COCO Box detection evaluation -------------
			
 
				-        if len(data_dict) > 0:
			
 
				-            print('evaluating ......')
			
 
				-            cocoGt = self.dataset.coco
			
 
				-            _, tmp = tempfile.mkstemp()
			
 
				-            json.dump(data_dict, open(tmp, 'w'))
			
 
				-            cocoDt = cocoGt.loadRes(tmp)
			
 
				-            cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1])
			
 
				-            cocoEval.params.imgIds = ids
			
 
				-            cocoEval.evaluate()
			
 
				-            cocoEval.accumulate()
			
 
				-            cocoEval.summarize()
			
 
				-
			
 
				-            ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1]
			
 
				-            print('ap50_95 : ', ap50_95)
			
 
				-            print('ap50 : ', ap50)
			
 
				-            self.map = ap50_95
			
 
				-            self.ap50_95 = ap50_95
			
 
				-            self.ap50 = ap50
			
 
				-
			
 
				-            return ap50, ap50_95
			
 
				-        else:
			
 
				-            return 0, 0
			
 
				-
			
--- a/yolo/evaluator/coco_evaluator.py
+++ b/yolo/evaluator/coco_evaluator.py
@@ -4,13 +4,13 @@ import torch
 
				 from pycocotools.cocoeval import COCOeval
			
 
				 
			
 
				 from dataset.coco import COCODataset
			
 
				+from dataset.voc  import VOCDataset
			
 
				 from utils.box_ops import rescale_bboxes
			
 
				 
			
 
				 
			
 
				-class COCOAPIEvaluator():
			
 
				-    def __init__(self, cfg, data_dir, device, transform=None):
			
 
				+class MapEvaluator():
			
 
				+    def __init__(self, dataset_name, cfg, data_dir, device, transform=None):
			
 
				         # ----------------- Basic parameters -----------------
			
 
				-        self.image_set = 'val2017'
			
 
				         self.transform = transform
			
 
				         self.device = device
			
 
				         # ----------------- Metrics -----------------
			
@@ -18,7 +18,12 @@ class COCOAPIEvaluator():
 
				         self.ap50_95 = 0.
			
 
				         self.ap50 = 0.
			
 
				         # ----------------- Dataset -----------------
			
 
				-        self.dataset = COCODataset(cfg=cfg, data_dir=data_dir, image_set=self.image_set, transform=None, is_train=False)
			
 
				+        if   dataset_name == "coco":
			
 
				+            self.dataset = COCODataset(cfg=cfg, data_dir=data_dir, transform=None, is_train=False)
			
 
				+        elif dataset_name == "voc":
			
 
				+            self.dataset = VOCDataset(cfg=cfg, data_dir=data_dir, transform=None, is_train=False)
			
 
				+        else:
			
 
				+            raise NotImplementedError("Unknown dataset name.")
			
 
				 
			
 
				     @torch.no_grad()
			
 
				     def evaluate(self, model):
			
--- a/yolo/evaluator/voc_evaluator.py
+++ b/yolo/evaluator/voc_evaluator.py
@@ -1,348 +0,0 @@
 
				-"""Adapted from:
			
 
				-    @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch
			
 
				-    @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn
			
 
				-    Licensed under The MIT License [see LICENSE for details]
			
 
				-"""
			
 
				-
			
 
				-from dataset.voc import VOCDataset, VOC_CLASSES
			
 
				-import os
			
 
				-import time
			
 
				-import numpy as np
			
 
				-import pickle
			
 
				-import xml.etree.ElementTree as ET
			
 
				-
			
 
				-from utils.box_ops import rescale_bboxes
			
 
				-
			
 
				-
			
 
				-class VOCAPIEvaluator():
			
 
				-    """ VOC AP Evaluation class """
			
 
				-    def __init__(self,
			
 
				-                 cfg,
			
 
				-                 data_dir, 
			
 
				-                 device,
			
 
				-                 transform,
			
 
				-                 set_type='test', 
			
 
				-                 year='2007', 
			
 
				-                 display=False):
			
 
				-        # basic config
			
 
				-        self.data_dir = data_dir
			
 
				-        self.device = device
			
 
				-        self.labelmap = VOC_CLASSES
			
 
				-        self.set_type = set_type
			
 
				-        self.year = year
			
 
				-        self.display = display
			
 
				-        self.map = 0.
			
 
				-
			
 
				-        # transform
			
 
				-        self.transform = transform
			
 
				-
			
 
				-        # path
			
 
				-        time_stamp = time.strftime('%Y-%m-%d_%H-%M-%S',time.localtime(time.time()))
			
 
				-        self.devkit_path = os.path.join(data_dir, 'VOC' + year)
			
 
				-        self.annopath = os.path.join(data_dir, 'VOC2007', 'Annotations', '%s.xml')
			
 
				-        self.imgpath = os.path.join(data_dir, 'VOC2007', 'JPEGImages', '%s.jpg')
			
 
				-        self.imgsetpath = os.path.join(data_dir, 'VOC2007', 'ImageSets', 'Main', set_type+'.txt')
			
 
				-        self.output_dir = self.get_output_dir('det_results/eval/voc_eval/{}'.format(time_stamp), self.set_type)
			
 
				-
			
 
				-        # dataset
			
 
				-        self.dataset = VOCDataset(
			
 
				-            cfg=cfg,
			
 
				-            data_dir=data_dir, 
			
 
				-            image_set=[('2007', set_type)],
			
 
				-            is_train=False)
			
 
				-        
			
 
				-    def evaluate(self, net):
			
 
				-        net.eval()
			
 
				-        num_images = len(self.dataset)
			
 
				-        # all detections are collected into:
			
 
				-        #    all_boxes[cls][image] = N x 5 array of detections in
			
 
				-        #    (x1, y1, x2, y2, score)
			
 
				-        self.all_boxes = [[[] for _ in range(num_images)]
			
 
				-                        for _ in range(len(self.labelmap))]
			
 
				-
			
 
				-        # timers
			
 
				-        det_file = os.path.join(self.output_dir, 'detections.pkl')
			
 
				-
			
 
				-        for i in range(num_images):
			
 
				-            img, _ = self.dataset.pull_image(i)
			
 
				-            orig_h, orig_w = img.shape[:2]
			
 
				-
			
 
				-            # preprocess
			
 
				-            x, _, ratio = self.transform(img)
			
 
				-            x = x.unsqueeze(0).to(self.device)
			
 
				-
			
 
				-            # forward
			
 
				-            t0 = time.time()
			
 
				-            outputs = net(x)
			
 
				-            scores = outputs['scores']
			
 
				-            labels = outputs['labels']
			
 
				-            bboxes = outputs['bboxes']
			
 
				-            detect_time = time.time() - t0
			
 
				-
			
 
				-            # rescale bboxes
			
 
				-            bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
			
 
				-
			
 
				-            for j in range(len(self.labelmap)):
			
 
				-                inds = np.where(labels == j)[0]
			
 
				-                if len(inds) == 0:
			
 
				-                    self.all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
			
 
				-                    continue
			
 
				-                c_bboxes = bboxes[inds]
			
 
				-                c_scores = scores[inds]
			
 
				-                c_dets = np.hstack((c_bboxes,
			
 
				-                                    c_scores[:, np.newaxis])).astype(np.float32,
			
 
				-                                                                    copy=False)
			
 
				-                self.all_boxes[j][i] = c_dets
			
 
				-
			
 
				-            if i % 500 == 0:
			
 
				-                print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images, detect_time))
			
 
				-
			
 
				-        with open(det_file, 'wb') as f:
			
 
				-            pickle.dump(self.all_boxes, f, pickle.HIGHEST_PROTOCOL)
			
 
				-
			
 
				-        print('Evaluating detections')
			
 
				-        self.evaluate_detections(self.all_boxes)
			
 
				-
			
 
				-        print('Mean AP: ', self.map)
			
 
				-  
			
 
				-    def parse_rec(self, filename):
			
 
				-        """ Parse a PASCAL VOC xml file """
			
 
				-        tree = ET.parse(filename)
			
 
				-        objects = []
			
 
				-        for obj in tree.findall('object'):
			
 
				-            obj_struct = {}
			
 
				-            obj_struct['name'] = obj.find('name').text
			
 
				-            obj_struct['pose'] = obj.find('pose').text
			
 
				-            obj_struct['truncated'] = int(obj.find('truncated').text)
			
 
				-            obj_struct['difficult'] = int(obj.find('difficult').text)
			
 
				-            bbox = obj.find('bndbox')
			
 
				-            obj_struct['bbox'] = [int(bbox.find('xmin').text),
			
 
				-                                int(bbox.find('ymin').text),
			
 
				-                                int(bbox.find('xmax').text),
			
 
				-                                int(bbox.find('ymax').text)]
			
 
				-            objects.append(obj_struct)
			
 
				-
			
 
				-        return objects
			
 
				-
			
 
				-    def get_output_dir(self, name, phase):
			
 
				-        """Return the directory where experimental artifacts are placed.
			
 
				-        If the directory does not exist, it is created.
			
 
				-        A canonical path is built using the name from an imdb and a network
			
 
				-        (if not None).
			
 
				-        """
			
 
				-        filedir = os.path.join(name, phase)
			
 
				-        if not os.path.exists(filedir):
			
 
				-            os.makedirs(filedir, exist_ok=True)
			
 
				-        return filedir
			
 
				-
			
 
				-    def get_voc_results_file_template(self, cls):
			
 
				-        # VOCdevkit/VOC2007/results/det_test_aeroplane.txt
			
 
				-        filename = 'det_' + self.set_type + '_%s.txt' % (cls)
			
 
				-        filedir = os.path.join(self.devkit_path, 'results')
			
 
				-        if not os.path.exists(filedir):
			
 
				-            os.makedirs(filedir)
			
 
				-        path = os.path.join(filedir, filename)
			
 
				-        return path
			
 
				-
			
 
				-    def write_voc_results_file(self, all_boxes):
			
 
				-        for cls_ind, cls in enumerate(self.labelmap):
			
 
				-            if self.display:
			
 
				-                print('Writing {:s} VOC results file'.format(cls))
			
 
				-            filename = self.get_voc_results_file_template(cls)
			
 
				-            with open(filename, 'wt') as f:
			
 
				-                for im_ind, index in enumerate(self.dataset.ids):
			
 
				-                    dets = all_boxes[cls_ind][im_ind]
			
 
				-                    if len(dets) == 0:
			
 
				-                        continue
			
 
				-                    # the VOCdevkit expects 1-based indices
			
 
				-                    for k in range(dets.shape[0]):
			
 
				-                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
			
 
				-                                format(index[1], dets[k, -1],
			
 
				-                                    dets[k, 0] + 1, dets[k, 1] + 1,
			
 
				-                                    dets[k, 2] + 1, dets[k, 3] + 1))
			
 
				-
			
 
				-    def do_python_eval(self, use_07=True):
			
 
				-        cachedir = os.path.join(self.devkit_path, 'annotations_cache')
			
 
				-        aps = []
			
 
				-        # The PASCAL VOC metric changed in 2010
			
 
				-        use_07_metric = use_07
			
 
				-        print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
			
 
				-        if not os.path.isdir(self.output_dir):
			
 
				-            os.mkdir(self.output_dir)
			
 
				-        for i, cls in enumerate(self.labelmap):
			
 
				-            filename = self.get_voc_results_file_template(cls)
			
 
				-            rec, prec, ap = self.voc_eval(detpath=filename, 
			
 
				-                                          classname=cls, 
			
 
				-                                          cachedir=cachedir, 
			
 
				-                                          ovthresh=0.5, 
			
 
				-                                          use_07_metric=use_07_metric
			
 
				-                                        )
			
 
				-            aps += [ap]
			
 
				-            print('AP for {} = {:.4f}'.format(cls, ap))
			
 
				-            with open(os.path.join(self.output_dir, cls + '_pr.pkl'), 'wb') as f:
			
 
				-                pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
			
 
				-        if self.display:
			
 
				-            self.map = np.mean(aps)
			
 
				-            print('Mean AP = {:.4f}'.format(np.mean(aps)))
			
 
				-            print('~~~~~~~~')
			
 
				-            print('Results:')
			
 
				-            for ap in aps:
			
 
				-                print('{:.3f}'.format(ap))
			
 
				-            print('{:.3f}'.format(np.mean(aps)))
			
 
				-            print('~~~~~~~~')
			
 
				-            print('')
			
 
				-            print('--------------------------------------------------------------')
			
 
				-            print('Results computed with the **unofficial** Python eval code.')
			
 
				-            print('Results should be very close to the official MATLAB eval code.')
			
 
				-            print('--------------------------------------------------------------')
			
 
				-        else:
			
 
				-            self.map = np.mean(aps)
			
 
				-            print('Mean AP = {:.4f}'.format(np.mean(aps)))
			
 
				-
			
 
				-    def voc_ap(self, rec, prec, use_07_metric=True):
			
 
				-        """ ap = voc_ap(rec, prec, [use_07_metric])
			
 
				-        Compute VOC AP given precision and recall.
			
 
				-        If use_07_metric is true, uses the
			
 
				-        VOC 07 11 point method (default:True).
			
 
				-        """
			
 
				-        if use_07_metric:
			
 
				-            # 11 point metric
			
 
				-            ap = 0.
			
 
				-            for t in np.arange(0., 1.1, 0.1):
			
 
				-                if np.sum(rec >= t) == 0:
			
 
				-                    p = 0
			
 
				-                else:
			
 
				-                    p = np.max(prec[rec >= t])
			
 
				-                ap = ap + p / 11.
			
 
				-        else:
			
 
				-            # correct AP calculation
			
 
				-            # first append sentinel values at the end
			
 
				-            mrec = np.concatenate(([0.], rec, [1.]))
			
 
				-            mpre = np.concatenate(([0.], prec, [0.]))
			
 
				-
			
 
				-            # compute the precision envelope
			
 
				-            for i in range(mpre.size - 1, 0, -1):
			
 
				-                mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
			
 
				-
			
 
				-            # to calculate area under PR curve, look for points
			
 
				-            # where X axis (recall) changes value
			
 
				-            i = np.where(mrec[1:] != mrec[:-1])[0]
			
 
				-
			
 
				-            # and sum (\Delta recall) * prec
			
 
				-            ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
			
 
				-        return ap
			
 
				-
			
 
				-    def voc_eval(self, detpath, classname, cachedir, ovthresh=0.5, use_07_metric=True):
			
 
				-        if not os.path.isdir(cachedir):
			
 
				-            os.mkdir(cachedir)
			
 
				-        cachefile = os.path.join(cachedir, 'annots.pkl')
			
 
				-        # read list of images
			
 
				-        with open(self.imgsetpath, 'r') as f:
			
 
				-            lines = f.readlines()
			
 
				-        imagenames = [x.strip() for x in lines]
			
 
				-        if not os.path.isfile(cachefile):
			
 
				-            # load annots
			
 
				-            recs = {}
			
 
				-            for i, imagename in enumerate(imagenames):
			
 
				-                recs[imagename] = self.parse_rec(self.annopath % (imagename))
			
 
				-                if i % 100 == 0 and self.display:
			
 
				-                    print('Reading annotation for {:d}/{:d}'.format(
			
 
				-                    i + 1, len(imagenames)))
			
 
				-            # save
			
 
				-            if self.display:
			
 
				-                print('Saving cached annotations to {:s}'.format(cachefile))
			
 
				-            with open(cachefile, 'wb') as f:
			
 
				-                pickle.dump(recs, f)
			
 
				-        else:
			
 
				-            # load
			
 
				-            with open(cachefile, 'rb') as f:
			
 
				-                recs = pickle.load(f)
			
 
				-
			
 
				-        # extract gt objects for this class
			
 
				-        class_recs = {}
			
 
				-        npos = 0
			
 
				-        for imagename in imagenames:
			
 
				-            R = [obj for obj in recs[imagename] if obj['name'] == classname]
			
 
				-            bbox = np.array([x['bbox'] for x in R])
			
 
				-            difficult = np.array([x['difficult'] for x in R]).astype(np.bool_)
			
 
				-            det = [False] * len(R)
			
 
				-            npos = npos + sum(~difficult)
			
 
				-            class_recs[imagename] = {'bbox': bbox,
			
 
				-                                    'difficult': difficult,
			
 
				-                                    'det': det}
			
 
				-
			
 
				-        # read dets
			
 
				-        detfile = detpath.format(classname)
			
 
				-        with open(detfile, 'r') as f:
			
 
				-            lines = f.readlines()
			
 
				-        if any(lines) == 1:
			
 
				-
			
 
				-            splitlines = [x.strip().split(' ') for x in lines]
			
 
				-            image_ids = [x[0] for x in splitlines]
			
 
				-            confidence = np.array([float(x[1]) for x in splitlines])
			
 
				-            BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
			
 
				-
			
 
				-            # sort by confidence
			
 
				-            sorted_ind = np.argsort(-confidence)
			
 
				-            sorted_scores = np.sort(-confidence)
			
 
				-            BB = BB[sorted_ind, :]
			
 
				-            image_ids = [image_ids[x] for x in sorted_ind]
			
 
				-
			
 
				-            # go down dets and mark TPs and FPs
			
 
				-            nd = len(image_ids)
			
 
				-            tp = np.zeros(nd)
			
 
				-            fp = np.zeros(nd)
			
 
				-            for d in range(nd):
			
 
				-                R = class_recs[image_ids[d]]
			
 
				-                bb = BB[d, :].astype(float)
			
 
				-                ovmax = -np.inf
			
 
				-                BBGT = R['bbox'].astype(float)
			
 
				-                if BBGT.size > 0:
			
 
				-                    # compute overlaps
			
 
				-                    # intersection
			
 
				-                    ixmin = np.maximum(BBGT[:, 0], bb[0])
			
 
				-                    iymin = np.maximum(BBGT[:, 1], bb[1])
			
 
				-                    ixmax = np.minimum(BBGT[:, 2], bb[2])
			
 
				-                    iymax = np.minimum(BBGT[:, 3], bb[3])
			
 
				-                    iw = np.maximum(ixmax - ixmin, 0.)
			
 
				-                    ih = np.maximum(iymax - iymin, 0.)
			
 
				-                    inters = iw * ih
			
 
				-                    uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
			
 
				-                        (BBGT[:, 2] - BBGT[:, 0]) *
			
 
				-                        (BBGT[:, 3] - BBGT[:, 1]) - inters)
			
 
				-                    overlaps = inters / uni
			
 
				-                    ovmax = np.max(overlaps)
			
 
				-                    jmax = np.argmax(overlaps)
			
 
				-
			
 
				-                if ovmax > ovthresh:
			
 
				-                    if not R['difficult'][jmax]:
			
 
				-                        if not R['det'][jmax]:
			
 
				-                            tp[d] = 1.
			
 
				-                            R['det'][jmax] = 1
			
 
				-                        else:
			
 
				-                            fp[d] = 1.
			
 
				-                else:
			
 
				-                    fp[d] = 1.
			
 
				-
			
 
				-            # compute precision recall
			
 
				-            fp = np.cumsum(fp)
			
 
				-            tp = np.cumsum(tp)
			
 
				-            rec = tp / float(npos)
			
 
				-            # avoid divide by zero in case the first detection matches a difficult
			
 
				-            # ground truth
			
 
				-            prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
			
 
				-            ap = self.voc_ap(rec, prec, use_07_metric)
			
 
				-        else:
			
 
				-            rec = -1.
			
 
				-            prec = -1.
			
 
				-            ap = -1.
			
 
				-
			
 
				-        return rec, prec, ap
			
 
				-
			
 
				-    def evaluate_detections(self, box_list):
			
 
				-        self.write_voc_results_file(box_list)
			
 
				-        self.do_python_eval()
			
 
				-
			
 
				-
			
 
				-if __name__ == '__main__':
			
 
				-    pass
			
--- a/yolo/models/__init__.py
+++ b/yolo/models/__init__.py
@@ -50,7 +50,7 @@ def build_model(args, cfg, is_val=False):
 
				 
			
 
				     if is_val:
			
 
				         # ------------ Load pretrained weight ------------
			
 
				-        if args.pretrained is not None:
			
 
				+        if hasattr(args, "pretrained") and args.pretrained is not None:
			
 
				             print('Loading COCO pretrained weight ...')
			
 
				             checkpoint = torch.load(args.pretrained, map_location='cpu')
			
 
				             # checkpoint state dict
			
@@ -72,7 +72,7 @@ def build_model(args, cfg, is_val=False):
 
				             model.load_state_dict(checkpoint_state_dict, strict=False)
			
 
				 
			
 
				         # ------------ Keep training from the given checkpoint ------------
			
 
				-        if args.resume and args.resume != "None":
			
 
				+        if hasattr(args, "resume") and args.resume and args.resume != "None":
			
 
				             checkpoint = torch.load(args.resume, map_location='cpu')
			
 
				             # checkpoint state dict
			
 
				             try:
			
--- a/yolo/tools/convert_voc_to_coco.py
+++ b/yolo/tools/convert_voc_to_coco.py
@@ -0,0 +1,163 @@
 
				+import cv2
			
 
				+import random
			
 
				+import numpy as np
			
 
				+import os.path as osp
			
 
				+import xml.etree.ElementTree as ET
			
 
				+import torch.utils.data as data
			
 
				+
			
 
				+voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
			
 
				+voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
			
 
				+
			
 
				+
			
 
				+class VOCAnnotationTransform(object):
			
 
				+    def __init__(self, class_to_ind=None, keep_difficult=False):
			
 
				+        self.class_to_ind = class_to_ind or dict(
			
 
				+            zip(voc_class_labels, range(len(voc_class_labels))))
			
 
				+        self.keep_difficult = keep_difficult
			
 
				+
			
 
				+    def __call__(self, target):
			
 
				+        res = []
			
 
				+        for obj in target.iter('object'):
			
 
				+            difficult = int(obj.find('difficult').text) == 1
			
 
				+            if not self.keep_difficult and difficult:
			
 
				+                continue
			
 
				+            name = obj.find('name').text.lower().strip()
			
 
				+            bbox = obj.find('bndbox')
			
 
				+
			
 
				+            pts = ['xmin', 'ymin', 'xmax', 'ymax']
			
 
				+            bndbox = []
			
 
				+            for i, pt in enumerate(pts):
			
 
				+                cur_pt = int(bbox.find(pt).text) - 1
			
 
				+                bndbox.append(cur_pt)
			
 
				+            label_idx = self.class_to_ind[name]
			
 
				+            bndbox.append(label_idx)
			
 
				+            res += [bndbox]  # [x1, y1, x2, y2, label_ind]
			
 
				+
			
 
				+        return res  # [[x1, y1, x2, y2, label_ind], ... ]
			
 
				+
			
 
				+class VOCDataset(data.Dataset):
			
 
				+    def __init__(self, 
			
 
				+                 root   :str = None, 
			
 
				+                 image_set  = [('2007', 'trainval'), ('2012', 'trainval')],
			
 
				+                 is_train   :bool =False,
			
 
				+                 ):
			
 
				+        # ----------- Basic parameters -----------
			
 
				+        self.image_set = image_set
			
 
				+        self.is_train  = is_train
			
 
				+        self.num_classes = 20
			
 
				+        # ----------- Path parameters -----------
			
 
				+        self.root = root
			
 
				+        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
			
 
				+        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
			
 
				+        # ----------- Data parameters -----------
			
 
				+        self.ids = list()
			
 
				+        for (year, name) in image_set:
			
 
				+            rootpath = osp.join(self.root, 'VOC' + year)
			
 
				+            for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
			
 
				+                self.ids.append((rootpath, line.strip()))
			
 
				+        self.dataset_size = len(self.ids)
			
 
				+        self.class_labels = voc_class_labels
			
 
				+        self.class_indexs = voc_class_indexs
			
 
				+        # ----------- Transform parameters -----------
			
 
				+        self.target_transform = VOCAnnotationTransform()
			
 
				+
			
 
				+    def __len__(self):
			
 
				+        return self.dataset_size
			
 
				+
			
 
				+    def pull_item(self, index):
			
 
				+        # load an image
			
 
				+        img_id = self.ids[index]
			
 
				+        image = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
			
 
				+        height, width, channels = image.shape
			
 
				+
			
 
				+        # laod an annotation
			
 
				+        anno = ET.parse(self._annopath % img_id).getroot()
			
 
				+        anno = self.target_transform(anno)
			
 
				+
			
 
				+        # guard against no boxes via resizing
			
 
				+        anno = np.array(anno).reshape(-1, 5)
			
 
				+        bboxes = anno[:, :4]  # [N, 4]
			
 
				+        labels = anno[:, 4]   # [N,]
			
 
				+        target = {
			
 
				+            "file_name": "{}.jpg".format(img_id[-1]),
			
 
				+            "bboxes": bboxes,
			
 
				+            "labels": labels,
			
 
				+            "orig_size": [height, width],
			
 
				+            "id": index,
			
 
				+        }
			
 
				+        
			
 
				+        return target
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    import json
			
 
				+
			
 
				+    # json_file = "D:\\python_work\\dataset\\COCO\\annotations\\instances_val2017.json"
			
 
				+    # with open(json_file, 'r') as f:
			
 
				+    #     data_dict = json.load(f)
			
 
				+    # print(data_dict['info'])
			
 
				+    # print(data_dict.keys())
			
 
				+    # print(len(data_dict["annotations"]))
			
 
				+    # print(len(data_dict["images"]))
			
 
				+    # print(data_dict["images"][0])
			
 
				+    # print(data_dict["images"][1])
			
 
				+    # print(data_dict["images"][2])
			
 
				+    # print(data_dict["annotations"][0])
			
 
				+    # print(data_dict["annotations"][1])
			
 
				+    # print(data_dict["annotations"][2])
			
 
				+    # exit()
			
 
				+
			
 
				+    # opt
			
 
				+    is_train = False
			
 
				+    dataset = VOCDataset(root='D:/python_work/dataset/VOCdevkit/',
			
 
				+                         image_set=[('2007', 'trainval'), ('2012', 'trainval')] if is_train else [('2007', 'test')],
			
 
				+                         is_train=is_train,
			
 
				+                         )
			
 
				+    
			
 
				+    print('Data length: ', len(dataset))
			
 
				+
			
 
				+    coco_dict = {
			
 
				+        "images": [],
			
 
				+        "annotations": [],
			
 
				+        "type": "instances",
			
 
				+        "categories": [{'supercategory': name, "id": i, 'name': name} for i, name in enumerate(voc_class_labels)]
			
 
				+    }
			
 
				+    anno_id = 0
			
 
				+    for i in range(len(dataset)):
			
 
				+        if i % 1000 == 0:
			
 
				+            print(" - [{}] / [{}] ...".format(i, len(dataset)))
			
 
				+
			
 
				+        target = dataset.pull_item(i)
			
 
				+
			
 
				+        # images info.
			
 
				+        file_name = target["file_name"]
			
 
				+        height, width = target["orig_size"]
			
 
				+        id = int(target["id"])
			
 
				+
			
 
				+        coco_dict["images"].append({
			
 
				+            'file_name': file_name,
			
 
				+            'height': height,
			
 
				+            'width': width,
			
 
				+            'id': id
			
 
				+        })
			
 
				+
			
 
				+        # annotation info.
			
 
				+        bboxes = target["bboxes"]
			
 
				+        labels = target["labels"]
			
 
				+
			
 
				+        for bbox, label in zip(bboxes, labels):
			
 
				+            x1, y1, x2, y2 = bbox
			
 
				+            coco_dict["annotations"].append({
			
 
				+                'bbox': [int(x1), int(y1), int(x2 - x1), int(y2 - y1)],
			
 
				+                'area': int((x2 - x1) * (y2 - y1)),
			
 
				+                'category_id': int(label),
			
 
				+                'image_id': id,
			
 
				+                'id': anno_id,
			
 
				+                'iscrowd': 0,
			
 
				+            })
			
 
				+            anno_id += 1
			
 
				+
			
 
				+    json_file = "D:\\python_work\\dataset\\VOCdevkit\\annotations\\instances_val.json"
			
 
				+    with open(json_file, 'w') as f:
			
 
				+        json.dump(coco_dict, f, indent=4)
			
 
				+    print(f"Data saved to {json_file}")
			
--- a/yolo/train.py
+++ b/yolo/train.py
@@ -23,7 +23,7 @@ from config import build_config
 
				 from dataset.build import build_dataset, build_transform
			
 
				 
			
 
				 # ----------------- Evaluator Components -----------------
			
 
				-from evaluator.build import build_evluator
			
 
				+from evaluator.map_evaluator import MapEvaluator
			
 
				 
			
 
				 # ----------------- Model Components -----------------
			
 
				 from models import build_model
			
@@ -149,7 +149,12 @@ def train():
 
				     train_loader = build_dataloader(args, dataset, args.batch_size // world_size, CollateFunc())
			
 
				 
			
 
				     # ---------------------------- Build Evaluator ----------------------------
			
 
				-    evaluator = build_evluator(args, cfg, val_transform, device)
			
 
				+    evaluator = MapEvaluator(cfg = cfg,
			
 
				+                             dataset_name = args.dataset,
			
 
				+                             data_dir     = args.root,
			
 
				+                             device       = device,
			
 
				+                             transform    = val_transform
			
 
				+                             )
			
 
				 
			
 
				     # ---------------------------- Build model ----------------------------
			
 
				     ## Build model