@@ -1,9 +1,10 @@
+import os
 import cv2
+import time
 import random
 import numpy as np
-import os.path as osp
-import xml.etree.ElementTree as ET
-import torch.utils.data as data
+from torch.utils.data import Dataset
+from pycocotools.coco import COCO
 
 try:
     from .data_augment.strong_augment import MosaicAugment, MixupAugment
@@ -11,65 +12,31 @@ except:
     from data_augment.strong_augment import MosaicAugment, MixupAugment
 
 
-VOC_CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
 voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
 voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
 
 
-class VOCAnnotationTransform(object):
-    def __init__(self, class_to_ind=None, keep_difficult=False):
-        self.class_to_ind = class_to_ind or dict(
-            zip(VOC_CLASSES, range(len(VOC_CLASSES))))
-        self.keep_difficult = keep_difficult
-
-    def __call__(self, target):
-        res = []
-        for obj in target.iter('object'):
-            difficult = int(obj.find('difficult').text) == 1
-            if not self.keep_difficult and difficult:
-                continue
-            name = obj.find('name').text.lower().strip()
-            bbox = obj.find('bndbox')
-
-            pts = ['xmin', 'ymin', 'xmax', 'ymax']
-            bndbox = []
-            for i, pt in enumerate(pts):
-                cur_pt = int(bbox.find(pt).text) - 1
-                bndbox.append(cur_pt)
-            label_idx = self.class_to_ind[name]
-            bndbox.append(label_idx)
-            res += [bndbox]  # [x1, y1, x2, y2, label_ind]
-
-        return res  # [[x1, y1, x2, y2, label_ind], ... ]
-
-
-class VOCDataset(data.Dataset):
+class VOCDataset(Dataset):
     def __init__(self,
                  cfg,
-                 data_dir :str = None,
-                 image_set = [('2007', 'trainval'), ('2012', 'trainval')],
-                 transform = None,
-                 is_train :bool =False,
+                 data_dir :str = None,
+                 transform = None,
+                 is_train :bool = False,
                  ):
         # ----------- Basic parameters -----------
-        self.image_set = image_set
+        self.data_dir = data_dir
+        self.image_set = "train" if is_train else "val"
         self.is_train = is_train
         self.num_classes = 20
-        # ----------- Path parameters -----------
-        self.root = data_dir
-        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
-        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
         # ----------- Data parameters -----------
-        self.ids = list()
-        for (year, name) in image_set:
-            rootpath = osp.join(self.root, 'VOC' + year)
-            for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
-                self.ids.append((rootpath, line.strip()))
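+        # COCO-format annotations are read from {data_dir}/annotations/instances_{train|val}.json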
+        self.json_file = "instances_{}.json".format(self.image_set)
+        self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
+        self.ids = self.coco.getImgIds()
+        self.class_ids = sorted(self.coco.getCatIds())
         self.dataset_size = len(self.ids)
         self.class_labels = voc_class_labels
         self.class_indexs = voc_class_indexs
         # ----------- Transform parameters -----------
-        self.target_transform = VOCAnnotationTransform()
         self.transform = transform
         if is_train:
             self.mosaic_prob = cfg.mosaic_prob
@@ -85,16 +52,15 @@ class VOCDataset(data.Dataset):
             self.mixup_augment = None
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
-        print('use Mixup Augmentation: {}'.format(self.mixup_prob))
+        print('use Mixup Augmentation: {}'.format(self.mixup_prob))
         print('use Copy-paste Augmentation: {}'.format(self.copy_paste))
 
     # ------------ Basic dataset function ------------
-    def __getitem__(self, index):
-        image, target, deltas = self.pull_item(index)
-        return image, target, deltas
-
     def __len__(self):
-        return self.dataset_size
+        return len(self.ids)
+
+    def __getitem__(self, index):
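+        # pull_item returns (image, target, deltas)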
+        return self.pull_item(index)
 
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
@@ -138,17 +104,14 @@ class VOCDataset(data.Dataset):
         image, _ = self.pull_image(index)
         height, width, channels = image.shape
 
-        # laod an annotation
-        anno, _ = self.pull_anno(index)
-
-        # guard against no boxes via resizing
-        anno = np.array(anno).reshape(-1, 5)
+        # load a target
+        bboxes, labels = self.pull_anno(index)
         target = {
-            "boxes": anno[:, :4],
-            "labels": anno[:, 4],
+            "boxes": bboxes,
+            "labels": labels,
             "orig_size": [height, width]
         }
-
+
         return image, target
 
     def pull_item(self, index):
@@ -177,17 +140,54 @@ class VOCDataset(data.Dataset):
         return image, target, deltas
 
     def pull_image(self, index):
-        img_id = self.ids[index]
-        image = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
+        # get the image file name
+        image_dict = self.coco.dataset['images'][index]
+        image_id = image_dict["id"]
+        filename = image_dict["file_name"]
+
+        # load the image
+        image_path = os.path.join(self.data_dir, "images", filename)
+        image = cv2.imread(image_path)
 
-        return image, img_id
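+        # cv2.imread returns None if the file is missing or unreadable, so fail fast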
+        assert image is not None
+
+        return image, image_id
 
     def pull_anno(self, index):
-        img_id = self.ids[index]
-        anno = ET.parse(self._annopath % img_id).getroot()
-        anno = self.target_transform(anno)
+        img_id = self.ids[index]
+        # image info
+        im_ann = self.coco.loadImgs(img_id)[0]
+        width = im_ann['width']
+        height = im_ann['height']
 
-        return anno, img_id
+        # annotation info
+        anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=None)
+        annotations = self.coco.loadAnns(anno_ids)
+
+
+        # load a target
+        bboxes = []
+        labels = []
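+        # keep annotations that have a bbox with positive area; clip boxes to the image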
+        for anno in annotations:
+            if 'bbox' in anno and anno['area'] > 0:
+                # bbox: convert COCO [x, y, w, h] to [x1, y1, x2, y2]
+                x1 = np.max((0, anno['bbox'][0]))
+                y1 = np.max((0, anno['bbox'][1]))
+                x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
+                y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
+                if x2 < x1 or y2 < y1:
+                    continue
+                # class label: map the COCO category id to a contiguous class index
+                cls_id = self.class_ids.index(anno['category_id'])
+
+                bboxes.append([x1, y1, x2, y2])
+                labels.append(cls_id)
+
+        # guard against no boxes via resizing
+        bboxes = np.array(bboxes).reshape(-1, 4)
+        labels = np.array(labels).reshape(-1)
+
+        return bboxes, labels
 
 
 if __name__ == "__main__":
@@ -195,16 +195,16 @@ if __name__ == "__main__":
     import argparse
     from build import build_transform
 
-    parser = argparse.ArgumentParser(description='VOC-Dataset')
+    parser = argparse.ArgumentParser(description='COCO-Dataset')
     # opt
-    parser.add_argument('--root', default='D:/python_work/dataset/VOCdevkit/',
+    parser.add_argument('--root', default="D:/python_work/dataset/VOCdevkit/",
                         help='data root')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='train or not.')
     parser.add_argument('--aug_type', default="yolo", type=str, choices=["yolo", "ssd"],
                         help='yolo, ssd.')
-
+
     args = parser.parse_args()
 
     class YoloBaseConfig(object):
@@ -258,7 +258,7 @@ if __name__ == "__main__":
         cfg = SSDBaseConfig()
 
     transform = build_transform(cfg, args.is_train)
-    dataset = VOCDataset(cfg, args.root, [('2007', 'test')], transform, args.is_train)
+    dataset = VOCDataset(cfg, args.root, transform, args.is_train)
 
     np.random.seed(0)
     class_colors = [(np.random.randint(255),
@@ -314,4 +314,4 @@ if __name__ == "__main__":
         cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
         cv2.imshow('gt', image)
         # cv2.imwrite(str(i)+'.jpg', img)
-        cv2.waitKey(0)
+        cv2.waitKey(0)