modify voc dataset & evaluator

yjh0410 1 year ago
parent commit 55b0dc15b2

+ 6 - 10
yolo/dataset/build.py

@@ -2,18 +2,18 @@ import os
 
 try:
     # dataset class
-    from .voc        import VOCDataset
-    from .coco       import COCODataset
-    from .custom     import CustomDataset
+    from .voc     import VOCDataset
+    from .coco    import COCODataset
+    from .custom  import CustomDataset
     # transform class
     from .data_augment.yolo_augment import YOLOAugmentation, YOLOBaseTransform
     from .data_augment.ssd_augment  import SSDAugmentation, SSDBaseTransform
 
 except:
     # dataset class
-    from voc        import VOCDataset
-    from coco       import COCODataset
-    from yolo.dataset.custom   import CustomDataset
+    from voc     import VOCDataset
+    from coco    import COCODataset
+    from custom  import CustomDataset
     # transform class
     from data_augment.yolo_augment import YOLOAugmentation, YOLOBaseTransform
     from data_augment.ssd_augment  import SSDAugmentation, SSDBaseTransform
@@ -24,19 +24,15 @@ def build_dataset(args, cfg, transform=None, is_train=False):
     # ------------------------- Build dataset -------------------------
     ## VOC dataset
     if args.dataset == 'voc':
-        image_set = [('2007', 'trainval'), ('2012', 'trainval')] if is_train else [('2007', 'test')]
         dataset = VOCDataset(cfg       = cfg,
                              data_dir  = args.root,
-                             image_set = image_set,
                              transform = transform,
                              is_train  = is_train,
                              )
     ## COCO dataset
     elif args.dataset == 'coco':
-        image_set = 'train2017' if is_train else 'val2017'
         dataset = COCODataset(cfg       = cfg,
                               data_dir  = args.root,
-                              image_set = image_set,
                               transform = transform,
                               is_train  = is_train,
                               )
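
With image_set gone from build_dataset, callers only toggle is_train and each dataset derives its own split name. A minimal usage sketch under that assumption (cfg and args come from the repo's config and argparse builders):

# Hypothetical sketch: the split now follows is_train inside each dataset class.
train_dataset = build_dataset(args, cfg, build_transform(cfg, True),  is_train=True)   # VOC 'train' / COCO 'train2017'
val_dataset   = build_dataset(args, cfg, build_transform(cfg, False), is_train=False)  # VOC 'val'   / COCO 'val2017'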

+ 11 - 13
yolo/dataset/coco.py

@@ -25,19 +25,18 @@ class COCODataset(Dataset):
     def __init__(self, 
                  cfg,
                  data_dir  :str = None, 
-                 image_set :str = 'train2017',
                  transform = None,
                  is_train  :bool = False,
                  use_mask  :bool = False,
                  ):
         # ----------- Basic parameters -----------
         self.data_dir  = data_dir
-        self.image_set = image_set
+        self.image_set = "train2017" if is_train else "val2017"
         self.is_train  = is_train
         self.use_mask  = use_mask
         self.num_classes = 80
         # ----------- Data parameters -----------
-        self.json_file = coco_json_files['{}'.format(image_set)]
+        self.json_file = coco_json_files['{}'.format(self.image_set)]
         self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
         self.ids = self.coco.getImgIds()
         self.class_ids = sorted(self.coco.getCatIds())
@@ -148,19 +147,18 @@ class COCODataset(Dataset):
         return image, target, deltas
 
     def pull_image(self, index):
-        img_id = self.ids[index]
-        img_file = os.path.join(self.data_dir, self.image_set,
-                                '{:012}'.format(img_id) + '.jpg')
-        image = cv2.imread(img_file)
+        # get the image file name
+        image_dict = self.coco.dataset['images'][index]
+        image_id = image_dict["id"]
+        filename = image_dict["file_name"]
 
-        if self.json_file == 'instances_val5k.json' and image is None:
-            img_file = os.path.join(self.data_dir, 'train2017',
-                                    '{:012}'.format(img_id) + '.jpg')
-            image = cv2.imread(img_file)
+        # load the image
+        image_path = os.path.join(self.data_dir, self.image_set, filename)
+        image = cv2.imread(image_path)
 
         assert image is not None
 
-        return image, img_id
+        return image, image_id
 
     def pull_anno(self, index):
         img_id = self.ids[index]
@@ -265,7 +263,7 @@ if __name__ == "__main__":
         cfg = SSDBaseConfig()
 
     transform = build_transform(cfg, args.is_train)
-    dataset = COCODataset(cfg, args.root, 'val2017', transform, args.is_train)
+    dataset = COCODataset(cfg, args.root, transform, args.is_train)
     
     np.random.seed(0)
     class_colors = [(np.random.randint(255),
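
The reworked pull_image resolves the file name from the annotation file instead of zero-padding the image id, which also retires the instances_val5k fallback. Note that it indexes coco.dataset['images'] directly, so it assumes that list is ordered consistently with self.ids from getImgIds(). A standalone sketch of the new lookup, assuming a standard COCO 2017 layout:

import os
import cv2
from pycocotools.coco import COCO

def pull_coco_image(data_dir, image_set, coco, index):
    # Resolve the file name from the annotations rather than '{:012d}.jpg'.
    image_dict = coco.dataset["images"][index]
    image_path = os.path.join(data_dir, image_set, image_dict["file_name"])
    image = cv2.imread(image_path)
    assert image is not None, "missing image: {}".format(image_path)
    return image, image_dict["id"]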

+ 5 - 6
yolo/dataset/custom.py

@@ -20,19 +20,18 @@ class CustomDataset(Dataset):
     def __init__(self, 
                  cfg,
                  data_dir     :str = None, 
-                 image_set    :str = 'train2017',
                  transform    = None,
                  is_train     :bool =False,
                  ):
         # ----------- Basic parameters -----------
-        self.image_set = image_set
+        self.image_set = "train" if is_train else "val"
         self.is_train  = is_train
         self.num_classes = len(custom_class_labels)
         # ----------- Path parameters -----------
         self.data_dir = data_dir
-        self.json_file = '{}.json'.format(image_set)
+        self.json_file = '{}.json'.format(self.image_set)
         # ----------- Data parameters -----------
-        self.coco = COCO(os.path.join(self.data_dir, image_set, 'annotations', self.json_file))
+        self.coco = COCO(os.path.join(self.data_dir, self.image_set, 'annotations', self.json_file))
         self.ids = self.coco.getImgIds()
         self.class_ids = sorted(self.coco.getCatIds())
         self.dataset_size = len(self.ids)
@@ -53,7 +52,7 @@ class CustomDataset(Dataset):
             self.mosaic_augment = None
             self.mixup_augment  = None
         print('==============================')
-        print('Image Set: {}'.format(image_set))
+        print('Image Set: {}'.format(self.image_set))
         print('Json file: {}'.format(self.json_file))
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
@@ -255,7 +254,7 @@ if __name__ == "__main__":
         cfg = SSDBaseConfig()
 
     transform = build_transform(cfg, args.is_train)
-    dataset = CustomDataset(cfg, args.root, 'val', transform, args.is_train)
+    dataset = CustomDataset(cfg, args.root, transform, args.is_train)
     
     np.random.seed(0)
     class_colors = [(np.random.randint(255),
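
CustomDataset now pins its split the same way. From the COCO(...) path above it expects per-split annotation files laid out roughly like this (the image folder is not shown in this hunk, so its location is an assumption):

data_dir/
├── train/
│   └── annotations/train.json
└── val/
    └── annotations/val.json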

+ 0 - 70
yolo/dataset/scripts/data_to_h5py.py

@@ -1,70 +0,0 @@
-import cv2
-import h5py
-import os
-import argparse
-import numpy as np
-import sys
-
-sys.path.append('..')
-from voc import VOCDetection
-from coco import COCODataset
-
-# ---------------------- Opt ----------------------
-parser = argparse.ArgumentParser(description='Cache-Dataset')
-parser.add_argument('-d', '--dataset', default='voc',
-                    help='coco, voc, widerface, crowdhuman')
-parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/',
-                    help='data root')
-parser.add_argument('-size', '--img_size', default=640, type=int,
-                    help='input image size.')
-parser.add_argument('--mosaic', default=None, type=float,
-                    help='mosaic augmentation.')
-parser.add_argument('--mixup', default=None, type=float,
-                    help='mixup augmentation.')
-parser.add_argument('--keep_ratio', action="store_true", default=False,
-                    help='keep aspect ratio.')
-parser.add_argument('--show', action="store_true", default=False,
-                    help='keep aspect ratio.')
-
-args = parser.parse_args()
-
-
-# ---------------------- Build Dataset ----------------------
-if args.dataset == 'voc':
-    root = os.path.join(args.root, 'VOCdevkit')
-    dataset = VOCDetection(args.img_size, root)
-elif args.dataset == 'coco':
-    root = os.path.join(args.root, 'COCO')
-    dataset = COCODataset(args.img_size, args.root)
-print('Data length: ', len(dataset))
-
-
-# ---------------------- Main Process ----------------------
-cached_image = []
-dataset_size = len(dataset)
-for i in range(len(dataset)):
-    if i % 5000 == 0:
-        print("[{} / {}]".format(i, dataset_size))
-    # load an image
-    image, image_id = dataset.pull_image(i)
-    orig_h, orig_w, _ = image.shape
-
-    # resize image
-    if args.keep_ratio:
-        r = args.img_size / max(orig_h, orig_w)
-        if r != 1: 
-            interp = cv2.INTER_LINEAR
-            new_size = (int(orig_w * r), int(orig_h * r))
-            image = cv2.resize(image, new_size, interpolation=interp)
-    else:
-        image = cv2.resize(image, (int(args.img_size), int(args.img_size)))
-
-    cached_image.append(image)
-    if args.show:
-        cv2.imshow('image', image)
-        # cv2.imwrite(str(i)+'.jpg', img)
-        cv2.waitKey(0)
-
-save_path = "dataset/cache/"
-os.makedirs(save_path, exist_ok=True)
-np.save(save_path + '{}_train_images.npy'.format(args.dataset), cached_image)

+ 74 - 74
yolo/dataset/voc.py

@@ -1,9 +1,10 @@
+import os
 import cv2
+import time
 import random
 import numpy as np
-import os.path as osp
-import xml.etree.ElementTree as ET
-import torch.utils.data as data
+from torch.utils.data import Dataset
+from pycocotools.coco import COCO
 
 try:
     from .data_augment.strong_augment import MosaicAugment, MixupAugment
@@ -11,65 +12,31 @@ except:
     from  data_augment.strong_augment import MosaicAugment, MixupAugment
 
 
-VOC_CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
 voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
 voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
 
 
-class VOCAnnotationTransform(object):
-    def __init__(self, class_to_ind=None, keep_difficult=False):
-        self.class_to_ind = class_to_ind or dict(
-            zip(VOC_CLASSES, range(len(VOC_CLASSES))))
-        self.keep_difficult = keep_difficult
-
-    def __call__(self, target):
-        res = []
-        for obj in target.iter('object'):
-            difficult = int(obj.find('difficult').text) == 1
-            if not self.keep_difficult and difficult:
-                continue
-            name = obj.find('name').text.lower().strip()
-            bbox = obj.find('bndbox')
-
-            pts = ['xmin', 'ymin', 'xmax', 'ymax']
-            bndbox = []
-            for i, pt in enumerate(pts):
-                cur_pt = int(bbox.find(pt).text) - 1
-                bndbox.append(cur_pt)
-            label_idx = self.class_to_ind[name]
-            bndbox.append(label_idx)
-            res += [bndbox]  # [x1, y1, x2, y2, label_ind]
-
-        return res  # [[x1, y1, x2, y2, label_ind], ... ]
-
-
-class VOCDataset(data.Dataset):
+class VOCDataset(Dataset):
     def __init__(self, 
                  cfg,
-                 data_dir   :str = None, 
-                 image_set  = [('2007', 'trainval'), ('2012', 'trainval')],
-                 transform  = None,
-                 is_train   :bool =False,
+                 data_dir  :str = None, 
+                 transform = None,
+                 is_train  :bool = False,
                  ):
         # ----------- Basic parameters -----------
-        self.image_set = image_set
+        self.data_dir  = data_dir
+        self.image_set = "train" if is_train else "val"
         self.is_train  = is_train
         self.num_classes = 20
-        # ----------- Path parameters -----------
-        self.root = data_dir
-        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
-        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
         # ----------- Data parameters -----------
-        self.ids = list()
-        for (year, name) in image_set:
-            rootpath = osp.join(self.root, 'VOC' + year)
-            for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
-                self.ids.append((rootpath, line.strip()))
+        self.json_file = "instances_{}.json".format(self.image_set)
+        self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
+        self.ids = self.coco.getImgIds()
+        self.class_ids = sorted(self.coco.getCatIds())
         self.dataset_size = len(self.ids)
         self.class_labels = voc_class_labels
         self.class_indexs = voc_class_indexs
         # ----------- Transform parameters -----------
-        self.target_transform = VOCAnnotationTransform()
         self.transform = transform
         if is_train:
             self.mosaic_prob = cfg.mosaic_prob
@@ -85,16 +52,15 @@ class VOCDataset(data.Dataset):
             self.mixup_augment  = None
         print('==============================')
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
-        print('use Mixup Augmentation:  {}'.format(self.mixup_prob))
+        print('use Mixup Augmentation: {}'.format(self.mixup_prob))
         print('use Copy-paste Augmentation: {}'.format(self.copy_paste))
 
     # ------------ Basic dataset function ------------
-    def __getitem__(self, index):
-        image, target, deltas = self.pull_item(index)
-        return image, target, deltas
-
     def __len__(self):
-        return self.dataset_size
+        return len(self.ids)
+
+    def __getitem__(self, index):
+        return self.pull_item(index)
 
     # ------------ Mosaic & Mixup ------------
     def load_mosaic(self, index):
@@ -138,17 +104,14 @@ class VOCDataset(data.Dataset):
         image, _ = self.pull_image(index)
         height, width, channels = image.shape
 
-        # laod an annotation
-        anno, _ = self.pull_anno(index)
-
-        # guard against no boxes via resizing
-        anno = np.array(anno).reshape(-1, 5)
+        # load a target
+        bboxes, labels = self.pull_anno(index)
         target = {
-            "boxes": anno[:, :4],
-            "labels": anno[:, 4],
+            "boxes": bboxes,
+            "labels": labels,
             "orig_size": [height, width]
         }
-        
+
         return image, target
 
     def pull_item(self, index):
@@ -177,17 +140,54 @@ class VOCDataset(data.Dataset):
         return image, target, deltas
 
     def pull_image(self, index):
-        img_id = self.ids[index]
-        image = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
+        # get the image file name
+        image_dict = self.coco.dataset['images'][index]
+        image_id = image_dict["id"]
+        filename = image_dict["file_name"]
+
+        # load the image
+        image_path = os.path.join(self.data_dir, "images", filename)
+        image = cv2.imread(image_path)
 
-        return image, img_id
+        assert image is not None
+
+        return image, image_id
 
     def pull_anno(self, index):
-        img_id = self.ids[index]
-        anno = ET.parse(self._annopath % img_id).getroot()
-        anno = self.target_transform(anno)
+        img_id = self.ids[index]
+        # image info
+        im_ann = self.coco.loadImgs(img_id)[0]
+        width = im_ann['width']
+        height = im_ann['height']
 
-        return anno, img_id
+        # annotation info
+        anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=None)
+        annotations = self.coco.loadAnns(anno_ids)
+
+
+        # load a target
+        bboxes = []
+        labels = []
+        for anno in annotations:
+            if 'bbox' in anno and anno['area'] > 0:
+                # bbox
+                x1 = np.max((0, anno['bbox'][0]))
+                y1 = np.max((0, anno['bbox'][1]))
+                x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
+                y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
+                if x2 < x1 or y2 < y1:
+                    continue
+                # class label
+                cls_id = self.class_ids.index(anno['category_id'])
+                
+                bboxes.append([x1, y1, x2, y2])
+                labels.append(cls_id)
+
+        # guard against images with no boxes (reshape handles the empty case)
+        bboxes = np.array(bboxes).reshape(-1, 4)
+        labels = np.array(labels).reshape(-1)
+
+        return bboxes, labels
 
 
 if __name__ == "__main__":
@@ -195,16 +195,16 @@ if __name__ == "__main__":
     import argparse
     from build import build_transform
     
-    parser = argparse.ArgumentParser(description='VOC-Dataset')
+    parser = argparse.ArgumentParser(description='VOC-Dataset')
 
     # opt
-    parser.add_argument('--root', default='D:/python_work/dataset/VOCdevkit/',
+    parser.add_argument('--root', default="D:/python_work/dataset/VOCdevkit/",
                         help='data root')
     parser.add_argument('--is_train', action="store_true", default=False,
-                        help='train or not.')
+                        help='train or not.')
     parser.add_argument('--aug_type', default="yolo", type=str, choices=["yolo", "ssd"],
                         help='yolo, ssd.')
-    
+
     args = parser.parse_args()
 
     class YoloBaseConfig(object):
@@ -258,7 +258,7 @@ if __name__ == "__main__":
         cfg = SSDBaseConfig()
 
     transform = build_transform(cfg, args.is_train)
-    dataset = VOCDataset(cfg, args.root, [('2007', 'test')], transform, args.is_train)
+    dataset = VOCDataset(cfg, args.root, transform, args.is_train)
     
     np.random.seed(0)
     class_colors = [(np.random.randint(255),
@@ -314,4 +314,4 @@ if __name__ == "__main__":
             cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
         cv2.imshow('gt', image)
         # cv2.imwrite(str(i)+'.jpg', img)
-        cv2.waitKey(0)
+        cv2.waitKey(0)
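
After this rewrite VOCDataset no longer walks the VOCdevkit XML tree; it reads COCO-style JSON produced by yolo/tools/convert_voc_to_coco.py (added later in this commit). Judging from the paths in the code, it expects roughly the layout below; the flattened images/ directory is an inference, since the converted file_name entries carry no VOC2007/VOC2012 prefix:

VOCdevkit/
├── annotations/
│   ├── instances_train.json
│   └── instances_val.json
└── images/
    ├── 000005.jpg        # all JPEGs from VOC2007/VOC2012 JPEGImages, flattened
    └── ...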

+ 8 - 44
yolo/eval.py

@@ -2,9 +2,7 @@ import argparse
 import torch
 
 # evaluators
-from evaluator.voc_evaluator    import VOCAPIEvaluator
-from evaluator.coco_evaluator   import COCOAPIEvaluator
-from evaluator.custom_evaluator import CustomEvaluator
+from evaluator.map_evaluator import MapEvaluator
 
 # load transform
 from dataset.build import build_dataset, build_transform
@@ -47,40 +45,6 @@ def parse_args():
     return parser.parse_args()
 
 
-
-def voc_test(cfg, model, data_dir, device, transform):
-    evaluator = VOCAPIEvaluator(cfg=cfg,
-                                data_dir=data_dir,
-                                device=device,
-                                transform=transform,
-                                display=True)
-
-    # VOC evaluation
-    evaluator.evaluate(model)
-
-def coco_test(cfg, model, data_dir, device, transform):
-    # eval
-    evaluator = COCOAPIEvaluator(
-                    cfg=cfg,
-                    data_dir=data_dir,
-                    device=device,
-                    transform=transform)
-
-    # COCO evaluation
-    evaluator.evaluate(model)
-
-def custom_test(cfg, model, data_dir, device, transform):
-    evaluator = CustomEvaluator(
-        cfg=cfg,
-        data_dir=data_dir,
-        device=device,
-        image_set='val',
-        transform=transform)
-
-    # WiderFace evaluation
-    evaluator.evaluate(model)
-
-
 if __name__ == '__main__':
     args = parse_args()
     # cuda
@@ -107,10 +71,10 @@ if __name__ == '__main__':
     model.to(device).eval()
 
     # evaluation
-    with torch.no_grad():
-        if args.dataset == 'voc':
-            voc_test(cfg, model, args.root, device, transform)
-        elif args.dataset == 'coco':
-            coco_test(cfg, model, args.root, device, transform)
-        elif args.dataset == 'custom':
-            custom_test(cfg, model, args.root, device, transform)
+    evaluator = MapEvaluator(cfg = cfg,
+                             dataset_name = args.dataset,
+                             data_dir  = args.root,
+                             device    = device,
+                             transform = transform
+                             )
+    evaluator.evaluate(model)

+ 0 - 33
yolo/evaluator/build.py

@@ -1,33 +0,0 @@
-import os
-
-from evaluator.coco_evaluator import COCOAPIEvaluator
-from evaluator.voc_evaluator import VOCAPIEvaluator
-from evaluator.custom_evaluator import CustomEvaluator
-
-
-
-def build_evluator(args, cfg, transform, device):
-    # Evaluator
-    ## VOC Evaluator
-    if args.dataset == 'voc':
-        evaluator = VOCAPIEvaluator(cfg       = cfg,
-                                    data_dir  = args.root,
-                                    device    = device,
-                                    transform = transform
-                                    )
-    ## COCO Evaluator
-    elif args.dataset == 'coco':
-        evaluator = COCOAPIEvaluator(cfg       = cfg,
-                                     data_dir  = args.root,
-                                     device    = device,
-                                     transform = transform
-                                     )
-    ## Custom dataset Evaluator
-    elif args.dataset == 'ourdataset':
-        evaluator = CustomEvaluator(cfg       = cfg,
-                                      data_dir  = args.root,
-                                      device    = device,
-                                      transform = transform
-                                      )
-
-    return evaluator

+ 0 - 111
yolo/evaluator/custom_evaluator.py

@@ -1,111 +0,0 @@
-import json
-import tempfile
-import torch
-from dataset.custom import CustomDataset
-from utils.box_ops import rescale_bboxes
-
-try:
-    from pycocotools.cocoeval import COCOeval
-except:
-    print("It seems that the COCOAPI is not installed.")
-
-
-class CustomEvaluator():
-    def __init__(self, cfg, data_dir, device, image_set='val', transform=None):
-        # ----------------- Basic parameters -----------------
-        self.image_set = image_set
-        self.transform = transform
-        self.device = device
-        # ----------------- Metrics -----------------
-        self.map = 0.
-        self.ap50_95 = 0.
-        self.ap50 = 0.
-        # ----------------- Dataset -----------------
-        self.dataset = CustomDataset(cfg, data_dir=data_dir, image_set=image_set, transform=None, is_train=False)
-
-    @torch.no_grad()
-    def evaluate(self, model):
-        """
-        COCO average precision (AP) Evaluation. Iterate inference on the test dataset
-        and the results are evaluated by COCO API.
-        Args:
-            model : model object
-        Returns:
-            ap50_95 (float) : calculated COCO AP for IoU=50:95
-            ap50 (float) : calculated COCO AP for IoU=50
-        """
-        model.eval()
-        ids = []
-        data_dict = []
-        num_images = len(self.dataset)
-        print('total number of images: %d' % (num_images))
-
-        # --------------- COCO-style evaluation ---------------
-        for index in range(num_images):
-            if index % 500 == 0:
-                print('[Eval: %d / %d]'%(index, num_images))
-
-            # ----------- Load an image -----------
-            img, img_id = self.dataset.pull_image(index)
-            orig_h, orig_w, _ = img.shape
-
-            # ----------- Data preprocess -----------
-            x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device)
-            
-            img_id = int(img_id)
-            ids.append(img_id)
-
-            # ----------- Model inference -----------
-            outputs = model(x)
-            scores = outputs['scores']
-            labels = outputs['labels']
-            bboxes = outputs['bboxes']
-
-            # ----------- Rescale bboxes -----------
-            bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
-
-            # ----------- Process results -----------
-            for i, box in enumerate(bboxes):
-                x1 = float(box[0])
-                y1 = float(box[1])
-                x2 = float(box[2])
-                y2 = float(box[3])
-                label = self.dataset.class_ids[int(labels[i])]
-                
-                # COCO box format: x1, y1, bw, bh
-                bbox = [x1, y1, x2 - x1, y2 - y1]
-                score = float(scores[i])
-                 # COCO json format
-                A = {"image_id":    img_id,
-                     "category_id": label,
-                     "bbox":        bbox,
-                     "score":       score}
-                data_dict.append(A)
-
-        annType = ['segm', 'bbox', 'keypoints']
-
-        # ------------- COCO Box detection evaluation -------------
-        if len(data_dict) > 0:
-            print('evaluating ......')
-            cocoGt = self.dataset.coco
-            _, tmp = tempfile.mkstemp()
-            json.dump(data_dict, open(tmp, 'w'))
-            cocoDt = cocoGt.loadRes(tmp)
-            cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1])
-            cocoEval.params.imgIds = ids
-            cocoEval.evaluate()
-            cocoEval.accumulate()
-            cocoEval.summarize()
-
-            ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1]
-            print('ap50_95 : ', ap50_95)
-            print('ap50 : ', ap50)
-            self.map = ap50_95
-            self.ap50_95 = ap50_95
-            self.ap50 = ap50
-
-            return ap50, ap50_95
-        else:
-            return 0, 0
-

+ 9 - 4
yolo/evaluator/coco_evaluator.py → yolo/evaluator/map_evaluator.py

@@ -4,13 +4,13 @@ import torch
 from pycocotools.cocoeval import COCOeval
 
 from dataset.coco import COCODataset
+from dataset.voc  import VOCDataset
 from utils.box_ops import rescale_bboxes
 
 
-class COCOAPIEvaluator():
-    def __init__(self, cfg, data_dir, device, transform=None):
+class MapEvaluator():
+    def __init__(self, dataset_name, cfg, data_dir, device, transform=None):
         # ----------------- Basic parameters -----------------
-        self.image_set = 'val2017'
         self.transform = transform
         self.device = device
         # ----------------- Metrics -----------------
@@ -18,7 +18,12 @@ class COCOAPIEvaluator():
         self.ap50_95 = 0.
         self.ap50 = 0.
         # ----------------- Dataset -----------------
-        self.dataset = COCODataset(cfg=cfg, data_dir=data_dir, image_set=self.image_set, transform=None, is_train=False)
+        if   dataset_name == "coco":
+            self.dataset = COCODataset(cfg=cfg, data_dir=data_dir, transform=None, is_train=False)
+        elif dataset_name == "voc":
+            self.dataset = VOCDataset(cfg=cfg, data_dir=data_dir, transform=None, is_train=False)
+        else:
+            raise NotImplementedError("Unknown dataset name.")
 
     @torch.no_grad()
     def evaluate(self, model):
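
The renamed evaluator folds the VOC and COCO paths into one class: both datasets now expose COCO-style annotations, so a single COCOeval pass covers either. A hedged usage sketch mirroring the call sites in eval.py and train.py; the metric attributes follow the fields initialized above:

# Hypothetical usage; cfg, device, and val_transform come from the repo's builders.
evaluator = MapEvaluator(cfg=cfg, dataset_name="voc",
                         data_dir=args.root, device=device, transform=val_transform)
evaluator.evaluate(model)                 # runs COCOeval over the val split
print(evaluator.ap50_95, evaluator.ap50)  # populated by evaluate()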

+ 0 - 348
yolo/evaluator/voc_evaluator.py

@@ -1,348 +0,0 @@
-"""Adapted from:
-    @longcw faster_rcnn_pytorch: https://github.com/longcw/faster_rcnn_pytorch
-    @rbgirshick py-faster-rcnn https://github.com/rbgirshick/py-faster-rcnn
-    Licensed under The MIT License [see LICENSE for details]
-"""
-
-from dataset.voc import VOCDataset, VOC_CLASSES
-import os
-import time
-import numpy as np
-import pickle
-import xml.etree.ElementTree as ET
-
-from utils.box_ops import rescale_bboxes
-
-
-class VOCAPIEvaluator():
-    """ VOC AP Evaluation class """
-    def __init__(self,
-                 cfg,
-                 data_dir, 
-                 device,
-                 transform,
-                 set_type='test', 
-                 year='2007', 
-                 display=False):
-        # basic config
-        self.data_dir = data_dir
-        self.device = device
-        self.labelmap = VOC_CLASSES
-        self.set_type = set_type
-        self.year = year
-        self.display = display
-        self.map = 0.
-
-        # transform
-        self.transform = transform
-
-        # path
-        time_stamp = time.strftime('%Y-%m-%d_%H-%M-%S',time.localtime(time.time()))
-        self.devkit_path = os.path.join(data_dir, 'VOC' + year)
-        self.annopath = os.path.join(data_dir, 'VOC2007', 'Annotations', '%s.xml')
-        self.imgpath = os.path.join(data_dir, 'VOC2007', 'JPEGImages', '%s.jpg')
-        self.imgsetpath = os.path.join(data_dir, 'VOC2007', 'ImageSets', 'Main', set_type+'.txt')
-        self.output_dir = self.get_output_dir('det_results/eval/voc_eval/{}'.format(time_stamp), self.set_type)
-
-        # dataset
-        self.dataset = VOCDataset(
-            cfg=cfg,
-            data_dir=data_dir, 
-            image_set=[('2007', set_type)],
-            is_train=False)
-        
-    def evaluate(self, net):
-        net.eval()
-        num_images = len(self.dataset)
-        # all detections are collected into:
-        #    all_boxes[cls][image] = N x 5 array of detections in
-        #    (x1, y1, x2, y2, score)
-        self.all_boxes = [[[] for _ in range(num_images)]
-                        for _ in range(len(self.labelmap))]
-
-        # timers
-        det_file = os.path.join(self.output_dir, 'detections.pkl')
-
-        for i in range(num_images):
-            img, _ = self.dataset.pull_image(i)
-            orig_h, orig_w = img.shape[:2]
-
-            # preprocess
-            x, _, ratio = self.transform(img)
-            x = x.unsqueeze(0).to(self.device)
-
-            # forward
-            t0 = time.time()
-            outputs = net(x)
-            scores = outputs['scores']
-            labels = outputs['labels']
-            bboxes = outputs['bboxes']
-            detect_time = time.time() - t0
-
-            # rescale bboxes
-            bboxes = rescale_bboxes(bboxes, [orig_w, orig_h], ratio)
-
-            for j in range(len(self.labelmap)):
-                inds = np.where(labels == j)[0]
-                if len(inds) == 0:
-                    self.all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
-                    continue
-                c_bboxes = bboxes[inds]
-                c_scores = scores[inds]
-                c_dets = np.hstack((c_bboxes,
-                                    c_scores[:, np.newaxis])).astype(np.float32,
-                                                                    copy=False)
-                self.all_boxes[j][i] = c_dets
-
-            if i % 500 == 0:
-                print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images, detect_time))
-
-        with open(det_file, 'wb') as f:
-            pickle.dump(self.all_boxes, f, pickle.HIGHEST_PROTOCOL)
-
-        print('Evaluating detections')
-        self.evaluate_detections(self.all_boxes)
-
-        print('Mean AP: ', self.map)
-  
-    def parse_rec(self, filename):
-        """ Parse a PASCAL VOC xml file """
-        tree = ET.parse(filename)
-        objects = []
-        for obj in tree.findall('object'):
-            obj_struct = {}
-            obj_struct['name'] = obj.find('name').text
-            obj_struct['pose'] = obj.find('pose').text
-            obj_struct['truncated'] = int(obj.find('truncated').text)
-            obj_struct['difficult'] = int(obj.find('difficult').text)
-            bbox = obj.find('bndbox')
-            obj_struct['bbox'] = [int(bbox.find('xmin').text),
-                                int(bbox.find('ymin').text),
-                                int(bbox.find('xmax').text),
-                                int(bbox.find('ymax').text)]
-            objects.append(obj_struct)
-
-        return objects
-
-    def get_output_dir(self, name, phase):
-        """Return the directory where experimental artifacts are placed.
-        If the directory does not exist, it is created.
-        A canonical path is built using the name from an imdb and a network
-        (if not None).
-        """
-        filedir = os.path.join(name, phase)
-        if not os.path.exists(filedir):
-            os.makedirs(filedir, exist_ok=True)
-        return filedir
-
-    def get_voc_results_file_template(self, cls):
-        # VOCdevkit/VOC2007/results/det_test_aeroplane.txt
-        filename = 'det_' + self.set_type + '_%s.txt' % (cls)
-        filedir = os.path.join(self.devkit_path, 'results')
-        if not os.path.exists(filedir):
-            os.makedirs(filedir)
-        path = os.path.join(filedir, filename)
-        return path
-
-    def write_voc_results_file(self, all_boxes):
-        for cls_ind, cls in enumerate(self.labelmap):
-            if self.display:
-                print('Writing {:s} VOC results file'.format(cls))
-            filename = self.get_voc_results_file_template(cls)
-            with open(filename, 'wt') as f:
-                for im_ind, index in enumerate(self.dataset.ids):
-                    dets = all_boxes[cls_ind][im_ind]
-                    if len(dets) == 0:
-                        continue
-                    # the VOCdevkit expects 1-based indices
-                    for k in range(dets.shape[0]):
-                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
-                                format(index[1], dets[k, -1],
-                                    dets[k, 0] + 1, dets[k, 1] + 1,
-                                    dets[k, 2] + 1, dets[k, 3] + 1))
-
-    def do_python_eval(self, use_07=True):
-        cachedir = os.path.join(self.devkit_path, 'annotations_cache')
-        aps = []
-        # The PASCAL VOC metric changed in 2010
-        use_07_metric = use_07
-        print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
-        if not os.path.isdir(self.output_dir):
-            os.mkdir(self.output_dir)
-        for i, cls in enumerate(self.labelmap):
-            filename = self.get_voc_results_file_template(cls)
-            rec, prec, ap = self.voc_eval(detpath=filename, 
-                                          classname=cls, 
-                                          cachedir=cachedir, 
-                                          ovthresh=0.5, 
-                                          use_07_metric=use_07_metric
-                                        )
-            aps += [ap]
-            print('AP for {} = {:.4f}'.format(cls, ap))
-            with open(os.path.join(self.output_dir, cls + '_pr.pkl'), 'wb') as f:
-                pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
-        if self.display:
-            self.map = np.mean(aps)
-            print('Mean AP = {:.4f}'.format(np.mean(aps)))
-            print('~~~~~~~~')
-            print('Results:')
-            for ap in aps:
-                print('{:.3f}'.format(ap))
-            print('{:.3f}'.format(np.mean(aps)))
-            print('~~~~~~~~')
-            print('')
-            print('--------------------------------------------------------------')
-            print('Results computed with the **unofficial** Python eval code.')
-            print('Results should be very close to the official MATLAB eval code.')
-            print('--------------------------------------------------------------')
-        else:
-            self.map = np.mean(aps)
-            print('Mean AP = {:.4f}'.format(np.mean(aps)))
-
-    def voc_ap(self, rec, prec, use_07_metric=True):
-        """ ap = voc_ap(rec, prec, [use_07_metric])
-        Compute VOC AP given precision and recall.
-        If use_07_metric is true, uses the
-        VOC 07 11 point method (default:True).
-        """
-        if use_07_metric:
-            # 11 point metric
-            ap = 0.
-            for t in np.arange(0., 1.1, 0.1):
-                if np.sum(rec >= t) == 0:
-                    p = 0
-                else:
-                    p = np.max(prec[rec >= t])
-                ap = ap + p / 11.
-        else:
-            # correct AP calculation
-            # first append sentinel values at the end
-            mrec = np.concatenate(([0.], rec, [1.]))
-            mpre = np.concatenate(([0.], prec, [0.]))
-
-            # compute the precision envelope
-            for i in range(mpre.size - 1, 0, -1):
-                mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
-
-            # to calculate area under PR curve, look for points
-            # where X axis (recall) changes value
-            i = np.where(mrec[1:] != mrec[:-1])[0]
-
-            # and sum (\Delta recall) * prec
-            ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
-        return ap
-
-    def voc_eval(self, detpath, classname, cachedir, ovthresh=0.5, use_07_metric=True):
-        if not os.path.isdir(cachedir):
-            os.mkdir(cachedir)
-        cachefile = os.path.join(cachedir, 'annots.pkl')
-        # read list of images
-        with open(self.imgsetpath, 'r') as f:
-            lines = f.readlines()
-        imagenames = [x.strip() for x in lines]
-        if not os.path.isfile(cachefile):
-            # load annots
-            recs = {}
-            for i, imagename in enumerate(imagenames):
-                recs[imagename] = self.parse_rec(self.annopath % (imagename))
-                if i % 100 == 0 and self.display:
-                    print('Reading annotation for {:d}/{:d}'.format(
-                    i + 1, len(imagenames)))
-            # save
-            if self.display:
-                print('Saving cached annotations to {:s}'.format(cachefile))
-            with open(cachefile, 'wb') as f:
-                pickle.dump(recs, f)
-        else:
-            # load
-            with open(cachefile, 'rb') as f:
-                recs = pickle.load(f)
-
-        # extract gt objects for this class
-        class_recs = {}
-        npos = 0
-        for imagename in imagenames:
-            R = [obj for obj in recs[imagename] if obj['name'] == classname]
-            bbox = np.array([x['bbox'] for x in R])
-            difficult = np.array([x['difficult'] for x in R]).astype(np.bool_)
-            det = [False] * len(R)
-            npos = npos + sum(~difficult)
-            class_recs[imagename] = {'bbox': bbox,
-                                    'difficult': difficult,
-                                    'det': det}
-
-        # read dets
-        detfile = detpath.format(classname)
-        with open(detfile, 'r') as f:
-            lines = f.readlines()
-        if any(lines) == 1:
-
-            splitlines = [x.strip().split(' ') for x in lines]
-            image_ids = [x[0] for x in splitlines]
-            confidence = np.array([float(x[1]) for x in splitlines])
-            BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
-
-            # sort by confidence
-            sorted_ind = np.argsort(-confidence)
-            sorted_scores = np.sort(-confidence)
-            BB = BB[sorted_ind, :]
-            image_ids = [image_ids[x] for x in sorted_ind]
-
-            # go down dets and mark TPs and FPs
-            nd = len(image_ids)
-            tp = np.zeros(nd)
-            fp = np.zeros(nd)
-            for d in range(nd):
-                R = class_recs[image_ids[d]]
-                bb = BB[d, :].astype(float)
-                ovmax = -np.inf
-                BBGT = R['bbox'].astype(float)
-                if BBGT.size > 0:
-                    # compute overlaps
-                    # intersection
-                    ixmin = np.maximum(BBGT[:, 0], bb[0])
-                    iymin = np.maximum(BBGT[:, 1], bb[1])
-                    ixmax = np.minimum(BBGT[:, 2], bb[2])
-                    iymax = np.minimum(BBGT[:, 3], bb[3])
-                    iw = np.maximum(ixmax - ixmin, 0.)
-                    ih = np.maximum(iymax - iymin, 0.)
-                    inters = iw * ih
-                    uni = ((bb[2] - bb[0]) * (bb[3] - bb[1]) +
-                        (BBGT[:, 2] - BBGT[:, 0]) *
-                        (BBGT[:, 3] - BBGT[:, 1]) - inters)
-                    overlaps = inters / uni
-                    ovmax = np.max(overlaps)
-                    jmax = np.argmax(overlaps)
-
-                if ovmax > ovthresh:
-                    if not R['difficult'][jmax]:
-                        if not R['det'][jmax]:
-                            tp[d] = 1.
-                            R['det'][jmax] = 1
-                        else:
-                            fp[d] = 1.
-                else:
-                    fp[d] = 1.
-
-            # compute precision recall
-            fp = np.cumsum(fp)
-            tp = np.cumsum(tp)
-            rec = tp / float(npos)
-            # avoid divide by zero in case the first detection matches a difficult
-            # ground truth
-            prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
-            ap = self.voc_ap(rec, prec, use_07_metric)
-        else:
-            rec = -1.
-            prec = -1.
-            ap = -1.
-
-        return rec, prec, ap
-
-    def evaluate_detections(self, box_list):
-        self.write_voc_results_file(box_list)
-        self.do_python_eval()
-
-
-if __name__ == '__main__':
-    pass

+ 2 - 2
yolo/models/__init__.py

@@ -50,7 +50,7 @@ def build_model(args, cfg, is_val=False):
 
     if is_val:
         # ------------ Load pretrained weight ------------
-        if args.pretrained is not None:
+        if hasattr(args, "pretrained") and args.pretrained is not None:
             print('Loading COCO pretrained weight ...')
             checkpoint = torch.load(args.pretrained, map_location='cpu')
             # checkpoint state dict
@@ -72,7 +72,7 @@ def build_model(args, cfg, is_val=False):
             model.load_state_dict(checkpoint_state_dict, strict=False)
 
         # ------------ Keep training from the given checkpoint ------------
-        if args.resume and args.resume != "None":
+        if hasattr(args, "resume") and args.resume and args.resume != "None":
             checkpoint = torch.load(args.resume, map_location='cpu')
             # checkpoint state dict
             try:
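
The new hasattr guards make build_model tolerant of argparse namespaces that never define pretrained or resume (e.g., an eval-time parser that omits those flags). A self-contained sketch of the guarded pattern, with SimpleNamespace standing in for the real args object:

from types import SimpleNamespace

# Eval-time namespaces may omit these attributes entirely:
args = SimpleNamespace(dataset="voc")

# The guarded pattern used above: skip rather than raise AttributeError.
if hasattr(args, "pretrained") and args.pretrained is not None:
    print("loading pretrained weight ...")
if hasattr(args, "resume") and args.resume and args.resume != "None":
    print("resuming from checkpoint ...")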

+ 163 - 0
yolo/tools/convert_voc_to_coco.py

@@ -0,0 +1,163 @@
+import cv2
+import random
+import numpy as np
+import os.path as osp
+import xml.etree.ElementTree as ET
+import torch.utils.data as data
+
+voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')
+
+
+class VOCAnnotationTransform(object):
+    def __init__(self, class_to_ind=None, keep_difficult=False):
+        self.class_to_ind = class_to_ind or dict(
+            zip(voc_class_labels, range(len(voc_class_labels))))
+        self.keep_difficult = keep_difficult
+
+    def __call__(self, target):
+        res = []
+        for obj in target.iter('object'):
+            difficult = int(obj.find('difficult').text) == 1
+            if not self.keep_difficult and difficult:
+                continue
+            name = obj.find('name').text.lower().strip()
+            bbox = obj.find('bndbox')
+
+            pts = ['xmin', 'ymin', 'xmax', 'ymax']
+            bndbox = []
+            for i, pt in enumerate(pts):
+                cur_pt = int(bbox.find(pt).text) - 1
+                bndbox.append(cur_pt)
+            label_idx = self.class_to_ind[name]
+            bndbox.append(label_idx)
+            res += [bndbox]  # [x1, y1, x2, y2, label_ind]
+
+        return res  # [[x1, y1, x2, y2, label_ind], ... ]
+
+class VOCDataset(data.Dataset):
+    def __init__(self, 
+                 root   :str = None, 
+                 image_set  = [('2007', 'trainval'), ('2012', 'trainval')],
+                 is_train   :bool =False,
+                 ):
+        # ----------- Basic parameters -----------
+        self.image_set = image_set
+        self.is_train  = is_train
+        self.num_classes = 20
+        # ----------- Path parameters -----------
+        self.root = root
+        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
+        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
+        # ----------- Data parameters -----------
+        self.ids = list()
+        for (year, name) in image_set:
+            rootpath = osp.join(self.root, 'VOC' + year)
+            for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
+                self.ids.append((rootpath, line.strip()))
+        self.dataset_size = len(self.ids)
+        self.class_labels = voc_class_labels
+        self.class_indexs = voc_class_indexs
+        # ----------- Transform parameters -----------
+        self.target_transform = VOCAnnotationTransform()
+
+    def __len__(self):
+        return self.dataset_size
+
+    def pull_item(self, index):
+        # load an image
+        img_id = self.ids[index]
+        image = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
+        height, width, channels = image.shape
+
+        # load an annotation
+        anno = ET.parse(self._annopath % img_id).getroot()
+        anno = self.target_transform(anno)
+
+        # guard against no boxes via resizing
+        anno = np.array(anno).reshape(-1, 5)
+        bboxes = anno[:, :4]  # [N, 4]
+        labels = anno[:, 4]   # [N,]
+        target = {
+            "file_name": "{}.jpg".format(img_id[-1]),
+            "bboxes": bboxes,
+            "labels": labels,
+            "orig_size": [height, width],
+            "id": index,
+        }
+        
+        return target
+
+
+if __name__ == "__main__":
+    import json
+
+    # json_file = "D:\\python_work\\dataset\\COCO\\annotations\\instances_val2017.json"
+    # with open(json_file, 'r') as f:
+    #     data_dict = json.load(f)
+    # print(data_dict['info'])
+    # print(data_dict.keys())
+    # print(len(data_dict["annotations"]))
+    # print(len(data_dict["images"]))
+    # print(data_dict["images"][0])
+    # print(data_dict["images"][1])
+    # print(data_dict["images"][2])
+    # print(data_dict["annotations"][0])
+    # print(data_dict["annotations"][1])
+    # print(data_dict["annotations"][2])
+    # exit()
+
+    # opt
+    is_train = False
+    dataset = VOCDataset(root='D:/python_work/dataset/VOCdevkit/',
+                         image_set=[('2007', 'trainval'), ('2012', 'trainval')] if is_train else [('2007', 'test')],
+                         is_train=is_train,
+                         )
+    
+    print('Data length: ', len(dataset))
+
+    coco_dict = {
+        "images": [],
+        "annotations": [],
+        "type": "instances",
+        "categories": [{'supercategory': name, "id": i, 'name': name} for i, name in enumerate(voc_class_labels)]
+    }
+    anno_id = 0
+    for i in range(len(dataset)):
+        if i % 1000 == 0:
+            print(" - [{}] / [{}] ...".format(i, len(dataset)))
+
+        target = dataset.pull_item(i)
+
+        # images info.
+        file_name = target["file_name"]
+        height, width = target["orig_size"]
+        id = int(target["id"])
+
+        coco_dict["images"].append({
+            'file_name': file_name,
+            'height': height,
+            'width': width,
+            'id': id
+        })
+
+        # annotation info.
+        bboxes = target["bboxes"]
+        labels = target["labels"]
+
+        for bbox, label in zip(bboxes, labels):
+            x1, y1, x2, y2 = bbox
+            coco_dict["annotations"].append({
+                'bbox': [int(x1), int(y1), int(x2 - x1), int(y2 - y1)],
+                'area': int((x2 - x1) * (y2 - y1)),
+                'category_id': int(label),
+                'image_id': id,
+                'id': anno_id,
+                'iscrowd': 0,
+            })
+            anno_id += 1
+
+    json_file = "D:\\python_work\\dataset\\VOCdevkit\\annotations\\instances_val.json"
+    with open(json_file, 'w') as f:
+        json.dump(coco_dict, f, indent=4)
+    print(f"Data saved to {json_file}")

+ 7 - 2
yolo/train.py

@@ -23,7 +23,7 @@ from config import build_config
 from dataset.build import build_dataset, build_transform
 
 # ----------------- Evaluator Components -----------------
-from evaluator.build import build_evluator
+from evaluator.map_evaluator import MapEvaluator
 
 # ----------------- Model Components -----------------
 from models import build_model
@@ -149,7 +149,12 @@ def train():
     train_loader = build_dataloader(args, dataset, args.batch_size // world_size, CollateFunc())
 
     # ---------------------------- Build Evaluator ----------------------------
-    evaluator = build_evluator(args, cfg, val_transform, device)
+    evaluator = MapEvaluator(cfg = cfg,
+                             dataset_name = args.dataset,
+                             data_dir     = args.root,
+                             device       = device,
+                             transform    = val_transform
+                             )
 
     # ---------------------------- Build model ----------------------------
     ## Build model