
add OurDataset

yjh0410 · 2 years ago
parent · commit b67eef8782
8 changed files with 826 additions and 0 deletions
  1. dataset/ourdataset.py (+273, -0)
  2. eval.py (+19, -0)
  3. evaluator/ourdataset_evaluator.py (+120, -0)
  4. tools/__init__.py (+0, -0)
  5. tools/convert_crowdhuman_to_coco.py (+72, -0)
  6. tools/convert_ours_to_coco.py (+159, -0)
  7. tools/convert_widerface_to_coco.py (+157, -0)
  8. utils/misc.py (+26, -0)

+ 273 - 0
dataset/ourdataset.py

@@ -0,0 +1,273 @@
+import os
+import cv2
+import random
+import numpy as np
+import time
+
+from torch.utils.data import Dataset
+
+try:
+    from pycocotools.coco import COCO
+except ImportError:
+    print("It seems that the COCOAPI is not installed.")
+
+try:
+    from .data_augment import build_transform
+    from .data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
+except ImportError:
+    from data_augment import build_transform
+    from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
+
+# define the class labels of our dataset here
+our_class_labels = ('cat',)
+
+
+
+class OurDataset(Dataset):
+    """
+    Our dataset class.
+    """
+    def __init__(self, 
+                 img_size=640,
+                 data_dir=None, 
+                 image_set='train',
+                 transform=None,
+                 trans_config=None,
+                 is_train=False):
+        """
+        OurDataset initialization. Annotation data are read into memory by the COCO API.
+        Args:
+            img_size (int): target image size after preprocessing
+            data_dir (str): dataset root directory
+            image_set (str): dataset split, e.g. 'train' or 'val'
+            transform: image/target preprocessing and augmentation pipeline
+            trans_config (dict): Mosaic / Mixup augmentation config
+            is_train (bool): whether the dataset is used for training
+        """
+        self.img_size = img_size
+        self.image_set = image_set
+        self.json_file = '{}.json'.format(image_set)
+        self.data_dir = data_dir
+        self.coco = COCO(os.path.join(self.data_dir, image_set, 'annotations', self.json_file))
+        self.ids = self.coco.getImgIds()
+        self.class_ids = sorted(self.coco.getCatIds())
+        self.is_train = is_train
+
+        # augmentation
+        self.transform = transform
+        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
+        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
+        self.trans_config = trans_config
+        print('==============================')
+        print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
+        print('use Mixup Augmentation: {}'.format(self.mixup_prob))
+        print('==============================')
+
+
+    def __len__(self):
+        return len(self.ids)
+
+
+    def __getitem__(self, index):
+        return self.pull_item(index)
+
+
+    def load_image_target(self, index):
+        # load an image
+        image, _ = self.pull_image(index)
+        height, width, channels = image.shape
+
+        # load a target
+        bboxes, labels = self.pull_anno(index)
+
+        target = {
+            "boxes": bboxes,
+            "labels": labels,
+            "orig_size": [height, width]
+        }
+
+        return image, target
+
+
+    def load_mosaic(self, index):
+        # load 4x mosaic image
+        index_list = np.arange(index).tolist() + np.arange(index+1, len(self.ids)).tolist()
+        id1 = index
+        id2, id3, id4 = random.sample(index_list, 3)
+        indexs = [id1, id2, id3, id4]
+
+        # load images and targets
+        image_list = []
+        target_list = []
+        for index in indexs:
+            img_i, target_i = self.load_image_target(index)
+            image_list.append(img_i)
+            target_list.append(target_i)
+
+        # Mosaic Augment
+        if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
+            image, target = yolov5_mosaic_augment(
+                image_list, target_list, self.img_size, self.trans_config)
+                
+        return image, target
+
+        
+    def load_mixup(self, origin_image, origin_target):
+        # YOLOv5 type Mixup
+        if self.trans_config['mixup_type'] == 'yolov5_mixup':
+            new_index = np.random.randint(0, len(self.ids))
+            new_image, new_target = self.load_mosaic(new_index)
+            image, target = yolov5_mixup_augment(
+                origin_image, origin_target, new_image, new_target)
+        # YOLOX type Mixup
+        elif self.trans_config['mixup_type'] == 'yolox_mixup':
+            new_index = np.random.randint(0, len(self.ids))
+            new_image, new_target = self.load_image_target(new_index)
+            image, target = yolox_mixup_augment(
+                origin_image, origin_target, new_image, new_target, self.img_size, self.trans_config['mixup_scale'])
+
+        return image, target
+    
+
+    def pull_item(self, index):
+        if random.random() < self.mosaic_prob:
+            # load a mosaic image
+            mosaic = True
+            image, target = self.load_mosaic(index)
+        else:
+            mosaic = False
+            # load an image and target
+            image, target = self.load_image_target(index)
+
+        # MixUp
+        if random.random() < self.mixup_prob:
+            image, target = self.load_mixup(image, target)
+
+        # augment
+        image, target, deltas = self.transform(image, target, mosaic)
+
+        return image, target, deltas
+
+
+    def pull_image(self, index):
+        id_ = self.ids[index]
+        im_ann = self.coco.loadImgs(id_)[0] 
+        img_file = os.path.join(
+                self.data_dir, self.image_set, 'images', im_ann["file_name"])
+        image = cv2.imread(img_file)
+
+        return image, id_
+
+
+    def pull_anno(self, index):
+        id_ = self.ids[index]
+
+        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None)
+        annotations = self.coco.loadAnns(anno_ids)
+        
+        #load a target
+        bboxes = []
+        labels = []
+        for anno in annotations:
+            if 'bbox' in anno and anno['area'] > 0:
+                # bbox
+                x1 = np.max((0, anno['bbox'][0]))
+                y1 = np.max((0, anno['bbox'][1]))
+                x2 = x1 + anno['bbox'][2]
+                y2 = y1 + anno['bbox'][3]
+                if x2 < x1 or y2 < y1:
+                    continue
+                # class label
+                cls_id = self.class_ids.index(anno['category_id'])
+                
+                bboxes.append([x1, y1, x2, y2])
+                labels.append(cls_id)
+
+        # guard against images without boxes: reshape keeps a fixed (N, 4) shape
+        bboxes = np.array(bboxes).reshape(-1, 4)
+        labels = np.array(labels).reshape(-1)
+        
+        return bboxes, labels
+
+
+if __name__ == "__main__":
+    import argparse
+    import sys
+    sys.path.append('.')
+    from data_augment import build_transform
+    
+    parser = argparse.ArgumentParser(description='Our-Dataset')
+
+    # opt
+    parser.add_argument('--root', default='OurDataset',
+                        help='data root')
+    parser.add_argument('--split', default='train',
+                        help='data split')
+
+    args = parser.parse_args()
+    
+    is_train = False
+    img_size = 640
+    yolov5_trans_config = {
+        'aug_type': 'yolov5',
+        # Basic Augment
+        'degrees': 0.0,
+        'translate': 0.2,
+        'scale': 0.9,
+        'shear': 0.0,
+        'perspective': 0.0,
+        'hsv_h': 0.015,
+        'hsv_s': 0.7,
+        'hsv_v': 0.4,
+        # Mosaic & Mixup
+        'mosaic_prob': 1.0,
+        'mixup_prob': 0.15,
+        'mosaic_type': 'yolov5_mosaic',
+        'mixup_type': 'yolov5_mixup',
+        'mixup_scale': [0.5, 1.5]
+    }
+    ssd_trans_config = {
+        'aug_type': 'ssd',
+        'mosaic_prob': 0.0,
+        'mixup_prob': 0.0
+    }
+
+    transform = build_transform(img_size, yolov5_trans_config, is_train)
+
+    dataset = OurDataset(
+        img_size=img_size,
+        data_dir=args.root,
+        image_set='train',
+        trans_config=yolov5_trans_config,
+        transform=transform,
+        is_train=is_train
+        )
+    
+    np.random.seed(0)
+    class_colors = [(np.random.randint(255),
+                     np.random.randint(255),
+                     np.random.randint(255)) for _ in range(80)]
+    print('Data length: ', len(dataset))
+
+    for i in range(min(1000, len(dataset))):
+        image, target, deltas = dataset.pull_item(i)
+        # to numpy
+        image = image.permute(1, 2, 0).numpy()
+        # to uint8
+        image = image.astype(np.uint8)
+        image = image.copy()
+        img_h, img_w = image.shape[:2]
+
+        boxes = target["boxes"]
+        labels = target["labels"]
+
+        for box, label in zip(boxes, labels):
+            x1, y1, x2, y2 = box
+            cls_id = int(label)
+            color = class_colors[cls_id]
+            # class name
+            label = our_class_labels[cls_id]
+            image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2)
+            # put the text on the bbox
+            cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
+        cv2.imshow('gt', image)
+        # cv2.imwrite(str(i)+'.jpg', image)
+        cv2.waitKey(0)
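
A minimal usage sketch for OurDataset, assuming the directory layout implied by the paths above, i.e. <root>/<split>/images/ for images and <root>/<split>/annotations/<split>.json for the COCO-format labels, and assuming build_transform accepts (img_size, trans_config, is_train) as in the demo block:

    # illustrative example; paths and config values are placeholders
    from dataset.ourdataset import OurDataset
    from dataset.data_augment import build_transform

    trans_config = {'aug_type': 'ssd', 'mosaic_prob': 0.0, 'mixup_prob': 0.0}
    transform = build_transform(640, trans_config, False)
    dataset = OurDataset(img_size=640, data_dir='OurDataset', image_set='val',
                         transform=transform, trans_config=trans_config, is_train=False)
    image, target, deltas = dataset[0]   # __getitem__ delegates to pull_item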

+ 19 - 0
eval.py

@@ -6,8 +6,10 @@ import torch
 
 from evaluator.voc_evaluator import VOCAPIEvaluator
 from evaluator.coco_evaluator import COCOAPIEvaluator
+from evaluator.ourdataset_evaluator import OurDatasetEvaluator
 
 # load transform
+from dataset.ourdataset import our_class_labels
 from dataset.data_augment import build_transform
 
 # load some utils
@@ -89,6 +91,17 @@ def coco_test(model, data_dir, device, transform, test=False):
     evaluator.evaluate(model)
 
 
+def our_test(model, data_dir, device, transform):
+    evaluator = OurDatasetEvaluator(
+        data_dir=data_dir,
+        device=device,
+        image_set='val',
+        transform=transform)
+
+    # OurDataset evaluation
+    evaluator.evaluate(model)
+
+
 if __name__ == '__main__':
     args = parse_args()
     # cuda
@@ -111,6 +124,10 @@ if __name__ == '__main__':
         print('eval on coco-test-dev ...')
         num_classes = 80
         data_dir = os.path.join(args.root, 'COCO')
+    elif args.dataset == 'ourdataset':
+        print('eval on ourdataset ...')
+        num_classes = len(our_class_labels)
+        data_dir = os.path.join(args.root, 'OurDataset')
     else:
         print('unknow dataset !! we only support voc, coco-val, coco-test !!!')
         exit(0)
@@ -147,3 +164,5 @@ if __name__ == '__main__':
             coco_test(model, data_dir, device, transform, test=False)
         elif args.dataset == 'coco-test':
             coco_test(model, data_dir, device, transform, test=True)
+        elif args.dataset == 'ourdataset':
+            our_test(model, data_dir, device, transform)
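
With this change, evaluation on the custom dataset is selected via --dataset ourdataset; the diff shows args.dataset and args.root being read, while the model and checkpoint arguments come from parse_args and are not part of this hunk. An illustrative invocation, with those remaining flags omitted:

    python eval.py --dataset ourdataset --root path/to/datasets ...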

+ 120 - 0
evaluator/ourdataset_evaluator.py

@@ -0,0 +1,120 @@
+import json
+import tempfile
+import torch
+from dataset.ourdataset import OurDataset
+from utils.box_ops import rescale_bboxes
+
+try:
+    from pycocotools.cocoeval import COCOeval
+except ImportError:
+    print("It seems that the COCOAPI is not installed.")
+
+
+class OurDatasetEvaluator():
+    """
+    COCO-style AP evaluation class for OurDataset.
+    All images in the chosen split are processed \
+    and evaluated by the COCO API.
+    """
+    def __init__(self, data_dir, device, image_set='val', transform=None):
+        """
+        Args:
+            data_dir (str): dataset root directory.
+            device: device on which the model is evaluated.
+            image_set (str): dataset split used for evaluation, \
+                e.g. 'val'.
+            transform: preprocessing transform applied to each \
+                image before it is fed to the model.
+        """
+        self.dataset = OurDataset(data_dir=data_dir, image_set=image_set, is_train=False)
+        self.image_set = image_set
+        self.transform = transform
+        self.device = device
+
+        self.map = 0.
+        self.ap50_95 = 0.
+        self.ap50 = 0.
+
+    @torch.no_grad()
+    def evaluate(self, model):
+        """
+        COCO average precision (AP) evaluation. Runs inference over the
+        evaluation split and scores the results with the COCO API.
+        Args:
+            model : model object
+        Returns:
+            ap50 (float) : COCO AP for IoU=0.50
+            ap50_95 (float) : COCO AP averaged over IoU=0.50:0.95
+        """
+        model.eval()
+        ids = []
+        data_dict = []
+        num_images = len(self.dataset)
+        print('total number of images: %d' % (num_images))
+
+        # start testing
+        for index in range(num_images): # iterate over all images in the evaluation split
+            if index % 500 == 0:
+                print('[Eval: %d / %d]'%(index, num_images))
+
+            # load an image
+            img, id_ = self.dataset.pull_image(index)
+            orig_h, orig_w, _ = img.shape
+
+            # preprocess
+            x, _, deltas = self.transform(img)
+            x = x.unsqueeze(0).to(self.device) / 255.
+            
+            id_ = int(id_)
+            ids.append(id_)
+            # inference
+            outputs = model(x)
+            bboxes, scores, cls_inds = outputs
+
+            # rescale bboxes
+            origin_img_size = [orig_h, orig_w]
+            cur_img_size = [*x.shape[-2:]]
+            bboxes = rescale_bboxes(bboxes, origin_img_size, cur_img_size, deltas)
+
+            for i, box in enumerate(bboxes):
+                x1 = float(box[0])
+                y1 = float(box[1])
+                x2 = float(box[2])
+                y2 = float(box[3])
+                label = self.dataset.class_ids[int(cls_inds[i])]
+                
+                bbox = [x1, y1, x2 - x1, y2 - y1]
+                score = float(scores[i]) # object score * class score
+                A = {"image_id": id_, "category_id": label, "bbox": bbox,
+                     "score": score} # COCO json format
+                data_dict.append(A)
+
+        annType = ['segm', 'bbox', 'keypoints']
+
+        # Evaluate the Dt (detection) json comparing with the ground truth
+        if len(data_dict) > 0:
+            print('evaluating ......')
+            cocoGt = self.dataset.coco
+            # workaround: temporarily write data to json file because pycocotools can't process dict in py36.
+            _, tmp = tempfile.mkstemp()
+            json.dump(data_dict, open(tmp, 'w'))
+            cocoDt = cocoGt.loadRes(tmp)
+            cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1])
+            cocoEval.params.imgIds = ids
+            cocoEval.evaluate()
+            cocoEval.accumulate()
+            cocoEval.summarize()
+
+            ap50_95, ap50 = cocoEval.stats[0], cocoEval.stats[1]
+            print('ap50_95 : ', ap50_95)
+            print('ap50 : ', ap50)
+            self.map = ap50_95
+            self.ap50_95 = ap50_95
+            self.ap50 = ap50
+
+            return ap50, ap50_95
+        else:
+            return 0, 0
+
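
A usage sketch: the evaluator wraps OurDataset and the COCO API and expects the model's forward pass to return (bboxes, scores, cls_inds), as consumed above. Hypothetical wiring, assuming device, val_transform and model are provided by the rest of the training/eval code:

    from evaluator.ourdataset_evaluator import OurDatasetEvaluator

    evaluator = OurDatasetEvaluator(data_dir='OurDataset', device=device,
                                    image_set='val', transform=val_transform)
    ap50, ap50_95 = evaluator.evaluate(model)  # results are also stored in evaluator.ap50 / evaluator.ap50_95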

+ 0 - 0
tools/__init__.py


+ 72 - 0
tools/convert_crowdhuman_to_coco.py

@@ -0,0 +1,72 @@
+import os
+import numpy as np
+import json
+from PIL import Image
+import argparse
+
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='YOLO-Tutorial')
+
+    # dataset
+    parser.add_argument('--root', default='/mnt/share/ssd2/dataset/CrowdHuman/',
+                        help='data root')
+
+    return parser.parse_args()
+
+
+def load_func(fpath):
+    print('fpath', fpath)
+    assert os.path.exists(fpath)
+    with open(fpath,'r') as fid:
+        lines = fid.readlines()
+    records =[json.loads(line.strip('\n')) for line in lines]
+    return records
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    DATA_PATH = args.root
+    OUT_PATH = DATA_PATH + 'annotations/'
+    SPLITS = ['val', 'train']
+    DEBUG = False
+
+    if not os.path.exists(OUT_PATH):
+        os.mkdir(OUT_PATH)
+    for split in SPLITS:
+        data_path = DATA_PATH + split
+        out_path = OUT_PATH + '{}.json'.format(split)
+        out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]}
+        ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split)
+        anns_data = load_func(ann_path)
+        image_cnt = 0
+        ann_cnt = 0
+        video_cnt = 0
+        for ann_data in anns_data:
+            image_cnt += 1
+            file_path = DATA_PATH + 'CrowdHuman_{}/'.format(split) + 'Images/' + '{}.jpg'.format(ann_data['ID'])
+            im = Image.open(file_path)
+            image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), 
+                          'id': image_cnt,
+                          'height': im.size[1], 
+                          'width': im.size[0]}
+            out['images'].append(image_info)
+            if split != 'test':
+                anns = ann_data['gtboxes']
+                for i in range(len(anns)):
+                    ann_cnt += 1
+                    fbox = anns[i]['fbox']
+                    ann = {'id': ann_cnt,
+                         'category_id': 1,
+                         'image_id': image_cnt,
+                         'track_id': -1,
+                         'bbox_vis': anns[i]['vbox'],
+                         'bbox': fbox,
+                         'area': fbox[2] * fbox[3],
+                         'iscrowd': 1 if 'extra' in anns[i] and \
+                                         'ignore' in anns[i]['extra'] and \
+                                         anns[i]['extra']['ignore'] == 1 else 0}
+                    out['annotations'].append(ann)
+        print('loaded {} split: {} images, {} annotations'.format(split, len(out['images']), len(out['annotations'])))
+        json.dump(out, open(out_path, 'w'))
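
Usage note: the script builds paths by plain string concatenation, so --root should end with a trailing slash. It expects annotation_train.odgt / annotation_val.odgt and CrowdHuman_<split>/Images/ under the root, and writes annotations/train.json and annotations/val.json next to them. An illustrative run:

    python tools/convert_crowdhuman_to_coco.py --root /path/to/CrowdHuman/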

+ 159 - 0
tools/convert_ours_to_coco.py

@@ -0,0 +1,159 @@
+import os
+import json
+import xml.etree.ElementTree as ET
+import glob
+
+START_BOUNDING_BOX_ID = 1
+PRE_DEFINE_CATEGORIES = None
+# If necessary, pre-define category and its id
+#  PRE_DEFINE_CATEGORIES = {"aeroplane": 1, "bicycle": 2, "bird": 3, "boat": 4,
+#  "bottle":5, "bus": 6, "car": 7, "cat": 8, "chair": 9,
+#  "cow": 10, "diningtable": 11, "dog": 12, "horse": 13,
+#  "motorbike": 14, "person": 15, "pottedplant": 16,
+#  "sheep": 17, "sofa": 18, "train": 19, "tvmonitor": 20}
+
+
+def get(root, name):
+    vars = root.findall(name)
+    return vars
+
+
+def get_and_check(root, name, length):
+    vars = root.findall(name)
+    if len(vars) == 0:
+        raise ValueError("Can not find %s in %s." % (name, root.tag))
+    if length > 0 and len(vars) != length:
+        raise ValueError(
+            "The size of %s is supposed to be %d, but is %d."
+            % (name, length, len(vars))
+        )
+    if length == 1:
+        vars = vars[0]
+    return vars
+
+
+def get_filename_as_int(filename):
+    try:
+        filename = filename.replace("\\", "/")
+        filename = os.path.splitext(os.path.basename(filename))[0]
+        return int(filename)
+    except:
+        raise ValueError("Filename %s is supposed to be an integer." % (filename))
+
+
+def get_categories(xml_files):
+    """Generate category name to id mapping from a list of xml files.
+    
+    Arguments:
+        xml_files {list} -- A list of xml file paths.
+    
+    Returns:
+        dict -- category name to id mapping.
+    """
+    classes_names = []
+    for xml_file in xml_files:
+        tree = ET.parse(xml_file)
+        root = tree.getroot()
+        for member in root.findall("object"):
+            classes_names.append(member[0].text)
+    classes_names = list(set(classes_names))
+    classes_names.sort()
+    return {name: i for i, name in enumerate(classes_names)}
+
+
+def convert(xml_files, json_file):
+    json_dict = {"images": [], "type": "instances", "annotations": [], "categories": []}
+    if PRE_DEFINE_CATEGORIES is not None:
+        categories = PRE_DEFINE_CATEGORIES
+    else:
+        categories = get_categories(xml_files)
+    bnd_id = START_BOUNDING_BOX_ID
+    for i, xml_file in enumerate(xml_files):
+        if i % 100 == 0:
+            print('[{}] / [{}]'.format(i, len(xml_files)))
+        tree = ET.parse(xml_file)
+        root = tree.getroot()
+        path = get(root, "path")
+        if len(path) == 1:
+            filename = os.path.basename(path[0].text)
+        elif len(path) == 0:
+            filename = get_and_check(root, "filename", 1).text
+        else:
+            raise ValueError("%d paths found in %s" % (len(path), xml_file))
+        ## The filename must be a number
+        image_id = get_filename_as_int(filename)
+        size = get_and_check(root, "size", 1)
+        width = int(get_and_check(size, "width", 1).text)
+        height = int(get_and_check(size, "height", 1).text)
+        image = {
+            "file_name": filename,
+            "height": height,
+            "width": width,
+            "id": image_id,
+        }
+        json_dict["images"].append(image)
+        ## Currently we do not support segmentation.
+        #  segmented = get_and_check(root, 'segmented', 1).text
+        #  assert segmented == '0'
+        for obj in get(root, "object"):
+            category = get_and_check(obj, "name", 1).text
+            if category not in categories:
+                new_id = len(categories)
+                categories[category] = new_id
+            category_id = categories[category]
+            bndbox = get_and_check(obj, "bndbox", 1)
+            xmin = int(get_and_check(bndbox, "xmin", 1).text) - 1
+            ymin = int(get_and_check(bndbox, "ymin", 1).text) - 1
+            xmax = int(get_and_check(bndbox, "xmax", 1).text)
+            ymax = int(get_and_check(bndbox, "ymax", 1).text)
+            assert xmax > xmin
+            assert ymax > ymin
+            o_width = abs(xmax - xmin)
+            o_height = abs(ymax - ymin)
+            ann = {
+                "area": o_width * o_height,
+                "iscrowd": 0,
+                "image_id": image_id,
+                "bbox": [xmin, ymin, o_width, o_height],
+                "category_id": category_id,
+                "id": bnd_id,
+                "ignore": 0,
+                "segmentation": [],
+            }
+            json_dict["annotations"].append(ann)
+            bnd_id = bnd_id + 1
+
+    for cate, cid in categories.items():
+        cat = {"supercategory": "none", "id": cid, "name": cate}
+        json_dict["categories"].append(cat)
+
+    os.makedirs(os.path.dirname(json_file), exist_ok=True)
+    json_fp = open(json_file, "w")
+    json_str = json.dumps(json_dict)
+    json_fp.write(json_str)
+    json_fp.close()
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Convert VOC-style annotation labele by LabelImg to COCO format."
+    )
+    parser.add_argument("--root", help="Directory path to dataset.", type=str)
+    parser.add_argument("--split", default='train', 
+                        help="split of dataset.", type=str)
+    parser.add_argument("-anno", "--annotations", default='annotations', 
+                        help="Directory path to xml files.", type=str)
+    parser.add_argument("-json", "--json_file", default='train.json',
+                        help="Output COCO format json file.", type=str)
+    args = parser.parse_args()
+
+    data_dir = os.path.join(args.root, args.split)
+    anno_path = os.path.join(data_dir, args.annotations)
+    xml_files = glob.glob(os.path.join(anno_path, "*.xml"))
+    json_file = os.path.join(data_dir, args.annotations, '{}.json'.format(args.split))
+    print("Number of xml files: {}".format(len(xml_files)))
+    print("Converting to COCO format ...")
+    convert(xml_files, json_file)
+    print("Success: {}".format(args.json_file))

+ 157 - 0
tools/convert_widerface_to_coco.py

@@ -0,0 +1,157 @@
+import argparse
+import json
+import os
+import os.path as osp
+from PIL import Image
+
+
+def parse_wider_gt(dets_file_name):
+    # -----------------------------------------------------------------------------------------
+    '''
+      Parse the FDDB-format detection output file:
+        - first line is image file name
+        - second line is an integer, for `n` detections in that image
+        - next `n` lines are detection coordinates
+        - again, next line is image file name
+        - detections are [x y width height score]
+      Returns a dict: {'img_filename': detections as a list of arrays}
+    '''
+    fid = open(dets_file_name, 'r')
+
+    # Parsing the FDDB-format detection output txt file
+    img_flag = True
+    numdet_flag = False
+    start_det_count = False
+    det_count = 0
+    numdet = -1
+
+    det_dict = {}
+    img_file = ''
+
+    for line in fid:
+        line = line.strip()
+
+        if line == '0 0 0 0 0 0 0 0 0 0':
+            if det_count == numdet - 1:
+                start_det_count = False
+                det_count = 0
+                img_flag = True  # next line is image file
+                numdet_flag = False
+                numdet = -1
+                det_dict.pop(img_file)
+            continue
+
+        if img_flag:
+            # Image filename
+            img_flag = False
+            numdet_flag = True
+            # print('Img file: ' + line)
+            img_file = line
+            det_dict[img_file] = []  # init detections list for image
+            continue
+
+        if numdet_flag:
+            # next line after image filename: number of detections
+            numdet = int(line)
+            numdet_flag = False
+            if numdet > 0:
+                start_det_count = True  # start counting detections
+                det_count = 0
+            else:
+                # no detections in this image
+                img_flag = True  # next line is another image file
+                numdet = -1
+
+            # print 'num det: ' + line
+            continue
+
+        if start_det_count:
+            # after numdet, lines are detections
+            detection = [float(x) for x in line.split()]  # split on whitespace
+            det_dict[img_file].append(detection)
+            # print 'Detection: %s' % line
+            det_count += 1
+
+        if det_count == numdet:
+            start_det_count = False
+            det_count = 0
+            img_flag = True  # next line is image file
+            numdet_flag = False
+            numdet = -1
+
+    return det_dict
+
+
+def convert_wider_annots(args):
+    """Convert from WIDER FDDB-style format to COCO bounding box"""
+
+    subset = ['train', 'val'] if args.subset == 'all' else [args.subset]
+    outdir = os.path.join(args.datadir, args.outdir)
+    os.makedirs(outdir, exist_ok=True)
+
+    categories = [{"id": 1, "name": 'face'}]
+    for sset in subset:
+        print(f'Processing subset {sset}')
+        out_json_name = osp.join(outdir, f'{sset}.json')
+        data_dir = osp.join(args.datadir, f'WIDER_{sset}', 'images')
+        img_id = 0
+        ann_id = 0
+        cat_id = 1
+
+        ann_dict = {}
+        images = []
+        annotations = []
+        ann_file = os.path.join(args.datadir, 'wider_face_split', f'wider_face_{sset}_bbx_gt.txt')
+        wider_annot_dict = parse_wider_gt(ann_file)  # [im-file] = [[x,y,w,h], ...]
+
+        for filename in wider_annot_dict.keys():
+            if len(images) % 100 == 0:
+                print("Processed %s images, %s annotations" % (
+                    len(images), len(annotations)))
+
+            image = {}
+            image['id'] = img_id
+            img_id += 1
+            im = Image.open(os.path.join(data_dir, filename))
+            image['width'] = im.width
+            image['height'] = im.height
+            image['file_name'] = filename
+            images.append(image)
+
+            for gt_bbox in wider_annot_dict[filename]:
+                ann = {}
+                ann['id'] = ann_id
+                ann_id += 1
+                ann['image_id'] = image['id']
+                ann['segmentation'] = []
+                ann['category_id'] = cat_id  # 1:"face" for WIDER
+                ann['iscrowd'] = 0
+                ann['area'] = gt_bbox[2] * gt_bbox[3]
+                ann['boxes'] = gt_bbox
+                ann['bbox'] = gt_bbox[:4]
+                annotations.append(ann)
+
+        ann_dict['images'] = images
+        ann_dict['categories'] = categories
+        ann_dict['annotations'] = annotations
+        print("Num categories: %s" % len(categories))
+        print("Num images: %s" % len(images))
+        print("Num annotations: %s" % len(annotations))
+        with open(out_json_name, 'w', encoding='utf8') as outfile:
+            json.dump(ann_dict, outfile, indent=4, sort_keys=True)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Convert dataset')
+    parser.add_argument(
+        '-d', '--datadir', help="dir to widerface", default='data/widerface', type=str)
+
+    parser.add_argument(
+        '-s', '--subset', help="which subset to convert", default='all', choices=['all', 'train', 'val'], type=str)
+
+    parser.add_argument(
+        '-o', '--outdir', help="directory (under the data dir) to write the annotation json files", default='annotations')
+
+    args = parser.parse_args()
+
+    convert_wider_annots(args)
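
Usage note: the script expects WIDER_train/images/, WIDER_val/images/ and wider_face_split/wider_face_<split>_bbx_gt.txt under the data dir, and writes <outdir>/train.json and <outdir>/val.json. An illustrative run with the default locations:

    python tools/convert_widerface_to_coco.py -d data/widerface -s all -o annotations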

+ 26 - 0
utils/misc.py

@@ -10,8 +10,11 @@ from copy import deepcopy
 
 from evaluator.coco_evaluator import COCOAPIEvaluator
 from evaluator.voc_evaluator import VOCAPIEvaluator
+from evaluator.ourdataset_evaluator import OurDatasetEvaluator
+
 from dataset.voc import VOCDetection, VOC_CLASSES
 from dataset.coco import COCODataset, coco_class_index, coco_class_labels
+from dataset.ourdataset import OurDataset, our_class_labels
 from dataset.data_augment import build_transform
 
 from utils import fuse_conv_bn
@@ -73,6 +76,29 @@ def build_dataset(args, trans_config, device, is_train=False):
             transform=val_transform
             )
 
+    elif args.dataset == 'ourdataset':
+        data_dir = os.path.join(args.root, 'OurDataset')
+        class_names = our_class_labels
+        num_classes = len(our_class_labels)
+        class_indexs = None
+
+        # dataset
+        dataset = OurDataset(
+            data_dir=data_dir,
+            img_size=args.img_size,
+            image_set='train' if is_train else 'val',
+            transform=train_transform,
+            trans_config=trans_config,
+            is_train=is_train
+            )
+        # evaluator
+        evaluator = OurDatasetEvaluator(
+            data_dir=data_dir,
+            device=device,
+            image_set='val',
+            transform=val_transform
+        )
+
     else:
         print('unknow dataset !! Only support voc, coco !!')
         exit(0)