| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
import argparse
import json
import os

# Maps --task to (source annotation file template, cleaned file template).
# Each template is formatted with the image set name, e.g. 'val'.
_TASK_FILE_TEMPLATES = {
    'det': ('instances_{}2017.json', 'instances_{}2017_clean.json'),
    'pos': ('person_keypoints_{}2017.json', 'person_keypoints_{}2017_clean.json'),
}

# Progress-print interval used while filtering images, keyed by --image_set.
# Image sets not listed here print no per-image progress.
_IMAGE_LOG_INTERVALS = {'train': 5000, 'val': 500}


def filter_valid_annotations(annots_list, log_every=5000):
    """Keep only annotations whose bbox has positive width and height.

    Args:
        annots_list: list of annotation dicts, each with a 4-element
            'bbox' (the last two values are read as width and height)
            and an 'image_id'.
        log_every: print a progress line every this many annotations;
            a falsy value disables progress output.

    Returns:
        A tuple ``(clean_annots, valid_image_ids)``: the kept annotations
        in their original order, and the set of image ids that own at
        least one kept annotation.
    """
    clean_annots = []
    # A set gives O(1) membership tests; a list here makes the whole
    # cleaning pass quadratic in the number of images.
    valid_image_ids = set()
    total = len(annots_list)
    for i, anno in enumerate(annots_list):
        if log_every and i % log_every == 0:
            print("[{}] / [{}] ...".format(i, total))
        _, _, bw, bh = anno['bbox']
        if bw > 0 and bh > 0:
            clean_annots.append(anno)
            valid_image_ids.add(anno['image_id'])
    return clean_annots, valid_image_ids


def filter_images(images_list, valid_image_ids, log_every=None):
    """Keep only images whose 'id' is in ``valid_image_ids``.

    Args:
        images_list: list of image dicts, each with an 'id'.
        valid_image_ids: container of ids to keep (a set is expected so
            each lookup is constant time).
        log_every: print a progress line every this many images; a falsy
            value disables progress output.

    Returns:
        The kept image dicts, in their original order.
    """
    num_images = len(images_list)
    clean_images = []
    for i, image_dict in enumerate(images_list):
        if log_every and i % log_every == 0:
            print("[{}] / [{}] ...".format(i, num_images))
        if image_dict['id'] in valid_image_ids:
            clean_images.append(image_dict)
    return clean_images


def clean_coco_json(json_file, image_log_every=None):
    """Build a cleaned copy of a loaded COCO-style annotation dict.

    Annotations with a non-positive bbox width or height are dropped, and
    images left without any valid annotation are dropped as well. The
    'info', 'licenses' and 'categories' entries are carried over as-is.

    Args:
        json_file: dict with the keys 'info', 'licenses', 'images',
            'annotations' and 'categories'.
        image_log_every: progress interval for the image pass; a falsy
            value disables per-image progress output.

    Returns:
        A new dict with the keys 'info', 'licenses', 'categories',
        'images' and 'annotations' (in that order).

    Raises:
        KeyError: if one of the expected keys is missing.
    """
    clean_json_file = {
        'info': json_file['info'],
        'licenses': json_file['licenses'],
        'categories': json_file['categories'],
    }
    images_list = json_file['images']
    annots_list = json_file['annotations']

    # -------------- Filter annotations --------------
    print("Processing annotations ...")
    clean_annots_list, valid_image_ids = filter_valid_annotations(annots_list)
    print("Valid number of images: ", len(valid_image_ids))
    print("Valid number of annots: ", len(clean_annots_list))
    print("Original number of annots: ", len(annots_list))

    # -------------- Filter images --------------
    print("Processing images ...")
    clean_images_list = filter_images(
        images_list, valid_image_ids, log_every=image_log_every)
    print('Number of images after cleaning: ', len(clean_images_list))
    print('Number of annotations after cleaning: ', len(clean_annots_list))

    clean_json_file['images'] = clean_images_list
    clean_json_file['annotations'] = clean_annots_list
    return clean_json_file


def main(argv=None):
    """Command-line entry point: read, clean and re-save an annotation file.

    Args:
        argv: optional argument list; defaults to ``sys.argv[1:]``.

    Raises:
        NotImplementedError: if --task is neither 'det' nor 'pos'.
    """
    parser = argparse.ArgumentParser(description='COCO-Dataset')
    # --------------- opt parameters ---------------
    parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/COCO/',
                        help='data root')
    parser.add_argument('--image_set', type=str, default='val',
                        help='image set to clean, e.g. train or val')
    parser.add_argument('--task', type=str, default='det',
                        help='annotation task: det (instances) or pos (person keypoints)')

    args = parser.parse_args(argv)

    # --------------- load json ---------------
    if args.task not in _TASK_FILE_TEMPLATES:
        raise NotImplementedError('Unkown task !')
    task_prefix, clean_task_prefix = _TASK_FILE_TEMPLATES[args.task]

    json_path = os.path.join(args.root, 'annotations', task_prefix.format(args.image_set))
    with open(json_path, 'r', encoding='utf-8') as file:
        # json_file is a Dict: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])
        json_file = json.load(file)

    clean_json_file = clean_coco_json(
        json_file, image_log_every=_IMAGE_LOG_INTERVALS.get(args.image_set))

    # --------------- Save filtered json file ---------------
    new_json_path = os.path.join(args.root, 'annotations', clean_task_prefix.format(args.image_set))
    with open(new_json_path, 'w', encoding='utf-8') as f:
        json.dump(clean_json_file, f)


if __name__ == "__main__":
    main()
|