# convert_widerface_to_coco.py
# Convert WIDER FACE ground-truth annotations to COCO-format JSON.
  1. import argparse
  2. import json
  3. import os
  4. import os.path as osp
  5. from PIL import Image
  6. def parse_wider_gt(dets_file_name):
  7. # -----------------------------------------------------------------------------------------
  8. '''
  9. Parse the FDDB-format detection output file:
  10. - first line is image file name
  11. - second line is an integer, for `n` detections in that image
  12. - next `n` lines are detection coordinates
  13. - again, next line is image file name
  14. - detections are [x y width height score]
  15. Returns a dict: {'img_filename': detections as a list of arrays}
  16. '''
  17. fid = open(dets_file_name, 'r')
  18. # Parsing the FDDB-format detection output txt file
  19. img_flag = True
  20. numdet_flag = False
  21. start_det_count = False
  22. det_count = 0
  23. numdet = -1
  24. det_dict = {}
  25. img_file = ''
  26. for line in fid:
  27. line = line.strip()
  28. if line == '0 0 0 0 0 0 0 0 0 0':
  29. if det_count == numdet - 1:
  30. start_det_count = False
  31. det_count = 0
  32. img_flag = True # next line is image file
  33. numdet_flag = False
  34. numdet = -1
  35. det_dict.pop(img_file)
  36. continue
  37. if img_flag:
  38. # Image filename
  39. img_flag = False
  40. numdet_flag = True
  41. # print('Img file: ' + line)
  42. img_file = line
  43. det_dict[img_file] = [] # init detections list for image
  44. continue
  45. if numdet_flag:
  46. # next line after image filename: number of detections
  47. numdet = int(line)
  48. numdet_flag = False
  49. if numdet > 0:
  50. start_det_count = True # start counting detections
  51. det_count = 0
  52. else:
  53. # no detections in this image
  54. img_flag = True # next line is another image file
  55. numdet = -1
  56. # print 'num det: ' + line
  57. continue
  58. if start_det_count:
  59. # after numdet, lines are detections
  60. detection = [float(x) for x in line.split()] # split on whitespace
  61. det_dict[img_file].append(detection)
  62. # print 'Detection: %s' % line
  63. det_count += 1
  64. if det_count == numdet:
  65. start_det_count = False
  66. det_count = 0
  67. img_flag = True # next line is image file
  68. numdet_flag = False
  69. numdet = -1
  70. return det_dict
  71. def convert_wider_annots(args):
  72. """Convert from WIDER FDDB-style format to COCO bounding box"""
  73. subset = ['train', 'val'] if args.subset == 'all' else [args.subset]
  74. outdir = os.path.join(args.datadir, args.outdir)
  75. os.makedirs(outdir, exist_ok=True)
  76. categories = [{"id": 1, "name": 'face'}]
  77. for sset in subset:
  78. print(f'Processing subset {sset}')
  79. out_json_name = osp.join(outdir, f'{sset}.json')
  80. data_dir = osp.join(args.datadir, f'WIDER_{sset}', 'images')
  81. img_id = 0
  82. ann_id = 0
  83. cat_id = 1
  84. ann_dict = {}
  85. images = []
  86. annotations = []
  87. ann_file = os.path.join(args.datadir, 'wider_face_split', f'wider_face_{sset}_bbx_gt.txt')
  88. wider_annot_dict = parse_wider_gt(ann_file) # [im-file] = [[x,y,w,h], ...]
  89. for filename in wider_annot_dict.keys():
  90. if len(images) % 100 == 0:
  91. print("Processed %s images, %s annotations" % (
  92. len(images), len(annotations)))
  93. image = {}
  94. image['id'] = img_id
  95. img_id += 1
  96. im = Image.open(os.path.join(data_dir, filename))
  97. image['width'] = im.height
  98. image['height'] = im.width
  99. image['file_name'] = filename
  100. images.append(image)
  101. for gt_bbox in wider_annot_dict[filename]:
  102. ann = {}
  103. ann['id'] = ann_id
  104. ann_id += 1
  105. ann['image_id'] = image['id']
  106. ann['segmentation'] = []
  107. ann['category_id'] = cat_id # 1:"face" for WIDER
  108. ann['iscrowd'] = 0
  109. ann['area'] = gt_bbox[2] * gt_bbox[3]
  110. ann['boxes'] = gt_bbox
  111. ann['bbox'] = gt_bbox[:4]
  112. annotations.append(ann)
  113. ann_dict['images'] = images
  114. ann_dict['categories'] = categories
  115. ann_dict['annotations'] = annotations
  116. print("Num categories: %s" % len(categories))
  117. print("Num images: %s" % len(images))
  118. print("Num annotations: %s" % len(annotations))
  119. with open(out_json_name, 'w', encoding='utf8') as outfile:
  120. json.dump(ann_dict, outfile, indent=4, sort_keys=True)
  121. if __name__ == '__main__':
  122. parser = argparse.ArgumentParser(description='Convert dataset')
  123. parser.add_argument(
  124. '-d', '--datadir', help="dir to widerface", default='data/widerface', type=str)
  125. parser.add_argument(
  126. '-s', '--subset', help="which subset to convert", default='all', choices=['all', 'train', 'val'], type=str)
  127. parser.add_argument(
  128. '-o', '--outdir', help="where to output the annotation file, default same as data dir", default='annotations')
  129. args = parser.parse_args()
  130. convert_wider_annots(args)