convert_voc_to_coco.py

import cv2
import numpy as np
import os.path as osp
import xml.etree.ElementTree as ET
import torch.utils.data as data

voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
                    'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
                    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')


class VOCAnnotationTransform(object):
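    """Transform a parsed VOC XML annotation into a list of boxes.

    Each <object> becomes [xmin, ymin, xmax, ymax, label_ind]; VOC's 1-based
    pixel coordinates are shifted to 0-based, and objects marked difficult
    are skipped unless keep_difficult is set.
    """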

    def __init__(self, class_to_ind=None, keep_difficult=False):
        self.class_to_ind = class_to_ind or dict(
            zip(voc_class_labels, range(len(voc_class_labels))))
        self.keep_difficult = keep_difficult

    def __call__(self, target):
        res = []
        for obj in target.iter('object'):
            difficult = int(obj.find('difficult').text) == 1
            if not self.keep_difficult and difficult:
                continue
            name = obj.find('name').text.lower().strip()
            bbox = obj.find('bndbox')
            pts = ['xmin', 'ymin', 'xmax', 'ymax']
            bndbox = []
            for pt in pts:
                cur_pt = int(bbox.find(pt).text) - 1  # VOC coordinates are 1-based
                bndbox.append(cur_pt)
            label_idx = self.class_to_ind[name]
            bndbox.append(label_idx)
            res += [bndbox]  # [x1, y1, x2, y2, label_ind]
        return res  # [[x1, y1, x2, y2, label_ind], ...]


class VOCDataset(data.Dataset):
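    """PASCAL VOC detection dataset.

    Collects image ids from the ImageSets/Main split files under `root` and
    returns per-image targets (boxes, labels, original size) via pull_item().
    """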

    def __init__(self,
                 root: str = None,
                 image_set=(('2007', 'trainval'), ('2012', 'trainval')),
                 is_train: bool = False,
                 ):
        # ----------- Basic parameters -----------
        self.image_set = image_set
        self.is_train = is_train
        self.num_classes = 20
        # ----------- Path parameters -----------
        self.root = root
        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
        # ----------- Data parameters -----------
        self.ids = list()
        for (year, name) in image_set:
            rootpath = osp.join(self.root, 'VOC' + year)
            with open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')) as f:
                for line in f:
                    self.ids.append((rootpath, line.strip()))
        self.dataset_size = len(self.ids)
        self.class_labels = voc_class_labels
        self.class_indexs = voc_class_indexs
        # ----------- Transform parameters -----------
        self.target_transform = VOCAnnotationTransform()

    def __len__(self):
        return self.dataset_size

    def pull_item(self, index):
        # load the image
        img_id = self.ids[index]
        image = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
        height, width = image.shape[:2]
        # load the annotation
        anno = ET.parse(self._annopath % img_id).getroot()
        anno = self.target_transform(anno)
        # reshape so images without boxes still yield a [0, 5] array
        anno = np.array(anno).reshape(-1, 5)
        bboxes = anno[:, :4]  # [N, 4]
        labels = anno[:, 4]   # [N,]
        target = {
            "file_name": "{}.jpg".format(img_id[-1]),
            "bboxes": bboxes,
            "labels": labels,
            "orig_size": [height, width],
            "id": index,
        }
        return target
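
    # NOTE: torch.utils.data.Dataset subclasses normally implement
    # __getitem__; the conversion loop below only calls pull_item(), so this
    # thin delegation is a sketch added for completeness, not original code.
    def __getitem__(self, index):
        return self.pull_item(index)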


if __name__ == "__main__":
    import json

    # (Optional) inspect an existing COCO annotation file such as
    # instances_val2017.json to compare its schema with the output below.

    # options
    is_train = False
    dataset = VOCDataset(root='D:/python_work/dataset/VOCdevkit/',
                         image_set=[('2007', 'trainval'), ('2012', 'trainval')] if is_train else [('2007', 'test')],
                         is_train=is_train,
                         )
    print('Data length: ', len(dataset))

    coco_dict = {
        "images": [],
        "annotations": [],
        "type": "instances",
        "categories": [{'supercategory': name, 'id': i, 'name': name}
                       for i, name in enumerate(voc_class_labels)]
    }
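    # Note: category ids start at 0 here (matching voc_class_indexs), whereas
    # official COCO annotations use 1-based category ids; whatever consumes
    # this json must follow the same 0-based convention.
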
    anno_id = 0
    for i in range(len(dataset)):
        if i % 1000 == 0:
            print(" - [{}] / [{}] ...".format(i, len(dataset)))
        target = dataset.pull_item(i)
        # image info
        file_name = target["file_name"]
        height, width = target["orig_size"]
        image_id = int(target["id"])
        coco_dict["images"].append({
            'file_name': file_name,
            'height': height,
            'width': width,
            'id': image_id
        })
        # annotation info
        bboxes = target["bboxes"]
        labels = target["labels"]
        for bbox, label in zip(bboxes, labels):
            x1, y1, x2, y2 = bbox
            coco_dict["annotations"].append({
                'bbox': [int(x1), int(y1), int(x2 - x1), int(y2 - y1)],  # COCO format: [x, y, w, h]
                'area': int((x2 - x1) * (y2 - y1)),
                'category_id': int(label),
                'image_id': image_id,
                'id': anno_id,
                'iscrowd': 0,
            })
            anno_id += 1

    json_file = "D:\\python_work\\dataset\\VOCdevkit\\annotations\\instances_val.json"
    with open(json_file, 'w') as f:
        json.dump(coco_dict, f, indent=4)
    print(f"Data saved to {json_file}")
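
    # Optional sanity check of the generated file: a minimal sketch assuming
    # pycocotools is installed (it is not used elsewhere in this script).
    try:
        from pycocotools.coco import COCO
        coco = COCO(json_file)
        print("images:", len(coco.getImgIds()))
        print("annotations:", len(coco.getAnnIds()))
        print("categories:", len(coco.loadCats(coco.getCatIds())))
    except ImportError:
        print("pycocotools not installed; skipping sanity check.")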