- # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
- """
- COCO dataset which returns image_id for evaluation.
- Mostly copy-paste from https://github.com/pytorch/vision/blob/13b35ff/references/detection/coco_utils.py
- """
- from pathlib import Path
- import torch
- import torch.utils.data
- import torchvision
# Prefer the package-relative import; fall back to the flat import when this
# file is run directly as a script (no package context). Catch only
# ImportError — a bare `except:` would also swallow SystemExit/KeyboardInterrupt
# and hide unrelated errors raised while importing `transforms`.
try:
    from .transforms import build_transform
except ImportError:
    from transforms import build_transform
# Full 91-entry COCO category table (index = original COCO category id),
# including 'background' at index 0 and the 11 ids that have no annotations
# in the 2017 detection set (e.g. 'street sign', 'hat', 'mirror').
coco_labels_91 = ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'street sign', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'hat', 'backpack', 'umbrella', 'shoe', 'eye glasses', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'plate', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window', 'desk', 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'blender', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')
# The 80 categories actually annotated in COCO detection, in contiguous order;
# a model label i (0..79) names coco_labels_80[i].
coco_labels_80 = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush')
# Original (sparse, 1-based) COCO category ids of those 80 classes;
# coco_indexs[i] is the annotation-file category_id of contiguous label i.
coco_indexs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
class CocoDetection(torchvision.datasets.CocoDetection):
    """COCO detection dataset that converts raw COCO annotations into tensor
    targets (xyxy boxes, contiguous 0-79 labels) and keeps the image_id so
    results can be fed back to the COCO evaluation API."""

    def __init__(self, img_folder, ann_file, transforms):
        super(CocoDetection, self).__init__(img_folder, ann_file)
        self.coco_labels = coco_labels_80  # 80 coco labels for detection task
        self.coco_indexs = coco_indexs     # all original coco label index
        # O(1) mapping from original sparse category_id to contiguous [0, 80)
        # label — avoids an O(n) list.index() scan per annotation in prepare().
        self._cat_id_to_label = {cat_id: i for i, cat_id in enumerate(coco_indexs)}
        self._transforms = transforms

    def prepare(self, image, target):
        """Convert one raw COCO sample into tensor targets.

        Args:
            image: PIL image (only its ``.size`` is read here).
            target: dict with 'image_id' (int) and 'annotations'
                (list of COCO annotation dicts).

        Returns:
            (image, target) with target holding 'boxes' (N,4 float32 xyxy),
            'labels' (N, int64), 'image_id', 'area', 'iscrowd', and
            'orig_size'/'size' as [h, w] tensors.
        """
        w, h = image.size
        # image id, kept as a 1-element tensor for the coco evaluator
        image_id = torch.tensor([target["image_id"]])
        # drop crowd annotations — they are not used as training targets
        anno = [obj for obj in target["annotations"]
                if 'iscrowd' not in obj or obj['iscrowd'] == 0]
        # bbox target: COCO [x, y, w, h] -> [x1, y1, x2, y2], clipped to image
        boxes = torch.as_tensor([obj["bbox"] for obj in anno],
                                dtype=torch.float32).reshape(-1, 4)
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)
        # class target: remap sparse category ids to contiguous labels
        classes = torch.tensor(
            [self._cat_id_to_label[obj["category_id"]] for obj in anno],
            dtype=torch.int64)
        # filter degenerate boxes (zero or negative width/height after clipping)
        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
        boxes = boxes[keep]
        classes = classes[keep]
        target = {}
        target["boxes"] = boxes
        target["labels"] = classes
        target["image_id"] = image_id
        # for conversion to coco api
        area = torch.tensor([obj["area"] for obj in anno])
        iscrowd = torch.tensor([obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
        target["area"] = area[keep]
        target["iscrowd"] = iscrowd[keep]
        target["orig_size"] = torch.as_tensor([int(h), int(w)])
        target["size"] = torch.as_tensor([int(h), int(w)])
        return image, target

    def __getitem__(self, idx):
        """Load sample idx, build tensor targets, then apply transforms."""
        img, target = super(CocoDetection, self).__getitem__(idx)
        image_id = self.ids[idx]
        target = {'image_id': image_id, 'annotations': target}
        img, target = self.prepare(img, target)
        if self._transforms is not None:
            img, target = self._transforms(img, target)
        return img, target
def build_coco(args, transform=None, is_train=False):
    """Build a COCO 2017 detection dataset.

    Args:
        args: namespace with ``root``, the COCO dataset root directory.
        transform: optional callable applied to each (image, target) pair.
        is_train: use the train2017 split when True, else val2017.

    Returns:
        A CocoDetection dataset for the selected split.

    Raises:
        FileNotFoundError: if ``args.root`` does not exist. (An ``assert``
            would be silently stripped under ``python -O``, so raise instead.)
    """
    root = Path(args.root)
    if not root.exists():
        raise FileNotFoundError(f'provided COCO path {root} does not exist')
    paths = {
        "train": (root / "train2017", root / "annotations" / 'instances_train2017.json'),
        "val": (root / "val2017", root / "annotations" / 'instances_val2017.json'),
    }
    image_set = "train" if is_train else "val"
    img_folder, ann_file = paths[image_set]
    # build dataset
    return CocoDetection(img_folder, ann_file, transform)
if __name__ == "__main__":
    import argparse
    import cv2
    import numpy as np

    parser = argparse.ArgumentParser(description='COCO-Dataset')
    # opt
    parser.add_argument('--root', default='D:/python_work/dataset/COCO/',
                        help='data root')
    parser.add_argument('--is_train', action="store_true", default=False,
                        help='use the train2017 split with training transforms.')
    args = parser.parse_args()

    # fixed seed so each of the 80 classes gets a stable drawing color
    np.random.seed(0)
    class_colors = [(np.random.randint(255),
                     np.random.randint(255),
                     np.random.randint(255)) for _ in range(80)]

    # minimal config consumed by build_transform
    class BaseConfig(object):
        def __init__(self):
            # --------- Data process ---------
            ## input size
            self.train_min_size = [512]  # short edge of image
            self.train_max_size = 736
            self.test_min_size = [512]
            self.test_max_size = 736
            ## Pixel mean & std
            self.pixel_mean = [0.485, 0.456, 0.406]
            self.pixel_std = [0.229, 0.224, 0.225]
            ## Transforms
            self.box_format = 'xyxy'
            self.normalize_coords = False
            self.detr_style = False
            self.trans_config = [
                {'name': 'RandomHFlip'},
                {'name': 'RandomResize'},
                {'name': 'RandomShift', 'max_shift': 32},
            ]

    cfg = BaseConfig()
    # build transform and dataset; honor --is_train for BOTH the transform and
    # the split (previously the flag was parsed but ignored: the transform was
    # hard-coded to is_train=True and the dataset to is_train=False)
    transform = build_transform(cfg, is_train=args.is_train)
    dataset = build_coco(args, transform, is_train=args.is_train)

    for index, (image, target) in enumerate(dataset):
        print("{} / {}".format(index, len(dataset)))
        # CHW tensor -> HWC numpy
        image = image.permute(1, 2, 0).numpy()
        # denormalize, then RGB -> BGR for OpenCV display
        image = (image * cfg.pixel_std + cfg.pixel_mean) * 255
        image = image.astype(np.uint8)[..., (2, 1, 0)].copy()
        orig_h, orig_w = image.shape[:2]
        tgt_bboxes = target["boxes"]
        tgt_labels = target["labels"]
        for box, label in zip(tgt_bboxes, tgt_labels):
            # undo coordinate normalization if the transform produced it
            if cfg.normalize_coords:
                box[..., [0, 2]] *= orig_w
                box[..., [1, 3]] *= orig_h
            # center-based xywh -> xyxy if needed
            if cfg.box_format == 'xywh':
                box_x1y1 = box[..., :2] - box[..., 2:] * 0.5
                box_x2y2 = box[..., :2] + box[..., 2:] * 0.5
                box = torch.cat([box_x1y1, box_x2y2], dim=-1)
            # get box target
            x1, y1, x2, y2 = box.long()
            # get class label
            cls_name = coco_labels_80[label.item()]
            color = class_colors[label.item()]
            # draw bbox
            image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            # put the text on the bbox
            cv2.putText(image, cls_name, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
        cv2.imshow("data", image)
        cv2.waitKey(0)