# Source repository: junxiaoyao/YOLO-Tutorial-v2
import os
import cv2
import time
import numpy as np
from pycocotools.coco import COCO

try:
    from .data_augment.strong_augment import MosaicAugment, MixupAugment
    from .voc import VOCDataset
except:
    from  data_augment.strong_augment import MosaicAugment, MixupAugment
    from  voc import VOCDataset


# Category ids, one per entry of `coco_class_labels` below.
# NOTE(review): presumably the original (non-contiguous) COCO category ids,
# listed in the same order as the labels -- confirm against the annotation file.
coco_class_indexs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
# Human-readable class names; the demo script at the bottom of this file
# indexes this tuple with the contiguous class label.
coco_class_labels = ('person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',  'traffic light',  'fire hydrant',  'stop sign',  'parking meter',  'bench',  'bird',  'cat',  'dog',  'horse',  'sheep',  'cow',  'elephant',  'bear',  'zebra',  'giraffe',  'backpack',  'umbrella',  'handbag',  'tie',  'suitcase',  'frisbee',  'skis',  'snowboard',  'sports ball',  'kite',  'baseball bat',  'baseball glove',  'skateboard',  'surfboard',  'tennis racket',  'bottle',  'wine glass',  'cup',  'fork',  'knife',  'spoon',  'bowl',  'banana',  'apple',  'sandwich',  'orange',  'broccoli',  'carrot',  'hot dog',  'pizza',  'donut',  'cake',  'chair',  'couch',  'potted plant',  'bed',  'dining table',  'toilet',  'tv',  'laptop',  'mouse',  'remote',  'keyboard',  'cell phone',  'microwave',  'oven',  'toaster',  'sink',  'refrigerator',  'book',  'clock',  'vase',  'scissors',  'teddy bear',  'hair drier',  'toothbrush')
# Maps an image-set name to its annotation file, which COCODataset looks up
# under `<data_dir>/annotations/`.
# NOTE(review): COCODataset in this file only ever selects 'train2017' or
# 'val2017', so the 'test2017' entry is not exercised here; the official COCO
# release appears to name that file 'image_info_test2017.json' -- confirm
# before relying on it.
coco_json_files = {
    'train2017'      : 'instances_train2017.json',
    'val2017'        : 'instances_val2017.json',
    'test2017'       : 'image_info_test.json',
}


class COCODataset(VOCDataset):
    """COCO detection dataset.

    Subclasses ``VOCDataset`` (defined elsewhere in the project) and defines
    the constructor plus the two raw-data accessors ``pull_image`` and
    ``pull_anno``. Both accessors resolve ``index`` through ``self.ids`` so
    that an image and its annotations always refer to the same COCO image id.

    NOTE: ``VOCDataset.__init__`` is not invoked here; every attribute used by
    this class is assigned directly in ``__init__`` below.
    """

    def __init__(self,
                 cfg,
                 data_dir  :str = None,
                 transform = None,
                 is_train  :bool = False,
                 use_mask  :bool = False,
                 ):
        """Load the COCO annotation index and set up augmentation settings.

        Args:
            cfg: Configuration object. When ``is_train`` is true the attributes
                ``train_img_size``, ``affine_params``, ``mosaic_prob``,
                ``mixup_prob`` and ``copy_paste`` are read from it.
            data_dir: Dataset root containing ``annotations/`` and the image
                folders ``train2017/`` / ``val2017/``.
            transform: Stored on ``self.transform``; not called in this class.
            is_train: Selects ``train2017`` (True) or ``val2017`` (False) and
                enables the strong augmentations.
            use_mask: Stored on ``self.use_mask``; not otherwise used here.
        """
        # ----------- Basic parameters -----------
        self.data_dir  = data_dir
        self.image_set = "train2017" if is_train else "val2017"
        self.is_train  = is_train
        self.use_mask  = use_mask
        self.num_classes = 80
        # ----------- Data parameters -----------
        self.json_file = coco_json_files[self.image_set]
        self.coco = COCO(os.path.join(self.data_dir, 'annotations', self.json_file))
        self.ids = self.coco.getImgIds()
        # Sorted category ids: a category's position in this list is the
        # contiguous class label returned by ``pull_anno``.
        self.class_ids = sorted(self.coco.getCatIds())
        self.dataset_size = len(self.ids)
        self.class_labels = coco_class_labels
        self.class_indexs = coco_class_indexs
        # ----------- Transform parameters -----------
        self.transform = transform
        if is_train:
            self.mosaic_augment = MosaicAugment(cfg.train_img_size, cfg.affine_params, is_train)
            self.mixup_augment = MixupAugment(cfg.train_img_size)
            self.mosaic_prob = cfg.mosaic_prob
            self.mixup_prob  = cfg.mixup_prob
            self.copy_paste  = cfg.copy_paste
        else:
            # Evaluation: all strong augmentations are disabled.
            self.mosaic_prob = 0.0
            self.mixup_prob  = 0.0
            self.copy_paste  = 0.0
            self.mosaic_augment = None
            self.mixup_augment  = None

        print(' ============ Strong augmentation info. ============ ')
        print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
        print('use Mixup Augmentation: {}'.format(self.mixup_prob))
        print('use Copy-paste Augmentation: {}'.format(self.copy_paste))

    def pull_image(self, index):
        """Read the image at position ``index`` of ``self.ids`` from disk.

        Args:
            index: Position in ``self.ids``.

        Returns:
            Tuple ``(image, image_id)`` where ``image`` is the array returned
            by ``cv2.imread`` and ``image_id`` is the COCO image id.

        Raises:
            AssertionError: If ``cv2.imread`` could not load the file.
        """
        # Resolve the index through self.ids -- the same lookup pull_anno
        # uses -- so image and annotations cannot drift apart.
        img_id = self.ids[index]
        image_info = self.coco.loadImgs(img_id)[0]

        # load the image
        image_path = os.path.join(self.data_dir, self.image_set, image_info["file_name"])
        image = cv2.imread(image_path)

        # Raised explicitly (instead of a bare ``assert``) so the check is
        # still active under ``python -O`` and reports the offending path.
        if image is None:
            raise AssertionError("Failed to read image: {}".format(image_path))

        return image, image_info["id"]

    def pull_anno(self, index):
        """Return the boxes and class labels for position ``index`` of ``self.ids``.

        Annotations flagged ``iscrowd`` are excluded, as are annotations
        without a ``bbox``, with non-positive ``area``, or whose box is
        inverted after clipping to the image.

        Args:
            index: Position in ``self.ids``.

        Returns:
            Tuple ``(bboxes, labels)``: ``bboxes`` has shape ``(N, 4)`` holding
            ``[x1, y1, x2, y2]`` clipped to the image; ``labels`` has shape
            ``(N,)`` holding each category's position in ``self.class_ids``.
        """
        img_id = self.ids[index]
        # image info
        image_info = self.coco.loadImgs(img_id)[0]
        width = image_info['width']
        height = image_info['height']

        # load a target
        anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=False)
        annotations = self.coco.loadAnns(anno_ids)
        bboxes = []
        labels = []
        for anno in annotations:
            if not ('bbox' in anno and anno['area'] > 0):
                continue

            # COCO stores boxes as [x, y, w, h]; convert to corner format and
            # clip to the valid pixel range [0, width - 1] x [0, height - 1].
            x1 = np.max((0, anno['bbox'][0]))
            y1 = np.max((0, anno['bbox'][1]))
            x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
            if x2 < x1 or y2 < y1:
                continue

            # class label: position of the category id among the sorted ids
            cls_id = self.class_ids.index(anno['category_id'])

            bboxes.append([x1, y1, x2, y2])
            labels.append(cls_id)

        # The reshape keeps a well-formed (0, 4) / (0,) result when the image
        # has no usable annotation.
        bboxes = np.array(bboxes).reshape(-1, 4)
        labels = np.array(labels).reshape(-1)

        return bboxes, labels


if __name__ == "__main__":
    # Visual sanity check: build the dataset, pull up to 1000 samples and
    # display each one with its ground-truth boxes drawn on top.
    import time
    import argparse
    from build import build_transform

    parser = argparse.ArgumentParser(description='COCO-Dataset')

    # opt
    parser.add_argument('--root', default='D:/python_work/dataset/COCO/',
                        help='data root')
    parser.add_argument('--is_train', action="store_true", default=False,
                        help='load the training split with training-time augmentation.')
    parser.add_argument('--aug_type', default="yolo", type=str, choices=["yolo", "ssd"],
                        help='yolo, ssd.')

    args = parser.parse_args()

    class YoloBaseConfig(object):
        """Demo configuration for the YOLO-style augmentation pipeline."""

        def __init__(self) -> None:
            self.max_stride = 32
            # ---------------- Data process config ----------------
            self.box_format = 'xywh'
            self.normalize_coords = False
            self.mosaic_prob = 1.0
            self.mixup_prob  = 0.15
            self.copy_paste  = 0.3
            ## Pixel mean & std
            self.pixel_mean = [0., 0., 0.]
            self.pixel_std  = [255., 255., 255.]
            ## Transforms
            self.train_img_size = 640
            self.test_img_size  = 640
            self.use_ablu = True
            self.aug_type = 'yolo'
            self.affine_params = {
                'degrees': 0.0,
                'translate': 0.2,
                'scale': [0.1, 2.0],
                'shear': 0.0,
                'perspective': 0.0,
                'hsv_h': 0.015,
                'hsv_s': 0.7,
                'hsv_v': 0.4,
            }

    class SSDBaseConfig(object):
        """Demo configuration for the SSD-style augmentation pipeline."""

        # NOTE(review): this config defines no `affine_params`, but
        # COCODataset.__init__ reads cfg.affine_params when is_train is set,
        # so `--is_train --aug_type ssd` looks like it will raise
        # AttributeError -- confirm and add the attribute if needed.
        def __init__(self) -> None:
            self.max_stride = 32
            # ---------------- Data process config ----------------
            self.box_format = 'xywh'
            self.normalize_coords = False
            self.mosaic_prob = 0.0
            self.mixup_prob  = 0.0
            self.copy_paste  = 0.0
            ## Pixel mean & std
            self.pixel_mean = [0., 0., 0.]
            self.pixel_std  = [255., 255., 255.]
            ## Transforms
            self.train_img_size = 640
            self.test_img_size  = 640
            self.aug_type = 'ssd'

    if args.aug_type == "yolo":
        cfg = YoloBaseConfig()
    elif args.aug_type == "ssd":
        cfg = SSDBaseConfig()

    transform = build_transform(cfg, args.is_train)
    dataset = COCODataset(cfg, args.root, transform, args.is_train)

    # Fixed seed so every class keeps the same colour between runs.
    np.random.seed(0)
    class_colors = [(np.random.randint(255),
                     np.random.randint(255),
                     np.random.randint(255)) for _ in range(80)]
    print('Data length: ', len(dataset))

    # Never index past the end of a dataset with fewer than 1000 samples.
    num_samples = min(1000, len(dataset))
    for i in range(num_samples):
        t0 = time.time()
        image, target, deltas = dataset.pull_item(i)
        print("Load data: {} s".format(time.time() - t0))

        # to numpy
        image = image.permute(1, 2, 0).numpy()

        # denormalize
        image = image * cfg.pixel_std + cfg.pixel_mean

        # rgb -> bgr
        if transform.color_format == 'rgb':
            image = image[..., (2, 1, 0)]

        # to uint8
        image = image.astype(np.uint8)
        image = image.copy()
        img_h, img_w = image.shape[:2]

        boxes = target["boxes"]
        labels = target["labels"]

        for box, label in zip(boxes, labels):
            if cfg.box_format == 'xyxy':
                x1, y1, x2, y2 = box
            elif cfg.box_format == 'xywh':
                # centre/size -> corner coordinates
                cx, cy, bw, bh = box
                x1 = cx - 0.5 * bw
                y1 = cy - 0.5 * bh
                x2 = cx + 0.5 * bw
                y2 = cy + 0.5 * bh

            if cfg.normalize_coords:
                x1 *= img_w
                y1 *= img_h
                x2 *= img_w
                y2 *= img_h

            cls_id = int(label)
            color = class_colors[cls_id]
            # class name
            class_name = coco_class_labels[cls_id]
            image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
            # put the text on the bbox
            cv2.putText(image, class_name, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
        cv2.imshow('gt', image)
        # Block until a key is pressed before showing the next sample.
        cv2.waitKey(0)