"""VOC detection dataset wrapper and VOC-to-COCO annotation converter."""
import os
import os.path as osp
import xml.etree.ElementTree as ET

import cv2
import numpy as np
import torch.utils.data as data

voc_class_indexs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
                    10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
voc_class_labels = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
                    'bus', 'car', 'cat', 'chair', 'cow',
                    'diningtable', 'dog', 'horse', 'motorbike', 'person',
                    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')


class VOCAnnotationTransform(object):
    """Transform a VOC XML annotation into a list of [x1, y1, x2, y2, label_ind]."""

    def __init__(self, class_to_ind=None, keep_difficult=False):
        self.class_to_ind = class_to_ind or dict(
            zip(voc_class_labels, range(len(voc_class_labels))))
        self.keep_difficult = keep_difficult

    def __call__(self, target):
        res = []
        for obj in target.iter('object'):
            difficult = int(obj.find('difficult').text) == 1
            if not self.keep_difficult and difficult:
                continue
            name = obj.find('name').text.lower().strip()
            bbox = obj.find('bndbox')

            pts = ['xmin', 'ymin', 'xmax', 'ymax']
            bndbox = []
            for pt in pts:
                # VOC coordinates are 1-based; some annotation files store
                # floats, so parse via float() before shifting to 0-based
                cur_pt = int(float(bbox.find(pt).text)) - 1
                bndbox.append(cur_pt)
            label_idx = self.class_to_ind[name]
            bndbox.append(label_idx)
            res += [bndbox]  # [x1, y1, x2, y2, label_ind]

        return res  # [[x1, y1, x2, y2, label_ind], ...]


class VOCDataset(data.Dataset):
    def __init__(self,
                 root: str = None,
                 image_set=[('2007', 'trainval'), ('2012', 'trainval')],
                 is_train: bool = False,
                 ):
        # ----------- Basic parameters -----------
        self.image_set = image_set
        self.is_train = is_train
        self.num_classes = 20
        # ----------- Path parameters -----------
        self.root = root
        self._annopath = osp.join('%s', 'Annotations', '%s.xml')
        self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
        # ----------- Data parameters -----------
        self.ids = list()
        for (year, name) in image_set:
            rootpath = osp.join(self.root, 'VOC' + year)
            for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
                self.ids.append((rootpath, line.strip()))
        self.dataset_size = len(self.ids)
        self.class_labels = voc_class_labels
        self.class_indexs = voc_class_indexs
        # ----------- Transform parameters -----------
        self.target_transform = VOCAnnotationTransform()

    def __len__(self):
        return self.dataset_size

    def pull_item(self, index):
        # load an image
        img_id = self.ids[index]
        image = cv2.imread(self._imgpath % img_id, cv2.IMREAD_COLOR)
        height, width, channels = image.shape

        # load an annotation
        anno = ET.parse(self._annopath % img_id).getroot()
        anno = self.target_transform(anno)

        # guard against images with no boxes by reshaping to [0, 5]
        anno = np.array(anno).reshape(-1, 5)
        bboxes = anno[:, :4]  # [N, 4]
        labels = anno[:, 4]   # [N,]

        target = {
            "file_name": "{}.jpg".format(img_id[-1]),
            "bboxes": bboxes,
            "labels": labels,
            "orig_size": [height, width],
            "id": index,
        }

        return target


if __name__ == "__main__":
    import json

    # opt
    is_train = True

    dataset = VOCDataset(
        root='D:/python_work/dataset/VOCdevkit/',
        image_set=[('2007', 'trainval'), ('2012', 'trainval')]
                  if is_train else [('2007', 'test')],
        is_train=is_train,
    )
    print('Data length: ', len(dataset))

    coco_dict = {
        "images": [],
        "annotations": [],
        "type": "instances",
        "categories":
            [{'supercategory': name, 'id': i, 'name': name}
             for i, name in enumerate(voc_class_labels)],
    }

    anno_id = 0
    for i in range(len(dataset)):
        if i % 1000 == 0:
            print(" - [{}] / [{}] ...".format(i, len(dataset)))
        target = dataset.pull_item(i)

        # image info
        file_name = target["file_name"]
        height, width = target["orig_size"]
        image_id = int(target["id"])
        coco_dict["images"].append({
            'file_name': file_name,
            'height': height,
            'width': width,
            'id': image_id,
        })

        # annotation info: COCO boxes are stored as [x, y, w, h]
        bboxes = target["bboxes"]
        labels = target["labels"]
        for bbox, label in zip(bboxes, labels):
            x1, y1, x2, y2 = bbox
            coco_dict["annotations"].append({
                'bbox': [int(x1), int(y1), int(x2 - x1), int(y2 - y1)],
                'area': int((x2 - x1) * (y2 - y1)),
                'category_id': int(label),
                'image_id': image_id,
                'id': anno_id,
                'iscrowd': 0,
            })
            anno_id += 1

    json_file = "D:\\python_work\\dataset\\VOCdevkit\\annotations\\instances_train.json"
    # make sure the output directory exists before writing
    os.makedirs(osp.dirname(json_file), exist_ok=True)
    with open(json_file, 'w') as f:
        json.dump(coco_dict, f, indent=4)

    print(f"Data saved to {json_file}")
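
    # Optional sanity check -- a minimal sketch, assuming pycocotools is
    # installed (it is not required by the converter itself): reload the
    # written file through the COCO API and confirm the image/annotation
    # counts match what was generated above.
    try:
        from pycocotools.coco import COCO

        coco = COCO(json_file)
        print("images: {}, annotations: {}".format(
            len(coco.getImgIds()), len(coco.getAnnIds())))
    except ImportError:
        print("pycocotools not installed; skipping sanity check")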