Переглянути джерело

fix a bug in ourdataset.py

yjh0410 2 роки тому
батько
коміт
a1207db041
3 змінених файлів з 74 додано та 38 видалено
  1. 28 4
      README.md
  2. 2 2
      config/data_config/dataset_config.py
  3. 44 32
      dataset/ourdataset.py

+ 28 - 4
README.md

@@ -78,6 +78,7 @@ python train.py --cuda -d voc --root path/to/VOCdevkit -m yolov1 -bs 16 --max_ep
 
 
 ### COCO
+
 - Download COCO.
 ```Shell
 cd <PyTorch_YOLO_Tutorial>
@@ -98,10 +99,6 @@ For example:
 python train.py --cuda -d coco --root path/to/COCO -m yolov1 -bs 16 --max_epoch 150 --wp_epoch 1 --eval_epoch 10 --fp16 --ema --multi_scale
 ```
 
-Due to my limited computing resources, I had to set the batch size to 16 or even smaller during training. I found that for small models such as *-Nano or *-Tiny, their performance seems less sensitive to batch size, such as the YOLOv5-N and S I reproduced, which are even slightly stronger than the official YOLOv5-N and S. However, for large models such as *-Large, their performance is significantly lower than the official performance, which seems to indicate that the large model is more sensitive to batch size.
-
-I have provided a bash file `train_ddp.sh` that enables DDP training. I hope someone could use more GPUs to train the large models with a larger batch size, such as YOLOv5-L, YOLOX, and YOLOv7-L. If the performance trained with a larger batch size is higher, I would be grateful if you could share the trained model with me.
-
 * Redesigned YOLOv1~v2:
 
 | Model         |   Backbone         | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
@@ -154,6 +151,33 @@ I have provided a bash file `train_ddp.sh` that enables DDP training. I hope som
 
 *While YOLOv7 incorporates several technical details, such as anchor box, SimOTA, AuxiliaryHead, and RepConv, I found it too challenging to fully reproduce. Instead, I created a simpler version of YOLOv7 using an anchor-free structure and SimOTA. As a result, my reproduction had poor performance due to the absence of the other technical details. However, since it was only intended as a tutorial, I am not too concerned about this gap.*
 
+* YOLOX2:
+
+| Model    |  Backbone   | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|----------|-------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| YOLOX2-N | ELLANNet-N  |  640  |  300  |                        |                   |                   |                    |  |
+| YOLOX2-S | ELLANNet-S  |  640  |  300  |                        |                   |                   |                    |  |
+| YOLOX2-M | ELLANNet-M  |  640  |  300  |                        |                   |                   |                    |  |
+| YOLOX2-L | ELLANNet-L  |  640  |  300  |                        |                   |                   |                    |  |
+
+* ETE-YOLO:
+
+| Model      |  Backbone   | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|------------|-------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| ETE-YOLO-N | ELLANNet-N  |  640  |  300  |                        |                   |                   |                    |  |
+| ETE-YOLO-S | ELLANNet-S  |  640  |  300  |                        |                   |                   |                    |  |
+| ETE-YOLO-M | ELLANNet-M  |  640  |  300  |                        |                   |                   |                    |  |
+| ETE-YOLO-L | ELLANNet-L  |  640  |  300  |                        |                   |                   |                    |  |
+
+* Redesigned RT-DETR:
+
+| Model     | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
+|-----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
+| RT-DETR-N |  640  |  300  |                        |                   |                   |                    |  |
+| RT-DETR-S |  640  |  300  |                        |                   |                   |                    |  |
+| RT-DETR-M |  640  |  300  |                        |                   |                   |                    |  |
+| RT-DETR-L |  640  |  300  |                        |                   |                   |                    |  |
+
 #### Necessary instructions:
 
 - *All models are trained with ImageNet-pretrained weights (IP). All FLOPs are measured with a 640x640 image size on COCO val2017. The FPS is measured with batch size 1 on a 3090 GPU, from model inference through the NMS operation.*

+ 2 - 2
config/data_config/dataset_config.py

@@ -38,9 +38,9 @@ dataset_cfg = {
 
     'ourdataset':{
         'data_name': 'AnimalDataset',
-        'num_classes': 4,
+        'num_classes': 9,
         'class_indexs': None,
-        'class_names': ('butterfly', 'cat', 'dog', 'person'),
+        'class_names': ('bird', 'butterfly', 'cat', 'cow', 'dog', 'lion', 'person', 'pig', 'tiger', ),
     },
 
 }

+ 44 - 32
dataset/ourdataset.py

@@ -17,7 +17,7 @@ except:
     from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
 
 # please define our class labels
-our_class_labels = ('cat',)
+our_class_labels = ('bird', 'butterfly', 'cat', 'cow', 'dog', 'lion', 'person', 'pig', 'tiger', )
 
 
 
@@ -51,10 +51,16 @@ class OurDataset(Dataset):
 
         # augmentation
         self.transform = transform
-        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
-        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
+        self.mosaic_prob = 0
+        self.mixup_prob = 0
         self.trans_config = trans_config
+        if trans_config is not None:
+            self.mosaic_prob = trans_config['mosaic_prob']
+            self.mixup_prob = trans_config['mixup_prob']
+
         print('==============================')
+        print('Image Set: {}'.format(image_set))
+        print('Json file: {}'.format(self.json_file))
         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
         print('==============================')
@@ -100,14 +106,14 @@ class OurDataset(Dataset):
             image_list.append(img_i)
             target_list.append(target_i)
 
-        # Mosaic Augment
+        # Mosaic
         if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
             image, target = yolov5_mosaic_augment(
-                image_list, target_list, self.img_size, self.trans_config)
-                
+                image_list, target_list, self.img_size, self.trans_config, self.is_train)
+
         return image, target
 
-        
+
     def load_mixup(self, origin_image, origin_target):
         # YOLOv5 type Mixup
         if self.trans_config['mixup_type'] == 'yolov5_mixup':
@@ -156,11 +162,15 @@ class OurDataset(Dataset):
 
 
     def pull_anno(self, index):
-        id_ = self.ids[index]
-
-        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None)
+        img_id = self.ids[index]
+        im_ann = self.coco.loadImgs(img_id)[0]
+        anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=0)
         annotations = self.coco.loadAnns(anno_ids)
         
+        # image info
+        width = im_ann['width']
+        height = im_ann['height']
+        
         #load a target
         bboxes = []
         labels = []
@@ -169,9 +179,9 @@ class OurDataset(Dataset):
                 # bbox
                 x1 = np.max((0, anno['bbox'][0]))
                 y1 = np.max((0, anno['bbox'][1]))
-                x2 = x1 + anno['bbox'][2]
-                y2 = y1 + anno['bbox'][3]
-                if x2 < x1 or y2 < y1:
+                x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
+                y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
+                if x2 <= x1 or y2 <= y1:
                     continue
                 # class label
                 cls_id = self.class_ids.index(anno['category_id'])
@@ -188,22 +198,29 @@ class OurDataset(Dataset):
 
 if __name__ == "__main__":
     import argparse
-    import sys
     from build import build_transform
     
-    parser = argparse.ArgumentParser(description='Our-Dataset')
+    parser = argparse.ArgumentParser(description='FreeYOLOv2')
 
     # opt
-    parser.add_argument('--root', default='OurDataset',
+    parser.add_argument('--root', default='AnimalDataset',
                         help='data root')
     parser.add_argument('--split', default='train',
                         help='data split')
+    parser.add_argument('-size', '--img_size', default=640, type=int, 
+                        help='input image size')
+    parser.add_argument('--min_box_size', default=8.0, type=float,
+                        help='min size of target bounding box.')
+    parser.add_argument('--mosaic', default=None, type=float,
+                        help='mosaic augmentation.')
+    parser.add_argument('--mixup', default=None, type=float,
+                        help='mixup augmentation.')
 
     args = parser.parse_args()
     
-    is_train = False
     img_size = 640
-    yolov5_trans_config = {
+    is_train = True
+    trans_config = {
         'aug_type': 'yolov5',
         # Basic Augment
         'degrees': 0.0,
@@ -216,26 +233,20 @@ if __name__ == "__main__":
         'hsv_v': 0.4,
         # Mosaic & Mixup
         'mosaic_prob': 1.0,
+        'mosaic_9x_prob': 0.2,
         'mixup_prob': 0.15,
         'mosaic_type': 'yolov5_mosaic',
         'mixup_type': 'yolov5_mixup',
         'mixup_scale': [0.5, 1.5]
     }
-    ssd_trans_config = {
-        'aug_type': 'ssd',
-        'mosaic_prob': 0.0,
-        'mixup_prob': 0.0
-    }
-
-    transform = build_transform(img_size, yolov5_trans_config, is_train)
+    transform, trans_config = build_transform(args, trans_config, max_stride=32, is_train=is_train)
 
     dataset = OurDataset(
         img_size=img_size,
         data_dir=args.root,
-        image_set='train',
-        trans_config=yolov5_trans_config,
+        image_set=args.split,
         transform=transform,
-        is_train=is_train
+        trans_config=trans_config,
         )
     
     np.random.seed(0)
@@ -248,7 +259,6 @@ if __name__ == "__main__":
         image, target, deltas = dataset.pull_item(i)
         # to numpy
         image = image.permute(1, 2, 0).numpy()
-        # to uint8
         image = image.astype(np.uint8)
         image = image.copy()
         img_h, img_w = image.shape[:2]
@@ -262,9 +272,11 @@ if __name__ == "__main__":
             color = class_colors[cls_id]
             # class name
             label = our_class_labels[cls_id]
-            image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2)
-            # put the test on the bbox
-            cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
+            if x2 - x1 > 0. and y2 - y1 > 0.:
+                # draw bbox
+                image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
+                # put the text on the bbox
+                cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
         cv2.imshow('gt', image)
         # cv2.imwrite(str(i)+'.jpg', img)
         cv2.waitKey(0)