2 роки тому · a1207db041
--- a/README.md
+++ b/README.md
@@ -78,6 +78,7 @@ python train.py --cuda -d voc --root path/to/VOCdevkit -m yolov1 -bs 16 --max_ep
 
				 
			
 
				 
			
 
				 ### COCO
			
 
				+
			
 
				 - Download COCO.
			
 
				 ```Shell
			
 
				 cd <PyTorch_YOLO_Tutorial>
			
@@ -98,10 +99,6 @@ For example:
 
				 python train.py --cuda -d coco --root path/to/COCO -m yolov1 -bs 16 --max_epoch 150 --wp_epoch 1 --eval_epoch 10 --fp16 --ema --multi_scale
			
 
				 ```
			
 
				 
			
 
				-Due to my limited computing resources, I had to set the batch size to 16 or even smaller during training. I found that for small models such as *-Nano or *-Tiny, their performance seems less sensitive to batch size, such as the YOLOv5-N and S I reproduced, which are even slightly stronger than the official YOLOv5-N and S. However, for large models such as *-Large, their performance is significantly lower than the official performance, which seems to indicate that the large model is more sensitive to batch size.
			
 
				-
			
 
				-I have provided a bash file `train_ddp.sh` that enables DDP training. I hope someone could use more GPUs to train the large models with a larger batch size, such as YOLOv5-L, YOLOX, and YOLOv7-L. If the performance trained with a larger batch size is higher, I would be grateful if you could share the trained model with me.
			
 
				-
			
 
				 * Redesigned YOLOv1~v2:
			
 
				 
			
 
				 | Model         |   Backbone         | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
			
@@ -154,6 +151,33 @@ I have provided a bash file `train_ddp.sh` that enables DDP training. I hope som
 
				 
			
 
				 *While YOLOv7 incorporates several technical details, such as anchor box, SimOTA, AuxiliaryHead, and RepConv, I found it too challenging to fully reproduce. Instead, I created a simpler version of YOLOv7 using an anchor-free structure and SimOTA. As a result, my reproduction had poor performance due to the absence of the other technical details. However, since it was only intended as a tutorial, I am not too concerned about this gap.*
			
 
				 
			
 
				+* YOLOX2:
			
 
				+
			
 
				+| Model    |  Backbone   | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
			
 
				+|----------|-------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
			
 
				+| YOLOX2-N | ELLANNet-N  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| YOLOX2-S | ELLANNet-S  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| YOLOX2-M | ELLANNet-M  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| YOLOX2-L | ELLANNet-L  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+
			
 
				+* ETE-YOLO:
			
 
				+
			
 
				+| Model      |  Backbone   | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
			
 
				+|------------|-------------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
			
 
				+| ETE-YOLO-N | ELLANNet-N  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| ETE-YOLO-S | ELLANNet-S  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| ETE-YOLO-M | ELLANNet-M  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| ETE-YOLO-L | ELLANNet-L  |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+
			
 
				+* Redesigned RT-DETR:
			
 
				+
			
 
				+| Model     | Scale | Epoch | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
			
 
				+|-----------|-------|-------|------------------------|-------------------|-------------------|--------------------|--------|
			
 
				+| RT-DETR-N |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| RT-DETR-S |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| RT-DETR-M |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+| RT-DETR-L |  640  |  300  |                        |                   |                   |                    |  |
			
 
				+
			
 
				 #### Necessary instructions：
			
 
				 
			
 
				 - *All models are trained with ImageNet pretrained weight (IP). All FLOPs are measured with a 640x640 image size on COCO val2017. The FPS is measured with batch size 1 on 3090 GPU from the model inference to the NMS operation.*
			
--- a/config/data_config/dataset_config.py
+++ b/config/data_config/dataset_config.py
@@ -38,9 +38,9 @@ dataset_cfg = {
 
				 
			
 
				     'ourdataset':{
			
 
				         'data_name': 'AnimalDataset',
			
 
				-        'num_classes': 4,
			
 
				+        'num_classes': 9,
			
 
				         'class_indexs': None,
			
 
				-        'class_names': ('butterfly', 'cat', 'dog', 'person'),
			
 
				+        'class_names': ('bird', 'butterfly', 'cat', 'cow', 'dog', 'lion', 'person', 'pig', 'tiger', ),
			
 
				     },
			
 
				 
			
 
				 }
			
--- a/dataset/ourdataset.py
+++ b/dataset/ourdataset.py
@@ -17,7 +17,7 @@ except:
 
				     from data_augment.yolov5_augment import yolov5_mosaic_augment, yolov5_mixup_augment, yolox_mixup_augment
			
 
				 
			
 
				 # please define our class labels
			
 
				-our_class_labels = ('cat',)
			
 
				+our_class_labels = ('bird', 'butterfly', 'cat', 'cow', 'dog', 'lion', 'person', 'pig', 'tiger', )
			
 
				 
			
 
				 
			
 
				 
			
@@ -51,10 +51,16 @@ class OurDataset(Dataset):
 
				 
			
 
				         # augmentation
			
 
				         self.transform = transform
			
 
				-        self.mosaic_prob = trans_config['mosaic_prob'] if trans_config else 0.0
			
 
				-        self.mixup_prob = trans_config['mixup_prob'] if trans_config else 0.0
			
 
				+        self.mosaic_prob = 0
			
 
				+        self.mixup_prob = 0
			
 
				         self.trans_config = trans_config
			
 
				+        if trans_config is not None:
			
 
				+            self.mosaic_prob = trans_config['mosaic_prob']
			
 
				+            self.mixup_prob = trans_config['mixup_prob']
			
 
				+
			
 
				         print('==============================')
			
 
				+        print('Image Set: {}'.format(image_set))
			
 
				+        print('Json file: {}'.format(self.json_file))
			
 
				         print('use Mosaic Augmentation: {}'.format(self.mosaic_prob))
			
 
				         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
			
 
				         print('==============================')
			
@@ -100,14 +106,14 @@ class OurDataset(Dataset):
 
				             image_list.append(img_i)
			
 
				             target_list.append(target_i)
			
 
				 
			
 
				-        # Mosaic Augment
			
 
				+        # Mosaic
			
 
				         if self.trans_config['mosaic_type'] == 'yolov5_mosaic':
			
 
				             image, target = yolov5_mosaic_augment(
			
 
				-                image_list, target_list, self.img_size, self.trans_config)
			
 
				-                
			
 
				+                image_list, target_list, self.img_size, self.trans_config, self.is_train)
			
 
				+
			
 
				         return image, target
			
 
				 
			
 
				-        
			
 
				+
			
 
				     def load_mixup(self, origin_image, origin_target):
			
 
				         # YOLOv5 type Mixup
			
 
				         if self.trans_config['mixup_type'] == 'yolov5_mixup':
			
@@ -156,11 +162,15 @@ class OurDataset(Dataset):
 
				 
			
 
				 
			
 
				     def pull_anno(self, index):
			
 
				-        id_ = self.ids[index]
			
 
				-
			
 
				-        anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=None)
			
 
				+        img_id = self.ids[index]
			
 
				+        im_ann = self.coco.loadImgs(img_id)[0]
			
 
				+        anno_ids = self.coco.getAnnIds(imgIds=[int(img_id)], iscrowd=0)
			
 
				         annotations = self.coco.loadAnns(anno_ids)
			
 
				         
			
 
				+        # image info
			
 
				+        width = im_ann['width']
			
 
				+        height = im_ann['height']
			
 
				+        
			
 
				         #load a target
			
 
				         bboxes = []
			
 
				         labels = []
			
@@ -169,9 +179,9 @@ class OurDataset(Dataset):
 
				                 # bbox
			
 
				                 x1 = np.max((0, anno['bbox'][0]))
			
 
				                 y1 = np.max((0, anno['bbox'][1]))
			
 
				-                x2 = x1 + anno['bbox'][2]
			
 
				-                y2 = y1 + anno['bbox'][3]
			
 
				-                if x2 < x1 or y2 < y1:
			
 
				+                x2 = np.min((width - 1, x1 + np.max((0, anno['bbox'][2] - 1))))
			
 
				+                y2 = np.min((height - 1, y1 + np.max((0, anno['bbox'][3] - 1))))
			
 
				+                if x2 <= x1 or y2 <= y1:
			
 
				                     continue
			
 
				                 # class label
			
 
				                 cls_id = self.class_ids.index(anno['category_id'])
			
@@ -188,22 +198,29 @@ class OurDataset(Dataset):
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     import argparse
			
 
				-    import sys
			
 
				     from build import build_transform
			
 
				     
			
 
				-    parser = argparse.ArgumentParser(description='Our-Dataset')
			
 
				+    parser = argparse.ArgumentParser(description='FreeYOLOv2')
			
 
				 
			
 
				     # opt
			
 
				-    parser.add_argument('--root', default='OurDataset',
			
 
				+    parser.add_argument('--root', default='AnimalDataset',
			
 
				                         help='data root')
			
 
				     parser.add_argument('--split', default='train',
			
 
				                         help='data split')
			
 
				+    parser.add_argument('-size', '--img_size', default=640, type=int, 
			
 
				+                        help='input image size')
			
 
				+    parser.add_argument('--min_box_size', default=8.0, type=float,
			
 
				+                        help='min size of target bounding box.')
			
 
				+    parser.add_argument('--mosaic', default=None, type=float,
			
 
				+                        help='mosaic augmentation.')
			
 
				+    parser.add_argument('--mixup', default=None, type=float,
			
 
				+                        help='mixup augmentation.')
			
 
				 
			
 
				     args = parser.parse_args()
			
 
				     
			
 
				-    is_train = False
			
 
				     img_size = 640
			
 
				-    yolov5_trans_config = {
			
 
				+    is_train = True
			
 
				+    trans_config = {
			
 
				         'aug_type': 'yolov5',
			
 
				         # Basic Augment
			
 
				         'degrees': 0.0,
			
@@ -216,26 +233,20 @@ if __name__ == "__main__":
 
				         'hsv_v': 0.4,
			
 
				         # Mosaic & Mixup
			
 
				         'mosaic_prob': 1.0,
			
 
				+        'mosaic_9x_prob': 0.2,
			
 
				         'mixup_prob': 0.15,
			
 
				         'mosaic_type': 'yolov5_mosaic',
			
 
				         'mixup_type': 'yolov5_mixup',
			
 
				         'mixup_scale': [0.5, 1.5]
			
 
				     }
			
 
				-    ssd_trans_config = {
			
 
				-        'aug_type': 'ssd',
			
 
				-        'mosaic_prob': 0.0,
			
 
				-        'mixup_prob': 0.0
			
 
				-    }
			
 
				-
			
 
				-    transform = build_transform(img_size, yolov5_trans_config, is_train)
			
 
				+    transform, trans_config = build_transform(args, trans_config, max_stride=32, is_train=is_train)
			
 
				 
			
 
				     dataset = OurDataset(
			
 
				         img_size=img_size,
			
 
				         data_dir=args.root,
			
 
				-        image_set='train',
			
 
				-        trans_config=yolov5_trans_config,
			
 
				+        image_set=args.split,
			
 
				         transform=transform,
			
 
				-        is_train=is_train
			
 
				+        trans_config=trans_config,
			
 
				         )
			
 
				     
			
 
				     np.random.seed(0)
			
@@ -248,7 +259,6 @@ if __name__ == "__main__":
 
				         image, target, deltas = dataset.pull_item(i)
			
 
				         # to numpy
			
 
				         image = image.permute(1, 2, 0).numpy()
			
 
				-        # to uint8
			
 
				         image = image.astype(np.uint8)
			
 
				         image = image.copy()
			
 
				         img_h, img_w = image.shape[:2]
			
@@ -262,9 +272,11 @@ if __name__ == "__main__":
 
				             color = class_colors[cls_id]
			
 
				             # class name
			
 
				             label = our_class_labels[cls_id]
			
 
				-            image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 2)
			
 
				-            # put the test on the bbox
			
 
				-            cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
			
 
				+            if x2 - x1 > 0. and y2 - y1 > 0.:
			
 
				+                # draw bbox
			
 
				+                image = cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), color, 2)
			
 
				+                # put the text on the bbox
			
 
				+                cv2.putText(image, label, (int(x1), int(y1 - 5)), 0, 0.5, color, 1, lineType=cv2.LINE_AA)
			
 
				         cv2.imshow('gt', image)
			
 
				         # cv2.imwrite(str(i)+'.jpg', img)
			
 
				         cv2.waitKey(0)