Browse Source

keep training YOLOX-L

yjh0410 2 years ago
parent
commit
6f4f753562

+ 1 - 1
config/data_config/transform_config.py

@@ -451,7 +451,7 @@ rtcdet_v2_nano_trans_config = {
     'hsv_v': 0.4,
     # Mosaic & Mixup
     'mosaic_prob': 1.0,
-    'mixup_prob': 0.5,
+    'mixup_prob': 0.1,
     'mosaic_type': 'yolov5_mosaic',
     'mixup_type': 'yolov5_mixup',
     'mixup_scale': [0.5, 1.5]   # "mixup_scale" is not used for YOLOv5MixUp

+ 61 - 0
dataset/scripts/data_to_h5py.py

@@ -0,0 +1,61 @@
+import cv2
+import h5py
+import os
+import argparse
+import numpy as np
+import sys
+
+sys.path.append('..')
+from voc import VOCDetection
+from coco import COCODataset
+
+# ---------------------- Opt ----------------------
+parser = argparse.ArgumentParser(description='Cache-Dataset')
+parser.add_argument('-d', '--dataset', default='voc',
+                    help='coco, voc, widerface, crowdhuman')
+parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/',
+                    help='data root')
+parser.add_argument('-size', '--img_size', default=640, type=int,
+                    help='input image size.')
+parser.add_argument('--mosaic', default=None, type=float,
+                    help='mosaic augmentation.')
+parser.add_argument('--mixup', default=None, type=float,
+                    help='mixup augmentation.')
+parser.add_argument('--keep_ratio', action="store_true", default=False,
+                    help='keep aspect ratio.')
+
+args = parser.parse_args()
+
+# ---------------------- Build Dataset ----------------------
+# Only 'voc' and 'coco' are built here; each one is read from its own sub-folder of --root.
+if args.dataset == 'voc':
+    root = os.path.join(args.root, 'VOCdevkit')
+    dataset = VOCDetection(args.img_size, root)
+elif args.dataset == 'coco':
+    root = os.path.join(args.root, 'COCO')
+    dataset = COCODataset(args.img_size, root)
+print('Data length: ', len(dataset))
+
+# ---------------------- Main Process ----------------------
+# Show every image resized to img_size; cv2.waitKey(0) blocks until a key is pressed.
+for i in range(len(dataset)):
+    # load an image
+    image, image_id = dataset.pull_image(i)
+    orig_h, orig_w, _ = image.shape
+
+    # resize image
+    if args.keep_ratio:
+        # scale the longer side to img_size, then pad to a square filled with 114
+        r = args.img_size / max(orig_h, orig_w)
+        if r != 1:
+            new_size = (int(orig_w * r), int(orig_h * r))  # (width, height) order for cv2.resize
+            image = cv2.resize(image, new_size, interpolation=cv2.INTER_LINEAR)
+        pad_image = np.ones([args.img_size, args.img_size, image.shape[2]], dtype=image.dtype) * 114
+        pad_image[:image.shape[0], :image.shape[1]] = image  # also valid when r == 1 (no resize done)
+        image = pad_image
+    else:
+        image = cv2.resize(image, (int(args.img_size), int(args.img_size)))
+
+    cv2.imshow('image', image)
+    # cv2.imwrite(str(i)+'.jpg', image)
+    cv2.waitKey(0)

+ 44 - 17
dataset/voc.py

@@ -97,7 +97,9 @@ class VOCDetection(data.Dataset):
                  image_sets=[('2007', 'trainval'), ('2012', 'trainval')],
                  trans_config=None,
                  transform=None,
-                 is_train=False):
+                 is_train=False,
+                 load_cache=False
+                 ):
         self.root = data_dir
         self.img_size = img_size
         self.image_set = image_sets
@@ -106,6 +108,7 @@ class VOCDetection(data.Dataset):
         self._imgpath = osp.join('%s', 'JPEGImages', '%s.jpg')
         self.ids = list()
         self.is_train = is_train
+        self.load_cache = load_cache
         for (year, name) in image_sets:
             rootpath = osp.join(self.root, 'VOC' + year)
             for line in open(osp.join(rootpath, 'ImageSets', 'Main', name + '.txt')):
@@ -121,6 +124,10 @@ class VOCDetection(data.Dataset):
         print('use Mixup Augmentation: {}'.format(self.mixup_prob))
         print('==============================')
 
+        # load cache data
+        if load_cache:
+            self._load_cache()
+
 
     def __getitem__(self, index):
         image, target, deltas = self.pull_item(index)
@@ -131,24 +138,43 @@ class VOCDetection(data.Dataset):
         return len(self.ids)
 
 
-    def load_image_target(self, index):
-        # load an image
-        img_id = self.ids[index]
-        image = cv2.imread(self._imgpath % img_id)
-        height, width, channels = image.shape
+    def _load_cache(self):
+        # load image cache
+        self.image_list = None  # TODO: H5PY file
 
-        # load an annotation
-        anno = ET.parse(self._annopath % img_id).getroot()
-        if self.target_transform is not None:
+        # load target cache
+        self.target_list = []
+        for img_id in self.ids:
+            anno = ET.parse(self._annopath % img_id).getroot()
             anno = self.target_transform(anno)
+            anno = np.array(anno).reshape(-1, 5)
+            self.target_list.append({"boxes": anno[:, :4], "labels": anno[:, 4]})
+        
 
-        # guard against no boxes via resizing
-        anno = np.array(anno).reshape(-1, 5)
-        target = {
-            "boxes": anno[:, :4],
-            "labels": anno[:, 4],
-            "orig_size": [height, width]
-        }
+    def load_image_target(self, index):  # returns (image, target dict) for the sample at `index`
+        if self.load_cache:  # read from the lists prepared by _load_cache()
+            image = self.image_list[index]  # NOTE(review): _load_cache() still sets image_list to None (TODO) - confirm before enabling
+            target = self.target_list[index]
+            height, width, channels = image.shape
+            target["orig_size"] = [height, width]  # stored on the cached dict itself
+        else:
+            # load an image
+            img_id = self.ids[index]
+            image = cv2.imread(self._imgpath % img_id)
+            height, width, channels = image.shape
+
+            # load an annotation
+            anno = ET.parse(self._annopath % img_id).getroot()
+            if self.target_transform is not None:
+                anno = self.target_transform(anno)
+
+            # reshape to (-1, 5) so that an image with no boxes yields an empty (0, 5) array
+            anno = np.array(anno).reshape(-1, 5)
+            target = {
+                "boxes": anno[:, :4],
+                "labels": anno[:, 4],
+                "orig_size": [height, width]
+            }
         
         return image, target
 
@@ -259,7 +285,8 @@ if __name__ == "__main__":
                         help='mixup augmentation.')
     parser.add_argument('--is_train', action="store_true", default=False,
                         help='mixup augmentation.')
-    
+    parser.add_argument('--load_cache', action="store_true", default=False,
+                        help='load cached data.')
     
     args = parser.parse_args()
 

+ 9 - 1
engine.py

@@ -507,7 +507,7 @@ class YoloxTrainer(object):
             images = images.to(self.device, non_blocking=True).float() / 255.
 
             # Multi scale
-            if self.args.multi_scale:
+            if self.args.multi_scale and ni % 10 == 0:
                 images, targets, img_size = self.rescale_image_targets(
                     images, targets, self.model_cfg['stride'], self.args.min_box_size, self.model_cfg['multi_scale'])
             else:
@@ -590,6 +590,14 @@ class YoloxTrainer(object):
             print(' - Close < perspective of rotation > ...')
             self.trans_cfg['perspective'] = 0.0
 
+        # close random affine
+        if 'translate' in self.trans_cfg.keys() and self.trans_cfg['translate'] > 0.0:
+            print(' - Close < translate of affine > ...')
+            self.trans_cfg['translate'] = 0.0
+        if 'scale' in self.trans_cfg.keys():
+            print(' - Close < scale of affine >...')
+            self.trans_cfg['scale'] = [1.0, 1.0]
+
         # build a new transform for second stage
         print(' - Rebuild transforms ...')
         self.train_transform, self.trans_cfg = build_transform(

+ 9 - 9
models/detectors/rtcdet_v2/README.md

@@ -2,19 +2,19 @@
 
 |   Model    | Scale | Batch | AP<sup>test<br>0.5:0.95 | AP<sup>test<br>0.5 | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |------------|-------|-------|-------------------------|--------------------|------------------------|-------------------|-------------------|--------------------|--------|
-| RTCDetv2-N |  640  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
-| RTCDetv2-T |  640  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
-| RTCDetv2-S |  640  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
-| RTCDetv2-M |  640  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
-| RTCDetv2-L |  640  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-N |  640  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-T |  640  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-S |  640  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-M |  640  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-L |  640  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
 | RTCDetv2-X |  640  |       |                         |                    |                        |                   |                   |                    |  |
 
 |   Model    | Scale | Batch | AP<sup>test<br>0.5:0.95 | AP<sup>test<br>0.5 | AP<sup>val<br>0.5:0.95 | AP<sup>val<br>0.5 | FLOPs<br><sup>(G) | Params<br><sup>(M) | Weight |
 |------------|-------|-------|-------------------------|--------------------|------------------------|-------------------|-------------------|--------------------|--------|
-| RTCDetv2-P |  320  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
-| RTCDetv2-P |  416  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
-| RTCDetv2-P |  512  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
-| RTCDetv2-P |  640  | 4xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-P |  320  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-P |  416  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-P |  512  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
+| RTCDetv2-P |  640  | 8xb16 |                         |                    |                        |                   |                   |                    |  |
 
 - For training, we train the RTCDetv2 series for 300 epochs on COCO.
 - For data augmentation, we use the large-scale jitter (LSJ), Mosaic augmentation and Mixup augmentation, following the setting of [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX), but we remove the rotation transformation which is used in YOLOX's strong augmentation.

+ 2 - 2
train_ddp.sh

@@ -11,9 +11,9 @@ python -m torch.distributed.run --nproc_per_node=8 train.py \
                                                     --wp_epoch 3 \
                                                     --max_epoch 300 \
                                                     --eval_epoch 10 \
-                                                    --no_aug_epoch 20 \
+                                                    --no_aug_epoch 15 \
                                                     --ema \
                                                     --fp16 \
                                                     --sybn \
                                                     --multi_scale \
-                                                    # --resume weights/coco/yolovx_s/yolovx_s_best.pth \
+                                                     --resume weights/coco/yolox_l/yolox_l_best.pth \