浏览代码

unify the train script

yjh0410 2 年之前
父节点
当前提交
c6a1334557
共有 3 个文件被更改,包括 87 次插入和 126 次删除
  1. 87 0
      train.sh
  2. 0 64
      train_multi_gpus.sh
  3. 0 62
      train_single_gpu.sh

+ 87 - 0
train.sh

@@ -0,0 +1,87 @@
+# Dataset setting
+DATASET="coco"
+DATA_ROOT="/data/datasets/"
+# DATA_ROOT="/Users/liuhaoran/Desktop/python_work/object-detection/dataset/"
+
+# MODEL setting
+MODEL="yolov8_n"
+IMAGE_SIZE=640
+RESUME="None"
+if [[ $MODEL == *"yolov8"* ]]; then
+    # Epoch setting
+    BATCH_SIZE=128
+    MAX_EPOCH=500
+    WP_EPOCH=3
+    EVAL_EPOCH=10
+    NO_AUG_EPOCH=20
+elif [[ $MODEL == *"yolox"* ]]; then
+    # Epoch setting
+    BATCH_SIZE=64
+    MAX_EPOCH=300
+    WP_EPOCH=3
+    EVAL_EPOCH=10
+    NO_AUG_EPOCH=15
+elif [[ $MODEL == *"yolov7"* ]]; then
+    # Epoch setting
+    BATCH_SIZE=128
+    MAX_EPOCH=300
+    WP_EPOCH=3
+    EVAL_EPOCH=10
+    NO_AUG_EPOCH=20
+elif [[ $MODEL == *"yolov5"* || $MODEL == *"yolov4"* || $MODEL == *"yolov3"* ]]; then
+    # Epoch setting
+    BATCH_SIZE=128
+    MAX_EPOCH=300
+    WP_EPOCH=3
+    EVAL_EPOCH=10
+    NO_AUG_EPOCH=15
+else
+    # Epoch setting
+    BATCH_SIZE=128
+    MAX_EPOCH=150
+    WP_EPOCH=3
+    EVAL_EPOCH=10
+    NO_AUG_EPOCH=0
+fi
+
+# -------------------------- Train Pipeline --------------------------
+WORLD_SIZE=1
+if [ $WORLD_SIZE == 1 ]; then
+    python train.py \
+            --cuda \
+            --dataset ${DATASET} \
+            --root ${DATA_ROOT} \
+            --model ${MODEL} \
+            --batch_size ${BATCH_SIZE} \
+            --img_size ${IMAGE_SIZE} \
+            --wp_epoch ${WP_EPOCH} \
+            --max_epoch ${MAX_EPOCH} \
+            --eval_epoch ${EVAL_EPOCH} \
+            --no_aug_epoch ${NO_AUG_EPOCH} \
+            --resume ${RESUME} \
+            --ema \
+            --fp16 \
+            --multi_scale
+elif [[ $WORLD_SIZE -gt 1 && $WORLD_SIZE -le 8 ]]; then
+    python -m torch.distributed.run --nproc_per_node=8 train.py \
+            --cuda \
+            -dist \
+            --dataset ${DATASET} \
+            --root ${DATA_ROOT} \
+            --model ${MODEL} \
+            --batch_size ${BATCH_SIZE} \
+            --img_size ${IMAGE_SIZE} \
+            --wp_epoch ${WP_EPOCH} \
+            --max_epoch ${MAX_EPOCH} \
+            --eval_epoch ${EVAL_EPOCH} \
+            --no_aug_epoch ${NO_AUG_EPOCH} \
+            --resume ${RESUME} \
+            --ema \
+            --fp16 \
+            --multi_scale \
+            --sybn
+else
+    echo "The WORLD_SIZE is set to a value greater than 8, indicating the use of multi-machine \
+          multi-card training mode, which is currently unsupported."
+    exit 1
+fi

+ 0 - 64
train_multi_gpus.sh

@@ -1,64 +0,0 @@
-# Dataset setting
-DATASET="coco"  # dataset name, passed to train.py as --dataset
-DATA_ROOT="/data/datasets/"  # dataset root directory, passed to train.py as --root
-# DATA_ROOT="/Users/liuhaoran/Desktop/python_work/object-detection/dataset/"
-
-# MODEL setting
-MODEL="yolov8_n"  # model name; the schedule below is chosen by substring match on it
-IMAGE_SIZE=640  # passed to train.py as --img_size
-RESUME="None"  # passed to train.py as --resume
-if [[ $MODEL == *"yolov8"* ]]; then
-    # Epoch setting used when MODEL contains "yolov8"
-    BATCH_SIZE=128
-    MAX_EPOCH=500
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=20
-elif [[ $MODEL == *"yolox"* ]]; then
-    # Epoch setting used when MODEL contains "yolox"
-    BATCH_SIZE=64
-    MAX_EPOCH=300
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=15
-elif [[ $MODEL == *"yolov7"* ]]; then
-    # Epoch setting used when MODEL contains "yolov7"
-    BATCH_SIZE=128
-    MAX_EPOCH=300
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=20
-elif [[ $MODEL == *"yolov5"* || $MODEL == *"yolov4"* || $MODEL == *"yolov3"* ]]; then
-    # Epoch setting used when MODEL contains "yolov5", "yolov4" or "yolov3"
-    BATCH_SIZE=128
-    MAX_EPOCH=300
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=15
-else
-    # Epoch setting used for any other MODEL value
-    BATCH_SIZE=128
-    MAX_EPOCH=150
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=0
-fi
-
-# -------------------------- Train Pipeline --------------------------
-python -m torch.distributed.run --nproc_per_node=8 train.py \
-                                                    --cuda \
-                                                    -dist \
-                                                    --dataset ${DATASET} \
-                                                    --root ${DATA_ROOT} \
-                                                    --model ${MODEL} \
-                                                    --batch_size ${BATCH_SIZE} \
-                                                    --img_size ${IMAGE_SIZE} \
-                                                    --wp_epoch ${WP_EPOCH} \
-                                                    --max_epoch ${MAX_EPOCH} \
-                                                    --eval_epoch ${EVAL_EPOCH} \
-                                                    --no_aug_epoch ${NO_AUG_EPOCH} \
-                                                    --resume ${RESUME} \
-                                                    --ema \
-                                                    --fp16 \
-                                                    --multi_scale \
-                                                    --sybn \

+ 0 - 62
train_single_gpu.sh

@@ -1,62 +0,0 @@
-# Dataset setting
-DATASET="coco"  # dataset name, passed to train.py as --dataset
-DATA_ROOT="/data/datasets/"  # dataset root directory, passed to train.py as --root
-# DATA_ROOT="/Users/liuhaoran/Desktop/python_work/object-detection/dataset/"
-
-# MODEL setting
-MODEL="yolov8_n"  # model name; the schedule below is chosen by substring match on it
-IMAGE_SIZE=640  # passed to train.py as --img_size
-RESUME="None"  # passed to train.py as --resume
-if [[ $MODEL == *"yolov8"* ]]; then
-    # Epoch setting used when MODEL contains "yolov8"
-    MAX_EPOCH=500
-    BATCH_SIZE=16
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=20
-elif [[ $MODEL == *"yolox"* ]]; then
-    # Epoch setting used when MODEL contains "yolox"
-    MAX_EPOCH=300
-    BATCH_SIZE=16
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=15
-elif [[ $MODEL == *"yolov7"* ]]; then
-    # Epoch setting used when MODEL contains "yolov7"
-    MAX_EPOCH=300
-    BATCH_SIZE=16
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=20
-elif [[ $MODEL == *"yolov5"* || $MODEL == *"yolov4"* || $MODEL == *"yolov3"* ]]; then
-    # Epoch setting used when MODEL contains "yolov5", "yolov4" or "yolov3"
-    MAX_EPOCH=300
-    BATCH_SIZE=16
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=15
-else
-    # Epoch setting used for any other MODEL value
-    MAX_EPOCH=150
-    BATCH_SIZE=16
-    WP_EPOCH=3
-    EVAL_EPOCH=10
-    NO_AUG_EPOCH=0
-fi
-
-# -------------------------- Train Pipeline --------------------------
-python train.py \
-        --cuda \
-        --dataset ${DATASET} \
-        --root ${DATA_ROOT} \
-        --model ${MODEL} \
-        --batch_size ${BATCH_SIZE} \
-        --img_size ${IMAGE_SIZE} \
-        --wp_epoch ${WP_EPOCH} \
-        --max_epoch ${MAX_EPOCH} \
-        --eval_epoch ${EVAL_EPOCH} \
-        --no_aug_epoch ${NO_AUG_EPOCH} \
-        --resume ${RESUME} \
-        --ema \
-        --fp16 \
-        --multi_scale