пре 1 година · ac19413eb4
--- a/odlab/train.sh
+++ b/odlab/train.sh
@@ -2,17 +2,9 @@
 
															 MODEL=$1
														
 
															 DATASET=$2
														
 
															 DATA_ROOT=$3
														
 
															-WORLD_SIZE=$4
														
 
															-MASTER_PORT=$5
														
 
															-if [[ $MODEL == *"yolof"* ]]; then
														
 
															-    # Epoch setting
														
 
															-    BATCH_SIZE=64
														
 
															-    EVAL_EPOCH=2
														
 
															-elif [[ $MODEL == *"fcos"* ]]; then
														
 
															-    # Epoch setting
														
 
															-    BATCH_SIZE=16
														
 
															-    EVAL_EPOCH=2
														
 
															-fi
														
 
															+BATCH_SIZE=$4
														
 
															+WORLD_SIZE=$5
														
 
															+MASTER_PORT=$6
														
 
															 # -------------------------- Train Pipeline --------------------------
														
 
															 if [ $WORLD_SIZE == 1 ]; then
														
@@ -21,8 +13,7 @@ if [ $WORLD_SIZE == 1 ]; then
 
															         --dataset ${DATASET}  \
														
 
															         --root ${DATA_ROOT} \
														
 
															         --model ${MODEL} \
														
 
															-        --batch_size ${BATCH_SIZE} \
														
 
															-        --eval_epoch ${EVAL_EPOCH}
														
 
															+        --batch_size ${BATCH_SIZE}
														
 
															 elif [[ $WORLD_SIZE -gt 1 && $WORLD_SIZE -le 8 ]]; then
														
 
															     python -m torch.distributed.run --nproc_per_node=$WORLD_SIZE --master_port ${MASTER_PORT}  \
														
 
															         train.py \
														
@@ -31,8 +22,7 @@ elif [[ $WORLD_SIZE -gt 1 && $WORLD_SIZE -le 8 ]]; then
 
															         --dataset ${DATASET}  \
														
 
															         --root ${DATA_ROOT} \
														
 
															         --model ${MODEL} \
														
 
															-        --batch_size ${BATCH_SIZE} \
														
 
															-        --eval_epoch ${EVAL_EPOCH}
														
 
															+        --batch_size ${BATCH_SIZE}
														
 
															 else
														
 
															     echo "The WORLD_SIZE is set to a value greater than 8, indicating the use of multi-machine \
														
 
															           multi-card training mode, which is currently unsupported."