# train.sh
  1. # Dataset setting
  2. DATASET="coco"
  3. DATA_ROOT="/data/datasets/"
  4. # DATA_ROOT="/Users/liuhaoran/Desktop/python_work/object-detection/dataset/"
  5. # MODEL setting
  6. MODEL="yolox_s"
  7. IMAGE_SIZE=640
  8. RESUME="None"
  9. RESUME="weights/coco/yolox_s/yolox_s_best.pth"
  10. if [[ $MODEL == *"yolov8"* ]]; then
  11. # Epoch setting
  12. BATCH_SIZE=128
  13. MAX_EPOCH=500
  14. WP_EPOCH=3
  15. EVAL_EPOCH=10
  16. NO_AUG_EPOCH=20
  17. elif [[ $MODEL == *"yolox2"* ]]; then
  18. # Epoch setting
  19. BATCH_SIZE=128
  20. MAX_EPOCH=300
  21. WP_EPOCH=3
  22. EVAL_EPOCH=10
  23. NO_AUG_EPOCH=20
  24. elif [[ $MODEL == *"yolox"* ]]; then
  25. # Epoch setting
  26. BATCH_SIZE=128
  27. MAX_EPOCH=300
  28. WP_EPOCH=3
  29. EVAL_EPOCH=10
  30. NO_AUG_EPOCH=20
  31. elif [[ $MODEL == *"yolov7"* ]]; then
  32. # Epoch setting
  33. BATCH_SIZE=128
  34. MAX_EPOCH=300
  35. WP_EPOCH=3
  36. EVAL_EPOCH=10
  37. NO_AUG_EPOCH=20
  38. elif [[ $MODEL == *"yolov5"* ]]; then
  39. # Epoch setting
  40. BATCH_SIZE=128
  41. MAX_EPOCH=300
  42. WP_EPOCH=3
  43. EVAL_EPOCH=10
  44. NO_AUG_EPOCH=20
  45. elif [[ $MODEL == *"yolov4"* ]]; then
  46. # Epoch setting
  47. BATCH_SIZE=128
  48. MAX_EPOCH=300
  49. WP_EPOCH=3
  50. EVAL_EPOCH=10
  51. NO_AUG_EPOCH=20
  52. elif [[ $MODEL == *"yolov3"* ]]; then
  53. # Epoch setting
  54. BATCH_SIZE=128
  55. MAX_EPOCH=300
  56. WP_EPOCH=3
  57. EVAL_EPOCH=10
  58. NO_AUG_EPOCH=20
  59. else
  60. # Epoch setting
  61. BATCH_SIZE=128
  62. MAX_EPOCH=150
  63. WP_EPOCH=3
  64. EVAL_EPOCH=10
  65. NO_AUG_EPOCH=0
  66. fi
  67. # -------------------------- Train Pipeline --------------------------
  68. WORLD_SIZE=$1
  69. if [ $WORLD_SIZE == 1 ]; then
  70. python train.py \
  71. --cuda \
  72. --dataset ${DATASET} \
  73. --root ${DATA_ROOT} \
  74. --model ${MODEL} \
  75. --batch_size ${BATCH_SIZE} \
  76. --img_size ${IMAGE_SIZE} \
  77. --wp_epoch ${WP_EPOCH} \
  78. --max_epoch ${MAX_EPOCH} \
  79. --eval_epoch ${EVAL_EPOCH} \
  80. --no_aug_epoch ${NO_AUG_EPOCH} \
  81. --resume ${RESUME} \
  82. --ema \
  83. --fp16 \
  84. --multi_scale
  85. elif [[ $WORLD_SIZE -gt 1 && $WORLD_SIZE -le 8 ]]; then
  86. python -m torch.distributed.run --nproc_per_node=${WORLD_SIZE} --master_port 1669 train.py \
  87. --cuda \
  88. -dist \
  89. --dataset ${DATASET} \
  90. --root ${DATA_ROOT} \
  91. --model ${MODEL} \
  92. --batch_size ${BATCH_SIZE} \
  93. --img_size ${IMAGE_SIZE} \
  94. --wp_epoch ${WP_EPOCH} \
  95. --max_epoch ${MAX_EPOCH} \
  96. --eval_epoch ${EVAL_EPOCH} \
  97. --no_aug_epoch ${NO_AUG_EPOCH} \
  98. --resume ${RESUME} \
  99. --ema \
  100. --fp16 \
  101. --multi_scale \
  102. --sybn
  103. else
  104. echo "The WORLD_SIZE is set to a value greater than 8, indicating the use of multi-machine \
  105. multi-card training mode, which is currently unsupported."
  106. exit 1
  107. fi