yjh0410 2 éve
szülő
commit
cee58ef4e6
3 módosított fájl, 56 hozzáadás és 13 törlés
  1. 18 13
      engine.py
  2. 19 0
      train_multi_gpus.sh
  3. 19 0
      train_single_gpu.sh

+ 18 - 13
engine.py

@@ -687,6 +687,7 @@ class RTMTrainer(object):
         self.device = device
         self.criterion = criterion
         self.world_size = world_size
+        self.grad_accumulate = args.grad_accumulate
         self.no_aug_epoch = args.no_aug_epoch
         self.clip_grad = 35
         self.heavy_eval = False
@@ -719,7 +720,7 @@ class RTMTrainer(object):
         self.scaler = torch.cuda.amp.GradScaler(enabled=self.args.fp16)
 
         # ---------------------------- Build Optimizer ----------------------------
-        self.optimizer_dict['lr0'] *= self.args.batch_size / 64
+        self.optimizer_dict['lr0'] *= self.args.batch_size * self.grad_accumulate / 64
         self.optimizer, self.start_epoch = build_yolo_optimizer(self.optimizer_dict, model, self.args.resume)
 
         # ---------------------------- Build LR Scheduler ----------------------------
@@ -855,6 +856,9 @@ class RTMTrainer(object):
                 # Compute loss
                 loss_dict = self.criterion(outputs=outputs, targets=targets, epoch=self.epoch)
                 losses = loss_dict['losses']
+                # Grad Accumulate
+                if self.grad_accumulate > 1:
+                    losses /= self.grad_accumulate
 
                 loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
 
@@ -862,18 +866,19 @@ class RTMTrainer(object):
             self.scaler.scale(losses).backward()
 
             # Optimize
-            if self.clip_grad > 0:
-                # unscale gradients
-                self.scaler.unscale_(self.optimizer)
-                # clip gradients
-                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
-            # optimizer.step
-            self.scaler.step(self.optimizer)
-            self.scaler.update()
-            self.optimizer.zero_grad()
-            # ema
-            if self.model_ema is not None:
-                self.model_ema.update(model)
+            if ni % self.grad_accumulate == 0:
+                if self.clip_grad > 0:
+                    # unscale gradients
+                    self.scaler.unscale_(self.optimizer)
+                    # clip gradients
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=self.clip_grad)
+                # optimizer.step
+                self.scaler.step(self.optimizer)
+                self.scaler.update()
+                self.optimizer.zero_grad()
+                # ema
+                if self.model_ema is not None:
+                    self.model_ema.update(model)
 
             # Logs
             if distributed_utils.is_main_process() and iter_i % 10 == 0:

+ 19 - 0
train_multi_gpus.sh

@@ -35,3 +35,22 @@ python -m torch.distributed.run --nproc_per_node=8 train.py \
 #                                                     --sybn \
 #                                                     --multi_scale \
 #                                                     #  --resume weights/coco/yolov5_l/yolov5_l_best.pth \
+
+# -------------------------- Train My RTCDet series --------------------------
+# python -m torch.distributed.run --nproc_per_node=8 train.py \
+#                                                     --cuda \
+#                                                     -dist \
+#                                                     -d coco \
+#                                                     --root /data/datasets/ \
+#                                                     -m rtcdet_v1_l \
+#                                                     -bs 128 \
+#                                                     -size 640 \
+#                                                     --wp_epoch 3 \
+#                                                     --max_epoch 300 \
+#                                                     --eval_epoch 10 \
+#                                                     --no_aug_epoch 20 \
+#                                                     --ema \
+#                                                     --fp16 \
+#                                                     --sybn \
+#                                                     --multi_scale \
+#                                                     #  --resume weights/coco/rtcdet_v1_l/rtcdet_v1_l_best.pth \

+ 19 - 0
train_single_gpu.sh

@@ -34,3 +34,22 @@ python train.py \
 #         --multi_scale \
 #         # --resume weights/coco/yolov5_l/yolov5_l_best.pth \
 #         # --eval_first
+
+# -------------------------- Train My RTCDet series --------------------------
+# python train.py \
+#         --cuda \
+#         -d coco \
+#         --root /mnt/share/ssd2/dataset/ \
+#         -m rtcdet_v1_l \
+#         -bs 16 \
+#         -size 640 \
+#         --wp_epoch 3 \
+#         --max_epoch 300 \
+#         --eval_epoch 10 \
+#         --no_aug_epoch 20 \
+#         --grad_accumulate 8 \
+#         --ema \
+#         --fp16 \
+#         --multi_scale \
+#         # --resume weights/coco/rtcdet_v1_l/rtcdet_v1_l_best.pth \
+#         # --eval_first