yjh0410 committed 1 year ago
parent commit ca1b120ee2

2 changed files with 11 additions and 3 deletions:
  1. yolo/engine.py (+3, −3)
  2. yolo/utils/misc.py (+8, −0)

+ 3 - 3
yolo/engine.py

@@ -6,7 +6,7 @@ import random
 
 # ----------------- Extra Components -----------------
 from utils import distributed_utils
-from utils.misc import MetricLogger, SmoothedValue
+from utils.misc import MetricLogger, SmoothedValue, get_total_grad_norm
 from utils.vis_tools import vis_data
 
 # ----------------- Optimizer & LrScheduler Components -----------------
@@ -214,7 +214,7 @@ class YoloTrainer(object):
 
             # Backward
             self.scaler.scale(losses).backward()
-            gnorm = None
+            gnorm = get_total_grad_norm(model.parameters())
 
             # Optimize
             if (iter_i + 1) % self.grad_accumulate == 0:
@@ -232,8 +232,8 @@ class YoloTrainer(object):
             # Update log
             metric_logger.update(**loss_dict_reduced)
             metric_logger.update(lr=self.optimizer.param_groups[2]["lr"])
-            metric_logger.update(size=img_size)
             metric_logger.update(gnorm=gnorm)
+            metric_logger.update(size=img_size)
 
             if self.args.debug:
                 print("For debug mode, we only train 1 iteration")

+ 8 - 0
yolo/utils/misc.py

@@ -373,6 +373,14 @@ def load_weight(model, path_to_ckpt, fuse_cbn=False, fuse_rep_conv=False):
 
     return model
 
+def get_total_grad_norm(parameters, norm_type=2):
+    parameters = list(filter(lambda p: p.grad is not None, parameters))
+    norm_type = float(norm_type)
+    device = parameters[0].grad.device
+    total_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), norm_type).to(device) for p in parameters]),
+                            norm_type)
+    return total_norm
+
 ## Model EMA
 class ModelEMA(object):
     def __init__(self, model, ema_decay=0.9999, ema_tau=2000, resume=None):
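As a quick sanity check (a hedged sketch, not part of the commit): `torch.nn.utils.clip_grad_norm_` with an infinite `max_norm` returns the same total 2-norm while leaving the gradients untouched, so it can serve as an independent reference. The import below assumes the repo's `yolo/` directory is on the Python path, matching the `from utils.misc import ...` style used in engine.py; the toy `nn.Linear` is a hypothetical stand-in. Note that the helper indexes `parameters[0]`, so it expects at least one parameter with a populated gradient.

```python
import torch
from torch import nn

# Assumes the repo's yolo/ directory is on sys.path, so `utils.misc`
# resolves the same way it does in yolo/engine.py.
from utils.misc import get_total_grad_norm

model = nn.Linear(4, 2)                    # hypothetical toy module
model(torch.randn(3, 4)).sum().backward()  # populate .grad on all params

gnorm = get_total_grad_norm(model.parameters())

# clip_grad_norm_ with an infinite threshold computes the same total
# 2-norm and leaves the gradients unmodified (the clip coefficient is
# clamped to 1), so it gives an independent reference value.
ref = nn.utils.clip_grad_norm_(model.parameters(), max_norm=float("inf"))
assert torch.allclose(gnorm, ref)
print(float(gnorm))
```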