
remove pycache

yjh0410 2 years ago
parent
commit
1ae61635c2
44 changed files with 99 additions and 221 deletions
  1. Binary   __pycache__/engine.cpython-36.pyc
  2. Binary   config/__pycache__/__init__.cpython-36.pyc
  3. Binary   config/__pycache__/transform_config.cpython-36.pyc
  4. Binary   config/__pycache__/yolov1_config.cpython-36.pyc
  5. Binary   config/__pycache__/yolov2_config.cpython-36.pyc
  6. +1 -6    config/transform_config.py
  7. +7 -8    config/yolov1_config.py
  8. Binary   dataset/__pycache__/__init__.cpython-36.pyc
  9. Binary   dataset/__pycache__/coco.cpython-36.pyc
  10. Binary  dataset/__pycache__/voc.cpython-36.pyc
  11. +0 -8   dataset/coco.py
  12. +2 -10  dataset/data_augment/__init__.py
  13. Binary  dataset/data_augment/__pycache__/__init__.cpython-36.pyc
  14. Binary  dataset/data_augment/__pycache__/ssd_augment.cpython-36.pyc
  15. Binary  dataset/data_augment/__pycache__/yolov5_augment.cpython-36.pyc
  16. +5 -25  dataset/data_augment/ssd_augment.py
  17. +0 -6   dataset/data_augment/yolov5_augment.py
  18. +0 -8   dataset/voc.py
  19. +44 -33 engine.py
  20. Binary  evaluator/__pycache__/coco_evaluator.cpython-36.pyc
  21. Binary  evaluator/__pycache__/voc_evaluator.cpython-36.pyc
  22. Binary  models/__pycache__/__init__.cpython-36.pyc
  23. Binary  models/yolov1/__pycache__/build.cpython-36.pyc
  24. Binary  models/yolov1/__pycache__/loss.cpython-36.pyc
  25. Binary  models/yolov1/__pycache__/matcher.cpython-36.pyc
  26. Binary  models/yolov1/__pycache__/yolov1.cpython-36.pyc
  27. Binary  models/yolov1/__pycache__/yolov1_backbone.cpython-36.pyc
  28. Binary  models/yolov1/__pycache__/yolov1_basic.cpython-36.pyc
  29. Binary  models/yolov1/__pycache__/yolov1_neck.cpython-36.pyc
  30. +13 -16 train.py
  31. Binary  utils/__pycache__/__init__.cpython-36.pyc
  32. Binary  utils/__pycache__/box_ops.cpython-36.pyc
  33. Binary  utils/__pycache__/com_flops_params.cpython-36.pyc
  34. Binary  utils/__pycache__/distributed_utils.cpython-36.pyc
  35. Binary  utils/__pycache__/fuse_conv_bn.cpython-36.pyc
  36. Binary  utils/__pycache__/misc.cpython-36.pyc
  37. Binary  utils/__pycache__/vis_tools.cpython-36.pyc
  38. Binary  utils/solver/__pycache__/__init__.cpython-36.pyc
  39. Binary  utils/solver/__pycache__/lr_scheduler.cpython-36.pyc
  40. Binary  utils/solver/__pycache__/optimizer.cpython-36.pyc
  41. Binary  utils/solver/__pycache__/warmup_schedule.cpython-36.pyc
  42. +5 -10  utils/solver/lr_scheduler.py
  43. +22 -37 utils/solver/optimizer.py
  44. +0 -54  utils/solver/warmup_schedule.py

Binary  __pycache__/engine.cpython-36.pyc
Binary  config/__pycache__/__init__.cpython-36.pyc
Binary  config/__pycache__/transform_config.cpython-36.pyc
Binary  config/__pycache__/yolov1_config.cpython-36.pyc
Binary  config/__pycache__/yolov2_config.cpython-36.pyc


+ 1 - 6
config/transform_config.py

@@ -3,9 +3,6 @@
 
 yolov5_trans_config = {
     'aug_type': 'yolov5',
-    # Pixel mean & std
-    'pixel_mean': [0., 0., 0.],
-    'pixel_std': [1., 1., 1.],
     # Basic Augment
     'degrees': 0.0,
     'translate': 0.2,
@@ -26,9 +23,7 @@ yolov5_trans_config = {
 
 ssd_trans_config = {
     'aug_type': 'ssd',
-    'pixel_mean': [0.406, 0.456, 0.485],
-    'pixel_std': [0.225, 0.224, 0.229],
-    # Mosaic & Mixup
+    # Mosaic & Mixup are not used for SSD-style augmentation
     'mosaic_prob': 0.,
     'mixup_prob': 0.,
     'mosaic_type': 'yolov5_mosaic',

+ 7 - 8
config/yolov1_config.py

@@ -6,22 +6,21 @@ yolov1_cfg = {
     # loss weight
     'loss_obj_weight': 1.0,
     'loss_cls_weight': 1.0,
-    'loss_txty_weight': 1.0,
-    'loss_twth_weight': 1.0,
+    'loss_reg_weight': 5.0,
     # training configuration
     'no_aug_epoch': -1,
     # optimizer
-    'optimizer': 'sgd',        # optional: sgd, yolov5_sgd
-    'momentum': 0.9,           # SGD: 0.937;    AdamW: invalid
+    'optimizer': 'sgd',        # optional: sgd, adam, adamw
+    'momentum': 0.937,         # SGD: 0.937;    AdamW: invalid
     'weight_decay': 5e-4,      # SGD: 5e-4;     AdamW: 5e-2
     'clip_grad': 10,           # SGD: 10.0;     AdamW: -1
     # model EMA
     'ema_decay': 0.9999,       # SGD: 0.9999;   AdamW: 0.9998
+    'ema_tau': 2000,
     # lr schedule
     'scheduler': 'linear',
-    'lr0': 0.001,              # SGD: 0.01;     AdamW: 0.004
+    'lr0': 0.01,              # SGD: 0.01;     AdamW: 0.004
     'lrf': 0.01,               # SGD: 0.01;     AdamW: 0.05
-    # warmup strategy
-    'warmup': 'linear',
-    'warmup_factor': 0.00066667,
+    'warmup_momentum': 0.8,
+    'warmup_bias_lr': 0.1,
 }
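
The new 'ema_tau' key pairs with 'ema_decay' in the ModelEMA call in train.py below. As a hedged illustration (the actual implementation lives in utils/misc.py, which this commit does not show), a YOLOv5-style EMA ramps its effective decay up from 0 so that early updates track the raw weights closely:

import math

def effective_decay(updates, decay=0.9999, tau=2000):
    # effective decay approaches `decay` as the update count grows
    return decay * (1 - math.exp(-updates / tau))

print(round(effective_decay(100), 4))    # 0.0488 -> EMA still follows the model
print(round(effective_decay(2000), 4))   # 0.6321
print(round(effective_decay(10000), 4))  # 0.9932 -> near the asymptotic decay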

Binary  dataset/__pycache__/__init__.cpython-36.pyc
Binary  dataset/__pycache__/coco.cpython-36.pyc
Binary  dataset/__pycache__/voc.cpython-36.pyc


+ 0 - 8
dataset/coco.py

@@ -237,9 +237,6 @@ if __name__ == "__main__":
     img_size = 640
     yolov5_trans_config = {
         'aug_type': 'yolov5',
-        # Pixel mean & std
-        'pixel_mean': [0., 0., 0.],
-        'pixel_std': [1., 1., 1.],
         # Basic Augment
         'degrees': 0.0,
         'translate': 0.2,
@@ -258,8 +255,6 @@ if __name__ == "__main__":
     }
     ssd_trans_config = {
         'aug_type': 'ssd',
-        'pixel_mean': [0.406, 0.456, 0.485],
-        'pixel_std': [0.225, 0.224, 0.229],
         'mosaic_prob': 0.0,
         'mixup_prob': 0.0
     }
@@ -285,10 +280,7 @@ if __name__ == "__main__":
         image, target, deltas = dataset.pull_item(i)
         # to numpy
         image = image.permute(1, 2, 0).numpy()
-        # to BGR
-        image = image[:, :, (2, 1, 0)]
         # denormalize
-        image = image * yolov5_trans_config['pixel_std'] + yolov5_trans_config['pixel_mean']
         image *= 255.
         # to uint8
         image = image.astype(np.uint8)

+ 2 - 10
dataset/data_augment/__init__.py

@@ -5,17 +5,9 @@ from .yolov5_augment import YOLOv5Augmentation, YOLOv5BaseTransform
 def build_transform(img_size, trans_config, is_train=False):
     if trans_config['aug_type'] == 'ssd':
         if is_train:
-            transform = SSDAugmentation(
-                img_size=img_size,
-                pixel_mean=trans_config['pixel_mean'],
-                pixel_std=trans_config['pixel_std']
-            )
+            transform = SSDAugmentation(img_size=img_size)
         else:
-            transform = SSDBaseTransform(
-                img_size=img_size,
-                pixel_mean=trans_config['pixel_mean'],
-                pixel_std=trans_config['pixel_std']
-            )
+            transform = SSDBaseTransform(img_size=img_size)
 
     elif trans_config['aug_type'] == 'yolov5':
         if is_train:
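
With the pixel statistics gone, a transform config only needs 'aug_type' plus the augmentation-specific keys. A minimal usage sketch (the config values are examples, matching the ones used in dataset/coco.py below):

from dataset.data_augment import build_transform

ssd_trans_config = {'aug_type': 'ssd', 'mosaic_prob': 0.0, 'mixup_prob': 0.0}
train_transform = build_transform(640, ssd_trans_config, is_train=True)   # SSDAugmentation
val_transform   = build_transform(640, ssd_trans_config, is_train=False)  # SSDBaseTransform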

Binary  dataset/data_augment/__pycache__/__init__.cpython-36.pyc
Binary  dataset/data_augment/__pycache__/ssd_augment.cpython-36.pyc
Binary  dataset/data_augment/__pycache__/yolov5_augment.cpython-36.pyc


+ 5 - 25
dataset/data_augment/ssd_augment.py

@@ -57,15 +57,9 @@ class ConvertFromInts(object):
 
 
 class Normalize(object):
-    def __init__(self, pixel_mean=None, pixel_std=None):
-        self.pixel_mean = np.array(pixel_mean, dtype=np.float32)
-        self.pixel_std = np.array(pixel_std, dtype=np.float32)
-
     def __call__(self, image, boxes=None, labels=None):
         image = image.astype(np.float32)
         image /= 255.
-        image -= self.pixel_mean
-        image /= self.pixel_std
 
         return image, boxes, labels
 
@@ -276,9 +270,6 @@ class RandomSampleCrop(object):
 
 
 class Expand(object):
-    def __init__(self, pixel_mean):
-        self.pixel_mean = pixel_mean
-
     def __call__(self, image, boxes, labels):
         if random.randint(2):
             return image, boxes, labels
@@ -291,7 +282,6 @@ class Expand(object):
         expand_image = np.zeros(
             (int(height*ratio), int(width*ratio), depth),
             dtype=image.dtype)
-        expand_image[:, :, :] = self.pixel_mean
         expand_image[int(top):int(top + height),
                      int(left):int(left + width)] = image
         image = expand_image
@@ -365,18 +355,16 @@ class PhotometricDistort(object):
 # ----------------------- Main Functions
 ## SSD-style Augmentation
 class SSDAugmentation(object):
-    def __init__(self, img_size=640, pixel_mean=(0.406, 0.456, 0.485), pixel_std=(0.225, 0.224, 0.229)):
-        self.pixel_mean = pixel_mean
+    def __init__(self, img_size=640):
         self.img_size = img_size
-        self.pixel_std = pixel_std
         self.augment = Compose([
         ConvertFromInts(),                         # convert int image to float32
         PhotometricDistort(),                      # photometric color augmentation
-            Expand(self.pixel_mean),                   # expand augmentation
+            Expand(),                                  # expand augmentation
         RandomSampleCrop(),                        # random crop
         RandomHorizontalFlip(),                    # random horizontal flip
         Resize(self.img_size),                     # resize op
-            Normalize(self.pixel_mean, self.pixel_std) # image normalization
+            Normalize()                                # image normalization
         ])
 
     def __call__(self, image, target, mosaic=False):
@@ -385,9 +373,6 @@ class SSDAugmentation(object):
         # augment
         image, boxes, labels = self.augment(image, boxes, labels)
 
-        # to rgb
-        image = image[..., (2, 1, 0)]
-
         # to tensor
         img_tensor = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
         target['boxes'] = torch.from_numpy(boxes).float()
@@ -399,10 +384,8 @@ class SSDAugmentation(object):
 
 ## SSD-style valTransform
 class SSDBaseTransform(object):
-    def __init__(self, img_size, pixel_mean=(0.406, 0.456, 0.485), pixel_std=(0.225, 0.224, 0.229)):
+    def __init__(self, img_size):
         self.img_size = img_size
-        self.pixel_mean = np.array(pixel_mean, dtype=np.float32)
-        self.pixel_std = np.array(pixel_std, dtype=np.float32)
 
     def __call__(self, image, target=None, mosaic=False):
         # resize
@@ -410,7 +393,7 @@ class SSDBaseTransform(object):
         image = cv2.resize(image, (self.img_size, self.img_size)).astype(np.float32)
         
         # normalize
-        image = (image / 255. - self.pixel_mean) / self.pixel_std
+        image /= 255.
         if target is not None:
             boxes = target['boxes'].copy()
             labels = target['labels'].copy()
@@ -419,9 +402,6 @@ class SSDBaseTransform(object):
             boxes[..., [1, 3]] = boxes[..., [1, 3]] / orig_h * img_h
             target['boxes'] = boxes
         
-        # to rgb
-        image = image[..., (2, 1, 0)]
-
         # to tensor
         img_tensor = torch.from_numpy(image).permute(2, 0, 1).contiguous().float()
         if target is not None:

+ 0 - 6
dataset/data_augment/yolov5_augment.py

@@ -344,9 +344,6 @@ class YOLOv5Augmentation(object):
             boxes[..., [0, 2]] = w - boxes[..., [2, 0]]
             target["boxes"] = boxes
 
-        # to rgb
-        img = img[..., (2, 1, 0)]
-
         # to tensor
         img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
 
@@ -390,9 +387,6 @@ class YOLOv5BaseTransform(object):
 
         img_h, img_w = img.shape[:2]
 
-        # to rgb
-        img = img[..., (2, 1, 0)]
-
         # to tensor
         img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float()
 

+ 0 - 8
dataset/voc.py

@@ -260,9 +260,6 @@ if __name__ == "__main__":
     img_size = 640
     yolov5_trans_config = {
         'aug_type': 'yolov5',
-        # Pixel mean & std
-        'pixel_mean': [0., 0., 0.],
-        'pixel_std': [1., 1., 1.],
         # Basic Augment
         'degrees': 0.0,
         'translate': 0.2,
@@ -281,8 +278,6 @@ if __name__ == "__main__":
     }
     ssd_trans_config = {
         'aug_type': 'ssd',
-        'pixel_mean': [0.406, 0.456, 0.485],
-        'pixel_std': [0.225, 0.224, 0.229],
         'mosaic_prob': 0.0,
         'mixup_prob': 0.0
     }
@@ -306,10 +301,7 @@ if __name__ == "__main__":
         image, target, deltas = dataset.pull_item(i)
         # to numpy
         image = image.permute(1, 2, 0).numpy()
-        # to BGR
-        image = image[:, :, (2, 1, 0)]
         # denormalize
-        image = image * ssd_trans_config['pixel_std'] + ssd_trans_config['pixel_mean']
         image *= 255.
         # to uint8
         image = image.astype(np.uint8)

+ 44 - 33
engine.py

@@ -3,24 +3,21 @@ import torch.distributed as dist
 
 import time
 import os
+import math
+import numpy as np
 import random
 
 from utils import distributed_utils
 from utils.vis_tools import vis_data
 
 
-def rescale_image_targets(images, targets, stride, min_box_size):
+def rescale_image_targets(images, targets, max_stride, min_box_size):
     """
         Deployed for Multi scale trick.
     """
-    if isinstance(stride, int):
-        max_stride = stride
-    elif isinstance(stride, list):
-        max_stride = max(stride)
-
     # During training phase, the shape of input image is square.
     old_img_size = images.shape[-1]
-    new_img_size = random.randrange(old_img_size * 0.5, old_img_size * 1.0 + max_stride) // max_stride * max_stride  # size
+    new_img_size = random.randrange(old_img_size * 0.5, old_img_size * 1.5 + max_stride) // max_stride * max_stride  # size
     if new_img_size / old_img_size != 1:
         # interpolate
         images = torch.nn.functional.interpolate(
@@ -54,10 +51,12 @@ def train_one_epoch(epoch,
                     ema,
                     model,
                     criterion,
+                    cfg, 
                     dataloader, 
                     optimizer,
-                    lr_scheduler,
-                    warmup_scheduler,
+                    scheduler,
+                    lf,
+                    scaler,
                     last_opt_step):
     epoch_size = len(dataloader)
     img_size = args.img_size
@@ -69,34 +68,42 @@ def train_one_epoch(epoch,
     for iter_i, (images, targets) in enumerate(dataloader):
         ni = iter_i + epoch * epoch_size
         # Warmup
-        if ni < nw:
-            warmup_scheduler.warmup(ni, optimizer)
+        if ni <= nw:
+            xi = [0, nw]  # x interp
+            accumulate = max(1, np.interp(ni, xi, [1, 64 / args.batch_size]).round())
+            for j, x in enumerate(optimizer.param_groups):
+                # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
+                x['lr'] = np.interp(
+                    ni, xi, [cfg['warmup_bias_lr'] if j == 0 else 0.0, x['initial_lr'] * lf(epoch)])
+                if 'momentum' in x:
+                    x['momentum'] = np.interp(ni, xi, [cfg['warmup_momentum'], cfg['momentum']])
                             
         # visualize train targets
         if args.vis_tgt:
             vis_data(images, targets)
 
         # to device
-        images = images.to(device, non_blocking=True).float()
+        images = images.to(device, non_blocking=True).float() / 255.
 
         # multi scale
-        if args.multi_scale and ni % 10 == 0:
+        if args.multi_scale:
             images, targets, img_size = rescale_image_targets(
-                images, targets, model.stride, args.min_box_size)
+                images, targets, max(model.stride), args.min_box_size)
             
         # inference
-        outputs = model(images)
-        
-        # loss
-        loss_dict = criterion(outputs=outputs, targets=targets)
-        losses = loss_dict['losses']
+        with torch.cuda.amp.autocast(enabled=args.fp16):
+            outputs = model(images)
+            # loss
+            loss_dict = criterion(outputs=outputs, targets=targets)
+            losses = loss_dict['losses']
+            losses *= images.shape[0]  # loss * bs
 
-        # reduce            
-        loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
+            # reduce            
+            loss_dict_reduced = distributed_utils.reduce_dict(loss_dict)
 
-        if args.distributed:
-            # gradient averaged between devices in DDP mode
-            losses *= distributed_utils.get_world_size()
+            if args.distributed:
+                # gradient averaged between devices in DDP mode
+                losses *= distributed_utils.get_world_size()
 
         # check loss
         try:
@@ -107,16 +114,20 @@ def train_one_epoch(epoch,
             print(loss_dict)
 
         # backward
-        losses /= accumulate
-        losses.backward()
+        scaler.scale(losses).backward()
 
         # Optimize
         if ni - last_opt_step >= accumulate:
+            if cfg['clip_grad'] > 0:
+                # unscale gradients
+                scaler.unscale_(optimizer)
+                # clip gradients
+                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=cfg['clip_grad'])
             # optimizer.step
-            optimizer.step()
+            scaler.step(optimizer)
+            scaler.update()
             optimizer.zero_grad()
-
-            # EMA
+            # ema
             if ema:
                 ema.update(model)
             last_opt_step = ni
@@ -128,7 +139,7 @@ def train_one_epoch(epoch,
             # basic infor
             log =  '[Epoch: {}/{}]'.format(epoch+1, total_epochs)
             log += '[Iter: {}/{}]'.format(iter_i, epoch_size)
-            log += '[lr: {:.6f}]'.format(cur_lr[0])
+            log += '[lr: {:.6f}]'.format(cur_lr[2])
             # loss infor
             for k in loss_dict_reduced.keys():
                 if k == 'losses' and args.distributed:
@@ -146,7 +157,7 @@ def train_one_epoch(epoch,
             
             t0 = time.time()
     
-    lr_scheduler.step()
+    scheduler.step()
 
     return last_opt_step
 
@@ -163,7 +174,7 @@ def val_one_epoch(args,
         if evaluator is None:
             print('No evaluator ... save model and go on training.')
             print('Saving state, epoch: {}'.format(epoch + 1))
-            weight_name = '{}_epoch_{}.pth'.format(args.model, epoch + 1)
+            weight_name = '{}_epoch_{}.pth'.format(args.version, epoch + 1)
             checkpoint_path = os.path.join(path_to_save, weight_name)
             torch.save({'model': model.state_dict(),
                         'mAP': -1.,
@@ -187,7 +198,7 @@ def val_one_epoch(args,
                 best_map = cur_map
                 # save model
                 print('Saving state, epoch:', epoch + 1)
-                weight_name = '{}_epoch_{}_{:.2f}.pth'.format(args.model, epoch + 1, best_map*100)
+                weight_name = '{}_epoch_{}_{:.2f}.pth'.format(args.version, epoch + 1, best_map*100)
                 checkpoint_path = os.path.join(path_to_save, weight_name)
                 torch.save({'model': model.state_dict(),
                             'mAP': round(best_map*100, 1),
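
To see what the inlined warmup above does, here is a runnable toy that traces it (nw, batch size, and epoch count are made-up numbers; lf is the linear factor from utils/solver/lr_scheduler.py):

import numpy as np

nw, batch_size, epochs = 1500, 16, 300
lr0, lrf = 0.01, 0.01
lf = lambda x: (1 - x / epochs) * (1.0 - lrf) + lrf

for ni in (0, 750, 1500):
    accumulate = max(1, np.interp(ni, [0, nw], [1, 64 / batch_size]).round())
    bias_lr  = np.interp(ni, [0, nw], [0.1, lr0 * lf(0)])  # param group 0 (biases) falls
    other_lr = np.interp(ni, [0, nw], [0.0, lr0 * lf(0)])  # other groups rise
    print(ni, accumulate, round(bias_lr, 4), round(other_lr, 4))
# 0     1    0.1    0.0
# 750   2.0  0.055  0.005
# 1500  4.0  0.01   0.01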

Binary  evaluator/__pycache__/coco_evaluator.cpython-36.pyc
Binary  evaluator/__pycache__/voc_evaluator.cpython-36.pyc
Binary  models/__pycache__/__init__.cpython-36.pyc
Binary  models/yolov1/__pycache__/build.cpython-36.pyc
Binary  models/yolov1/__pycache__/loss.cpython-36.pyc
Binary  models/yolov1/__pycache__/matcher.cpython-36.pyc
Binary  models/yolov1/__pycache__/yolov1.cpython-36.pyc
Binary  models/yolov1/__pycache__/yolov1_backbone.cpython-36.pyc
Binary  models/yolov1/__pycache__/yolov1_basic.cpython-36.pyc
Binary  models/yolov1/__pycache__/yolov1_neck.cpython-36.pyc


+ 13 - 16
train.py

@@ -12,7 +12,6 @@ from utils import distributed_utils
 from utils.com_flops_params import FLOPs_and_Params
 from utils.misc import ModelEMA, CollateFunc, build_dataset, build_dataloader
 from utils.solver.optimizer import build_optimizer
-from utils.solver.warmup_schedule import build_warmup
 from utils.solver.lr_scheduler import build_lr_scheduler
 
 from engine import train_one_epoch, val_one_epoch
@@ -170,6 +169,9 @@ def train():
         # wait for all processes to synchronize
         dist.barrier()
 
+    # amp
+    scaler = torch.cuda.amp.GradScaler(enabled=args.fp16)
+
     # batch size
     total_bs = args.batch_size
     accumulate = max(1, round(64 / total_bs))
@@ -177,24 +179,18 @@ def train():
 
     # optimizer
     model_cfg['weight_decay'] *= total_bs * accumulate / 64
-    optimizer, start_epoch = build_optimizer(
-        model_cfg, model_without_ddp, model_cfg['lr0'], args.resume)
-
-    # warmup scheduler
-    warmup_scheduler = build_warmup(
-        model_cfg, model_cfg['lr0'], len(dataloader) * args.wp_epoch)
+    optimizer, start_epoch = build_optimizer(model_cfg, model_without_ddp, model_cfg['lr0'], args.resume)
 
     # Scheduler
-    lr_scheduler = build_lr_scheduler(
-        args, model_cfg, optimizer, args.max_epoch)
-    lr_scheduler.last_epoch = start_epoch - 1  # do not move
+    scheduler, lf = build_lr_scheduler(model_cfg, optimizer, args.max_epoch)
+    scheduler.last_epoch = start_epoch - 1  # do not move
     if args.resume:
-        lr_scheduler.step()
+        scheduler.step()
 
     # EMA
     if args.ema and distributed_utils.get_rank() in [-1, 0]:
         print('Build ModelEMA ...')
-        ema = ModelEMA(model, model_cfg['ema_decay'], start_epoch * len(dataloader))
+        ema = ModelEMA(model, decay=model_cfg['ema_decay'], tau=model_cfg['ema_tau'], updates=start_epoch * len(dataloader))
     else:
         ema = None
 
@@ -240,12 +236,13 @@ def train():
             ema=ema, 
             model=model,
             criterion=criterion,
+            cfg=model_cfg, 
             dataloader=dataloader, 
             optimizer=optimizer,
-            lr_scheduler=lr_scheduler,
-            warmup_scheduler=warmup_scheduler,
-            last_opt_step=last_opt_step
-            )
+            scheduler=scheduler,
+            lf=lf,
+            scaler=scaler,
+            last_opt_step=last_opt_step)
 
         # eval
         if heavy_eval:
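
The batch-size coupling above is easiest to see with numbers (total_bs = 16 is just an example):

total_bs = 16
accumulate = max(1, round(64 / total_bs))          # 4 iterations per optimizer step
effective_batch = total_bs * accumulate            # 64 images per step
weight_decay = 5e-4 * total_bs * accumulate / 64   # 5e-4, unchanged at the nominal effective batch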

Binary  utils/__pycache__/__init__.cpython-36.pyc
Binary  utils/__pycache__/box_ops.cpython-36.pyc
Binary  utils/__pycache__/com_flops_params.cpython-36.pyc
Binary  utils/__pycache__/distributed_utils.cpython-36.pyc
Binary  utils/__pycache__/fuse_conv_bn.cpython-36.pyc
Binary  utils/__pycache__/misc.cpython-36.pyc
Binary  utils/__pycache__/vis_tools.cpython-36.pyc
Binary  utils/solver/__pycache__/__init__.cpython-36.pyc
Binary  utils/solver/__pycache__/lr_scheduler.cpython-36.pyc
Binary  utils/solver/__pycache__/optimizer.cpython-36.pyc
Binary  utils/solver/__pycache__/warmup_schedule.cpython-36.pyc


+ 5 - 10
utils/solver/lr_scheduler.py

@@ -2,26 +2,21 @@ import math
 import torch
 
 
-def build_lr_scheduler(args, cfg, optimizer, max_epochs):
+def build_lr_scheduler(cfg, optimizer, epochs):
     """Build learning rate scheduler from cfg file."""
     print('==============================')
     print('Lr Scheduler: {}'.format(cfg['scheduler']))
 
     if cfg['scheduler'] == 'cosine':
-        lf = lambda x: ((1 - math.cos(x * math.pi / max_epochs)) / 2) * (cfg['lrf'] - 1) + 1
-        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
+        lf = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (cfg['lrf'] - 1) + 1
         
     elif cfg['scheduler'] == 'linear':
-        lf = lambda x: (1 - x / max_epochs) * (1.0 - cfg['lrf']) + cfg['lrf']
-        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
-
-    elif cfg['scheduler'] == 'step':
-        scheduler = torch.optim.lr_scheduler.MultiStepLR(
-            optimizer, milestones=args.step_epoch, gamma=0.1)
+        lf = lambda x: (1 - x / epochs) * (1.0 - cfg['lrf']) + cfg['lrf']
 
     else:
         print('unknown lr scheduler.')
         exit(0)
 
+    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
 
-    return scheduler
+    return scheduler, lf
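
Returning lf alongside the LambdaLR lets engine.py reuse the same epoch-to-factor function during warmup. A quick check of both factors (lrf = 0.01 over 100 epochs; illustrative numbers):

import math

epochs, lrf = 100, 0.01
linear = lambda x: (1 - x / epochs) * (1.0 - lrf) + lrf
cosine = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (lrf - 1) + 1

for x in (0, 50, 100):
    print(x, round(linear(x), 4), round(cosine(x), 4))
# 0    1.0    1.0
# 50   0.505  0.505
# 100  0.01   0.01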

+ 22 - 37
utils/solver/optimizer.py

@@ -1,50 +1,36 @@
 import torch
 import torch.nn as nn
-from torch import optim
 
 
-def build_optimizer(cfg, model, base_lr=0.0, resume=None):
+def build_optimizer(cfg, model, base_lr=0.01, resume=None):
     print('==============================')
     print('Optimizer: {}'.format(cfg['optimizer']))
+    print('--base lr: {}'.format(base_lr))
     print('--momentum: {}'.format(cfg['momentum']))
     print('--weight_decay: {}'.format(cfg['weight_decay']))
 
-    if cfg['optimizer'] == 'sgd':
-        optimizer = optim.SGD(model.parameters(), 
-                            lr=base_lr,
-                            momentum=cfg['momentum'],
-                            weight_decay=cfg['weight_decay']
-                            )
+    g = [], [], []  # optimizer parameter groups
+    bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)  # normalization layers, i.e. BatchNorm2d()
+    for v in model.modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):  # bias (no decay)
+            g[2].append(v.bias)
+        if isinstance(v, bn):  # weight (no decay)
+            g[1].append(v.weight)
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):  # weight (with decay)
+            g[0].append(v.weight)
 
-    elif cfg['optimizer'] == 'yolov5_sgd':
-        pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
-
-        for k, v in model.named_modules():
-            if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
-                pg2.append(v.bias)  # biases
-            if isinstance(v, nn.BatchNorm2d) or "bn" in k:
-                pg0.append(v.weight)  # no decay
-            elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
-                pg1.append(v.weight)  # apply decay
+    if cfg['optimizer'] == 'adam':
+        optimizer = torch.optim.Adam(g[2], lr=base_lr)  # adjust beta1 to momentum
+    elif cfg['optimizer'] == 'adamw':
+        optimizer = torch.optim.AdamW(g[2], lr=base_lr, weight_decay=0.0)
+    elif cfg['optimizer'] == 'sgd':
+        optimizer = torch.optim.SGD(g[2], lr=base_lr, momentum=cfg['momentum'], nesterov=True)
+    else:
+        raise NotImplementedError('Optimizer {} not implemented.'.format(cfg['optimizer']))
 
-        optimizer = optim.SGD(
-            pg0, lr=base_lr, momentum=cfg['momentum'], nesterov=True
-            )
-        optimizer.add_param_group(
-            {"params": pg1, "weight_decay": cfg['weight_decay']}
-        )  # add pg1 with weight_decay
-        optimizer.add_param_group({"params": pg2})
+    optimizer.add_param_group({'params': g[0], 'weight_decay': cfg['weight_decay']})  # add g0 with weight_decay
+    optimizer.add_param_group({'params': g[1], 'weight_decay': 0.0})                  # add g1 (BatchNorm2d weights)
 
-    elif cfg['optimizer'] == 'adam':
-        optimizer = optim.Adam(model.parameters(), 
-                                lr=base_lr,
-                                weight_decay=cfg['weight_decay'])
-                                
-    elif cfg['optimizer'] == 'adamw':
-        optimizer = optim.AdamW(model.parameters(), 
-                                lr=base_lr,
-                                weight_decay=cfg['weight_decay'])
-                                
     start_epoch = 0
     if resume is not None:
         print('keep training: ', resume)
@@ -53,6 +39,5 @@ def build_optimizer(cfg, model, base_lr=0.0, resume=None):
         checkpoint_state_dict = checkpoint.pop("optimizer")
         optimizer.load_state_dict(checkpoint_state_dict)
         start_epoch = checkpoint.pop("epoch")
-                        
-                                
+                                                        
     return optimizer, start_epoch
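
A small sanity check of the new three-way parameter split on a toy module (illustrative only; the layer shapes don't matter since no forward pass is run):

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.Linear(8, 4))

g = [], [], []
bn = tuple(v for k, v in nn.__dict__.items() if 'Norm' in k)
for v in model.modules():
    if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
        g[2].append(v.bias)        # conv/linear/BN biases -> no decay
    if isinstance(v, bn):
        g[1].append(v.weight)      # norm-layer weights -> no decay
    elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
        g[0].append(v.weight)      # conv/linear weights -> decayed

print(len(g[0]), len(g[1]), len(g[2]))  # 2 1 3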

+ 0 - 54
utils/solver/warmup_schedule.py

@@ -1,54 +0,0 @@
-
-# Build warmup scheduler
-
-
-def build_warmup(cfg, base_lr=0.01, wp_iter=500):
-    print('==============================')
-    print('WarmUpScheduler: {}'.format(cfg['warmup']))
-    print('--base_lr: {}'.format(base_lr))
-    print('--warmup_factor: {}'.format(cfg['warmup_factor']))
-    print('--wp_iter: {}'.format(wp_iter))
-
-    warmup_scheduler = WarmUpScheduler(name=cfg['warmup'], 
-                                       base_lr=base_lr, 
-                                       wp_iter=wp_iter, 
-                                       warmup_factor=cfg['warmup_factor'])
-    
-    return warmup_scheduler
-
-                           
-# Basic Warmup Scheduler
-class WarmUpScheduler(object):
-    def __init__(self, 
-                 name='linear', 
-                 base_lr=0.01, 
-                 wp_iter=500, 
-                 warmup_factor=0.00066667):
-        self.name = name
-        self.base_lr = base_lr
-        self.wp_iter = wp_iter
-        self.warmup_factor = warmup_factor
-
-
-    def set_lr(self, optimizer, lr):
-        for param_group in optimizer.param_groups:
-            param_group['lr'] = lr
-
-
-    def warmup(self, iter, optimizer):
-        # warmup
-        assert iter < self.wp_iter
-        if self.name == 'exp':
-            tmp_lr = self.base_lr * pow(iter / self.wp_iter, 4)
-            self.set_lr(optimizer, tmp_lr)
-
-        elif self.name == 'linear':
-            alpha = iter / self.wp_iter
-            warmup_factor = self.warmup_factor * (1 - alpha) + alpha
-            tmp_lr = self.base_lr * warmup_factor
-            self.set_lr(optimizer, tmp_lr)
-
-
-    def __call__(self, iter, optimizer):
-        self.warmup(iter, optimizer)
-
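
The dedicated WarmUpScheduler is gone because its ramp is subsumed by the np.interp logic now inlined in engine.py. The two are close but not identical; a side-by-side sketch for the linear case at the warmup midpoint (ignoring the lf(epoch) factor, which is roughly 1 in early epochs):

import numpy as np

# old: one global factor for every param group
k, wp_iter, base_lr, warmup_factor = 250, 500, 0.01, 0.00066667
alpha = k / wp_iter
old_lr = base_lr * (warmup_factor * (1 - alpha) + alpha)   # ~0.005

# new: per-group interpolation, with a high starting lr for biases only
nw, lr0 = 500, 0.01
new_bias_lr  = np.interp(k, [0, nw], [0.1, lr0])   # 0.055, falls toward lr0
new_other_lr = np.interp(k, [0, nw], [0.0, lr0])   # 0.005, rises toward lr0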