# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import os
import random
import argparse
import numpy as np
from copy import deepcopy

import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP

from utils import distributed_utils
from utils.misc import compute_flops, collate_fn
from utils.optimizer import build_optimizer
from utils.lr_scheduler import build_wp_lr_scheduler, build_lr_scheduler

from config import build_config
from evaluator import build_evluator
from datasets import build_dataset, build_dataloader, build_transform
from models.detectors import build_model
from engine import train_one_epoch


def parse_args():
    parser = argparse.ArgumentParser('General 2D Object Detection', add_help=False)
    # Random seed
    parser.add_argument('--seed', default=42, type=int)
    # GPU
    parser.add_argument('--cuda', action='store_true', default=False,
                        help='use cuda.')
    # Batch size
    parser.add_argument('-bs', '--batch_size', default=16, type=int,
                        help='total batch size on all GPUs.')
    # Model
    parser.add_argument('-m', '--model', default='yolof_r18_c5_1x',
                        help='build object detector')
    parser.add_argument('-r', '--resume', default=None, type=str,
                        help='checkpoint to resume training from')
    # Dataset
    parser.add_argument('--root', default='/Users/liuhaoran/Desktop/python_work/object-detection/dataset/COCO/',
                        help='data root')
    parser.add_argument('-d', '--dataset', default='coco',
                        help='coco, voc, widerface, crowdhuman')
    parser.add_argument('--vis_tgt', action="store_true", default=False,
                        help="visualize input data.")
    # Dataloader
    parser.add_argument('--num_workers', default=2, type=int,
                        help='number of workers used in dataloading')
    # Checkpoint & evaluation
    parser.add_argument('--save_folder', default='weights/', type=str,
                        help='path to save weights')
    parser.add_argument('--eval_first', action="store_true", default=False,
                        help="evaluate the model before training.")
    # DDP train
    parser.add_argument('-dist', '--distributed', action='store_true', default=False,
                        help='distributed training')
    parser.add_argument('--dist_url', default='env://',
                        help='url used to set up distributed training')
    parser.add_argument('--world_size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--sybn', action='store_true', default=False,
                        help='use SyncBatchNorm.')
    # Debug setting
    parser.add_argument('--debug', action='store_true', default=False,
                        help='debug mode.')

    return parser.parse_args()
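
# Example usage (illustrative paths and launch commands, assuming this file is
# the repo's entry point, e.g. train.py; the default --dist_url of 'env://'
# matches torchrun-style environment-variable initialization):
#   Single GPU : python train.py --cuda -d coco --root /path/to/COCO -m yolof_r18_c5_1x -bs 16
#   Multi GPU  : torchrun --nproc_per_node=8 train.py --cuda -dist -d coco --root /path/to/COCO -m yolof_r18_c5_1x -bs 64
#   Resume     : python train.py --cuda -d coco -m yolof_r18_c5_1x -r weights/coco/yolof_r18_c5_1x/yolof_r18_c5_1x_best.pth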


def fix_random_seed(args):
    seed = args.seed + distributed_utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
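    # Assumed extension: the lines above seed only the CPU-side RNGs. Seeding
    # all CUDA devices as well keeps GPU-side ops reproducible; this call is a
    # no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)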


def main():
    args = parse_args()
    print("Setting Arguments.. : ", args)
    print("----------------------------------------------------------")

    # path to save model
    path_to_save = os.path.join(args.save_folder, args.dataset, args.model)
    os.makedirs(path_to_save, exist_ok=True)

    # ---------------------------- Build DDP ----------------------------
    local_rank = local_process_rank = -1
    if args.distributed:
        distributed_utils.init_distributed_mode(args)
        print("git:\n {}\n".format(distributed_utils.get_sha()))
        try:
            # Multiple machines & multiple GPUs (world size > 8)
            local_rank = torch.distributed.get_rank()
            local_process_rank = int(os.getenv('LOCAL_PROCESS_RANK', '0'))
        except Exception:
            # Single machine & multiple GPUs (world size <= 8)
            local_rank = local_process_rank = torch.distributed.get_rank()
    world_size = distributed_utils.get_world_size()
    per_gpu_batch = args.batch_size // world_size
    print("LOCAL RANK: ", local_rank)
    print("LOCAL_PROCESS_RANK: ", local_process_rank)
    print('WORLD SIZE: {}'.format(world_size))

    # ---------------------------- Build CUDA ----------------------------
    if args.cuda and torch.cuda.is_available():
        print('use cuda')
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # ---------------------------- Fix random seed ----------------------------
    fix_random_seed(args)

    # ---------------------------- Build config ----------------------------
    cfg = build_config(args)

    # ---------------------------- Build Dataset ----------------------------
    transforms = build_transform(cfg, is_train=True)
    dataset = build_dataset(args, cfg, transforms, is_train=True)

    # ---------------------------- Build Dataloader ----------------------------
    train_loader = build_dataloader(args, dataset, per_gpu_batch, collate_fn, is_train=True)
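    # Note: args.batch_size is the total batch size across all processes; each
    # rank's dataloader draws per_gpu_batch = args.batch_size // world_size samples.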

    # ---------------------------- Build model ----------------------------
    ## Build model
    model, criterion = build_model(args, cfg, is_val=True)
    model.to(device)
    criterion.to(device)
    model_without_ddp = model

    ## Calculate Params & GFLOPs
    if distributed_utils.is_main_process():
        model_copy = deepcopy(model_without_ddp)
        model_copy.trainable = False
        model_copy.eval()
        compute_flops(model=model_copy,
                      min_size=cfg.test_min_size,
                      max_size=cfg.test_max_size,
                      device=device)
        del model_copy
    if args.distributed:
        # Keep the other ranks waiting until rank 0 finishes the FLOPs measurement.
        dist.barrier()

    # ---------------------------- Build Optimizer ----------------------------
    cfg.grad_accumulate = max(cfg.batch_size_base // args.batch_size, 1)
    cfg.base_lr = cfg.per_image_lr * args.batch_size * cfg.grad_accumulate
    optimizer, start_epoch = build_optimizer(cfg, model_without_ddp, args.resume)
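    # The two lines above implement the linear LR scaling rule: when the
    # requested batch size is below cfg.batch_size_base, gradients are
    # accumulated over grad_accumulate steps so the effective batch size is
    # batch_size * grad_accumulate, and the base LR is scaled to match, i.e.
    # base_lr = per_image_lr * effective_batch_size.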

    # ---------------------------- Build LR Scheduler ----------------------------
    wp_lr_scheduler = build_wp_lr_scheduler(cfg)
    lr_scheduler = build_lr_scheduler(cfg, optimizer, args.resume)
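
    # Assumed wiring: the --sybn flag is parsed but never consumed elsewhere in
    # this script. A common way to honor it is to convert BatchNorm layers to
    # SyncBatchNorm before wrapping the model in DDP.
    if args.sybn and args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model_without_ddp = model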

    # ---------------------------- Build DDP model ----------------------------
    if args.distributed:
        model = DDP(model, device_ids=[args.gpu])
        model_without_ddp = model.module

    # ---------------------------- Build Evaluator ----------------------------
    evaluator = build_evluator(args, cfg, device)

    # ----------------------- Eval before training -----------------------
    if args.eval_first:
        # Only the main process runs evaluation; every rank returns afterwards
        # so that no process falls through into the training loop.
        if distributed_utils.is_main_process():
            evaluator.evaluate(model_without_ddp)
        if args.distributed:
            dist.barrier()
        return

    # ----------------------- Training -----------------------
    print("Start training")
    best_map = cfg.best_map
    for epoch in range(start_epoch, cfg.max_epoch):
        if args.distributed:
            train_loader.batch_sampler.sampler.set_epoch(epoch)
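            # The sampler uses the epoch set above as a shuffling seed, so each
            # epoch sees a different data order across the distributed ranks.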

        # Train one epoch
        train_one_epoch(cfg,
                        model,
                        criterion,
                        train_loader,
                        optimizer,
                        device,
                        epoch,
                        args.vis_tgt,
                        wp_lr_scheduler,
                        debug=args.debug)

        # LR Scheduler
        lr_scheduler.step()

        # Evaluate
        if distributed_utils.is_main_process():
            model_eval = model_without_ddp
            to_save = False
            if (epoch % cfg.eval_epoch) == 0 or (epoch == cfg.max_epoch - 1):
                if evaluator is None:
                    to_save = True
                else:
                    evaluator.evaluate(model_eval)
                    # Save model when the mAP improves
                    if evaluator.map >= best_map:
                        best_map = evaluator.map
                        to_save = True
            if to_save:
                # save model
                print('Saving state, epoch:', epoch)
                torch.save({'model': model_eval.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'lr_scheduler': lr_scheduler.state_dict(),
                            'mAP': round(best_map * 100, 3),
                            'epoch': epoch,
                            'args': args},
                           os.path.join(path_to_save, '{}_best.pth'.format(args.model)))
        if args.distributed:
            dist.barrier()

        if args.debug:
            print("For debug mode, we only train the model with 1 epoch.")
            exit(0)


if __name__ == '__main__':
    main()