# Source repository: junxiaoyao/YOLO-Tutorial-v2
							# RTCDet config


def build_rtcdet_config(args):
    """Return the RTCDet config object selected by ``args.model``.

    Args:
        args: Object exposing a ``model`` attribute. Recognised values are
            'rtcdet_n', 'rtcdet_t', 'rtcdet_s', 'rtcdet_m', 'rtcdet_l'
            and 'rtcdet_x'.

    Returns:
        A newly constructed config instance for the requested model name.

    Raises:
        NotImplementedError: If ``args.model`` matches none of the names.
    """
    # Each factory is wrapped in a lambda so that a config class is only
    # looked up and instantiated for the name that actually matches.
    factories = (
        ('rtcdet_n', lambda: RTCDet_Nano_Config()),
        ('rtcdet_t', lambda: RTCDet_Tiny_Config()),
        ('rtcdet_s', lambda: RTCDet_Small_Config()),
        ('rtcdet_m', lambda: RTCDet_Medium_Config()),
        ('rtcdet_l', lambda: RTCDet_Large_Config()),
        ('rtcdet_x', lambda: RTCDet_xLarge_Config()),
    )
    for model_name, make_config in factories:
        if args.model == model_name:
            return make_config()

    raise NotImplementedError("No config for model: {}".format(args.model))
    
# RTCDet-Base config
class RTCDetBaseConfig:
    """Default settings shared by every RTCDet variant.

    All settings are plain instance attributes assigned in ``__init__``.
    The size-specific config classes in this file call
    ``super().__init__()`` and then overwrite a handful of them.
    """

    def __init__(self) -> None:
        """Populate every setting with its default value."""
        # ---------------- Model config ----------------
        self.stage_dims = [64, 128, 256, 512, 512]
        self.stage_depth = [3, 6, 6, 3]
        self.width = 1.0
        self.depth = 1.0
        self.ratio = 1.0
        self.reg_max = 16
        self.out_stride = [8, 16, 32]
        self.max_stride = 32
        self.num_levels = 3
        ## Backbone
        self.bk_block = 'elan_layer'
        self.bk_ds_block = 'conv'
        self.bk_act = 'silu'
        self.bk_norm = 'bn'
        self.bk_depthwise = False
        ## Neck
        self.neck_act = 'silu'
        self.neck_norm = 'bn'
        self.neck_depthwise = False
        self.neck_expand_ratio = 0.5
        self.spp_pooling_size = 5
        ## FPN
        self.fpn_block = 'elan_layer'
        self.fpn_ds_block = 'conv'
        self.fpn_act = 'silu'
        self.fpn_norm = 'bn'
        self.fpn_depthwise = False
        ## Head
        self.head_act = 'silu'
        self.head_norm = 'bn'
        self.head_depthwise = False
        self.num_cls_head = 2
        self.num_reg_head = 2

        # ---------------- Post-process config ----------------
        ## Validation-time settings
        self.val_topk = 1000
        self.val_conf_thresh = 0.001
        self.val_nms_thresh = 0.7
        ## Test-time settings
        self.test_topk = 100
        self.test_conf_thresh = 0.2
        self.test_nms_thresh = 0.5

        # ---------------- Assignment & Loss config (BCE) ----------------
        self.loss_cls_type = "bce"
        self.matcher_dict = {"tal_alpha": 0.5, "tal_beta": 6.0, "topk_candidates": 10}
        self.weight_dict = {"loss_cls": 0.5, "loss_box": 7.5, "loss_dfl": 1.5}

        # ---------------- Alternative Assignment & Loss config (VFL) ----------------
        # Disabled; kept as the reference values to use with a VFL classification loss.
        # self.loss_cls_type = "vfl"
        # self.matcher_dict = {"tal_alpha": 1.0, "tal_beta": 6.0, "topk_candidates": 13}
        # self.weight_dict  = {"loss_cls": 1.0, "loss_box": 2.5, "loss_dfl": 0.5}

        # ---------------- ModelEMA config ----------------
        self.use_ema = True
        self.ema_decay = 0.9998
        self.ema_tau = 2000

        # ---------------- Optimizer config ----------------
        self.trainer = 'yolo'
        self.no_norm_decay = True
        self.no_bias_decay = True
        self.batch_size_base = 64
        self.optimizer = 'adamw'
        self.base_lr = 0.001
        self.min_lr_ratio = 0.05  # min_lr = base_lr * min_lr_ratio
        self.momentum = 0.9
        self.weight_decay = 0.05
        self.clip_max_norm = 35.0
        self.warmup_bias_lr = 0.1
        self.warmup_momentum = 0.8
        self.use_fp16 = True  # use mixed precision

        # ---------------- Lr Scheduler config ----------------
        self.warmup_epoch = 3
        self.lr_scheduler = "cosine"
        self.max_epoch = 500
        self.eval_epoch = 10
        self.no_aug_epoch = 15

        # ---------------- Data process config ----------------
        self.aug_type = 'yolo'
        self.box_format = 'xyxy'
        self.normalize_coords = False
        self.mosaic_prob = 0.0
        self.mixup_prob = 0.0
        self.copy_paste = 0.0  # approximated by the YOLOX's mixup
        self.multi_scale = [0.5, 1.5]  # multi scale: [img_size * 0.5, img_size * 1.5]
        ## Pixel mean & std
        self.pixel_mean = [0., 0., 0.]
        self.pixel_std = [255., 255., 255.]
        ## Transforms
        self.train_img_size = 640
        self.test_img_size = 640
        self.affine_params = {
            'degrees': 0.0,
            'translate': 0.2,
            'scale': [0.1, 2.0],
            'shear': 0.0,
            'perspective': 0.0,
            'hsv_h': 0.015,
            'hsv_s': 0.7,
            'hsv_v': 0.4,
        }

    def print_config(self):
        """Print every instance attribute as ``name : value``, one per line.

        Attributes are printed in assignment order; any attribute whose name
        begins with a double underscore is skipped.
        """
        for name, value in vars(self).items():
            if name.startswith('__'):
                continue
            print("{} : {}".format(name, value))

# RTCDet-N
class RTCDet_Nano_Config(RTCDetBaseConfig):
    """RTCDet-N settings: the base defaults plus nano-specific overrides.

    Returned by ``build_rtcdet_config`` for the model name 'rtcdet_n'.
    """

    def __init__(self) -> None:
        # Start from the shared defaults, then overwrite the per-size values.
        super().__init__()

        # ---------------- Model config ----------------
        self.width, self.depth, self.ratio = 0.25, 0.34, 2.0

        # ---------------- Data process config ----------------
        self.mosaic_prob, self.mixup_prob, self.copy_paste = 1.0, 0.0, 1.0

# RTCDet-T
class RTCDet_Tiny_Config(RTCDetBaseConfig):
    """RTCDet-T settings: the base defaults plus tiny-specific overrides.

    Returned by ``build_rtcdet_config`` for the model name 'rtcdet_t'.
    """

    def __init__(self) -> None:
        # Start from the shared defaults, then overwrite the per-size values.
        super().__init__()

        # ---------------- Model config ----------------
        self.width, self.depth, self.ratio = 0.375, 0.34, 2.0

        # ---------------- Data process config ----------------
        self.mosaic_prob, self.mixup_prob, self.copy_paste = 1.0, 0.0, 1.0

# RTCDet-S
class RTCDet_Small_Config(RTCDetBaseConfig):
    """RTCDet-S settings: the base defaults plus small-specific overrides.

    Returned by ``build_rtcdet_config`` for the model name 'rtcdet_s'.
    """

    def __init__(self) -> None:
        # Start from the shared defaults, then overwrite the per-size values.
        super().__init__()

        # ---------------- Model config ----------------
        self.width, self.depth, self.ratio = 0.50, 0.34, 2.0

        # ---------------- Data process config ----------------
        self.mosaic_prob, self.mixup_prob, self.copy_paste = 1.0, 0.05, 1.0

# RTCDet-M
class RTCDet_Medium_Config(RTCDetBaseConfig):
    """RTCDet-M settings: the base defaults plus medium-specific overrides.

    Returned by ``build_rtcdet_config`` for the model name 'rtcdet_m'.
    """

    def __init__(self) -> None:
        # Start from the shared defaults, then overwrite the per-size values.
        super().__init__()

        # ---------------- Model config ----------------
        self.width, self.depth, self.ratio = 0.75, 0.67, 1.5

        # ---------------- Data process config ----------------
        self.mosaic_prob, self.mixup_prob, self.copy_paste = 1.0, 0.1, 1.0

# RTCDet-L
class RTCDet_Large_Config(RTCDetBaseConfig):
    """RTCDet-L settings: the base defaults plus large-specific overrides.

    Returned by ``build_rtcdet_config`` for the model name 'rtcdet_l'.
    """

    def __init__(self) -> None:
        # Start from the shared defaults, then overwrite the per-size values.
        super().__init__()

        # ---------------- Model config ----------------
        self.width, self.depth, self.ratio = 1.0, 1.0, 1.0

        # ---------------- Data process config ----------------
        self.mosaic_prob, self.mixup_prob, self.copy_paste = 1.0, 0.15, 1.0

# RTCDet-X
class RTCDet_xLarge_Config(RTCDetBaseConfig):
    """RTCDet-X settings: the base defaults plus xlarge-specific overrides.

    Returned by ``build_rtcdet_config`` for the model name 'rtcdet_x'.
    """

    def __init__(self) -> None:
        # Start from the shared defaults, then overwrite the per-size values.
        super().__init__()

        # ---------------- Model config ----------------
        self.width, self.depth, self.ratio = 1.25, 1.0, 1.0

        # ---------------- Data process config ----------------
        self.mosaic_prob, self.mixup_prob, self.copy_paste = 1.0, 0.2, 1.0