# rtcdet_config.py
# RTCDet config
  2. def build_rtcdet_config(args):
  3. if args.model == 'rtcdet_n':
  4. return RTCDetNConfig()
  5. elif args.model == 'rtcdet_t':
  6. return RTCDetTConfig()
  7. elif args.model == 'rtcdet_s':
  8. return RTCDetSConfig()
  9. elif args.model == 'rtcdet_m':
  10. return RTCDetMConfig()
  11. elif args.model == 'rtcdet_l':
  12. return RTCDetLConfig()
  13. elif args.model == 'rtcdet_x':
  14. return RTCDetXConfig()
  15. else:
  16. raise NotImplementedError("No config for model: {}".format(args.model))
# RTCDet-Base config
  18. class RTCDetBaseConfig(object):
  19. def __init__(self) -> None:
  20. # ---------------- Model config ----------------
  21. self.channel_width = 1.0
  22. self.last_stage_ratio = 1.0
  23. self.num_blocks = [3, 6, 6, 3]
  24. self.num_levels = 3
  25. self.out_stride = [8, 16, 32]
  26. self.max_stride = 32
  27. self.reg_max = 16
  28. self.scale = "b"
  29. ## Backbone
  30. self.bk_act = 'silu'
  31. self.bk_norm = 'BN'
  32. self.bk_depthwise = False
  33. self.use_pretrained = False
  34. ## Neck
  35. self.neck_act = 'silu'
  36. self.neck_norm = 'BN'
  37. self.neck_depthwise = False
  38. self.neck_expand_ratio = 0.5
  39. self.spp_pooling_size = 5
  40. ## FPN
  41. self.fpn_num_blocks = 3
  42. self.fpn_act = 'silu'
  43. self.fpn_norm = 'BN'
  44. self.fpn_depthwise = False
  45. ## Head
  46. self.head_act = 'silu'
  47. self.head_norm = 'BN'
  48. self.head_depthwise = False
  49. self.num_cls_head = 2
  50. self.num_reg_head = 2
  51. # ---------------- Post-process config ----------------
  52. ## Post process
  53. self.val_topk = 1000
  54. self.val_conf_thresh = 0.001
  55. self.val_nms_thresh = 0.7
  56. self.test_topk = 100
  57. self.test_conf_thresh = 0.3
  58. self.test_nms_thresh = 0.5
  59. # ---------------- Assignment config ----------------
  60. ## Matcher
  61. self.ota_soft_center_radius = 3.0
  62. self.ota_topk_candidates = 13
  63. ## Loss weight
  64. self.loss_cls = 1.0
  65. self.loss_box = 2.0
  66. self.loss_dfl = 0.5
  67. # ---------------- ModelEMA config ----------------
  68. self.use_ema = True
  69. self.ema_decay = 0.9998
  70. self.ema_tau = 2000
  71. # ---------------- Optimizer config ----------------
  72. self.trainer = 'yolo'
  73. self.optimizer = 'adamw'
  74. self.per_image_lr = 0.001 / 64
  75. self.base_lr = None # base_lr = per_image_lr * batch_size
  76. self.min_lr_ratio = 0.01 # min_lr = base_lr * min_lr_ratio
  77. self.momentum = 0.9
  78. self.weight_decay = 0.05
  79. self.clip_max_norm = 35.0
  80. self.warmup_bias_lr = 0.1
  81. self.warmup_momentum = 0.8
  82. # ---------------- Lr Scheduler config ----------------
  83. self.warmup_epoch = 3
  84. self.lr_scheduler = "cosine"
  85. self.max_epoch = 300
  86. self.eval_epoch = 10
  87. self.no_aug_epoch = 20
  88. # ---------------- Data process config ----------------
  89. self.aug_type = 'yolo'
  90. self.box_format = 'xyxy'
  91. self.normalize_coords = False
  92. self.mosaic_prob = 0.0
  93. self.mixup_prob = 0.0
  94. self.copy_paste = 0.0 # approximated by the YOLOX's mixup
  95. self.multi_scale = [0.5, 1.25] # multi scale: [img_size * 0.5, img_size * 1.25]
  96. ## Pixel mean & std
  97. self.pixel_mean = [0., 0., 0.]
  98. self.pixel_std = [255., 255., 255.]
  99. ## Transforms
  100. self.train_img_size = 640
  101. self.test_img_size = 640
  102. self.use_ablu = True
  103. self.affine_params = {
  104. 'degrees': 0.0,
  105. 'translate': 0.2,
  106. 'scale': [0.1, 2.0],
  107. 'shear': 0.0,
  108. 'perspective': 0.0,
  109. 'hsv_h': 0.015,
  110. 'hsv_s': 0.7,
  111. 'hsv_v': 0.4,
  112. }
  113. def print_config(self):
  114. config_dict = {key: value for key, value in self.__dict__.items() if not key.startswith('__')}
  115. for k, v in config_dict.items():
  116. print("{} : {}".format(k, v))
# RTCDet-N
  118. class RTCDetNConfig(RTCDetBaseConfig):
  119. def __init__(self) -> None:
  120. super().__init__()
  121. # ---------------- Model config ----------------
  122. self.channel_width = 0.25
  123. self.last_stage_ratio = 2.0
  124. self.num_blocks = [1, 2, 2, 1]
  125. self.scale = "n"
  126. self.fpn_num_blocks = 1
  127. # ---------------- Data process config ----------------
  128. self.mosaic_prob = 1.0
  129. self.mixup_prob = 0.0
  130. self.copy_paste = 0.5
# RTCDet-T
  132. class RTCDetTConfig(RTCDetBaseConfig):
  133. def __init__(self) -> None:
  134. super().__init__()
  135. # ---------------- Model config ----------------
  136. self.channel_width = 0.375
  137. self.last_stage_ratio = 2.0
  138. self.num_blocks = [1, 2, 2, 1]
  139. self.scale = "t"
  140. self.fpn_num_blocks = 1
  141. # ---------------- Data process config ----------------
  142. self.mosaic_prob = 1.0
  143. self.mixup_prob = 0.0
  144. self.copy_paste = 0.5
# RTCDet-S
  146. class RTCDetSConfig(RTCDetBaseConfig):
  147. def __init__(self) -> None:
  148. super().__init__()
  149. # ---------------- Model config ----------------
  150. self.channel_width = 0.50
  151. self.num_blocks = [1, 2, 2, 1]
  152. self.last_stage_ratio = 2.0
  153. self.scale = "s"
  154. self.fpn_num_blocks = 1
  155. # ---------------- Data process config ----------------
  156. self.mosaic_prob = 1.0
  157. self.mixup_prob = 0.0
  158. self.copy_paste = 0.5
# RTCDet-M
  160. class RTCDetMConfig(RTCDetBaseConfig):
  161. def __init__(self) -> None:
  162. super().__init__()
  163. # ---------------- Model config ----------------
  164. self.channel_width = 0.75
  165. self.last_stage_ratio = 1.5
  166. self.num_blocks = [2, 4, 4, 2]
  167. self.scale = "m"
  168. self.fpn_num_blocks = 2
  169. # ---------------- Data process config ----------------
  170. self.mosaic_prob = 1.0
  171. self.mixup_prob = 0.1
  172. self.copy_paste = 0.5
# RTCDet-L
  174. class RTCDetLConfig(RTCDetBaseConfig):
  175. def __init__(self) -> None:
  176. super().__init__()
  177. # ---------------- Model config ----------------
  178. self.channel_width = 1.0
  179. self.last_stage_ratio = 1.0
  180. self.num_blocks = [3, 6, 6, 3]
  181. self.scale = "l"
  182. self.fpn_num_blocks = 3
  183. # ---------------- Data process config ----------------
  184. self.mosaic_prob = 1.0
  185. self.mixup_prob = 0.1
  186. self.copy_paste = 0.5
# RTCDet-X
  188. class RTCDetXConfig(RTCDetBaseConfig):
  189. def __init__(self) -> None:
  190. super().__init__()
  191. # ---------------- Model config ----------------
  192. self.channel_width = 1.25
  193. self.last_stage_ratio = 1.0
  194. self.num_blocks = [3, 6, 6, 3]
  195. self.scale = "x"
  196. self.fpn_num_blocks = 4
  197. # ---------------- Data process config ----------------
  198. self.mosaic_prob = 1.0
  199. self.mixup_prob = 0.1
  200. self.copy_paste = 0.5