fcos_config.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. # Fully Convolutional One-Stage object detector
  2. def build_fcos_config(args):
  3. # Standard FCOS 1x
  4. if args.model == 'fcos_r18_1x':
  5. return Fcos_R18_1x_Config()
  6. elif args.model == 'fcos_r50_1x':
  7. return Fcos_R50_1x_Config()
  8. # Standard FCOS 3x
  9. elif args.model == 'fcos_r18_3x':
  10. return Fcos_R18_3x_Config()
  11. elif args.model == 'fcos_r50_3x':
  12. return Fcos_R50_3x_Config()
  13. # Real-time FCOS 3x
  14. elif args.model == 'fcos_rt_r18_3x':
  15. return FcosRT_R18_3x_Config()
  16. elif args.model == 'fcos_rt_r50_3x':
  17. return FcosRT_R50_3x_Config()
  18. # E2E FCOS 3x
  19. elif args.model == 'fcos_e2e_r18_3x':
  20. return FcosE2E_R18_3x_Config()
  21. # PSS FCOS 3x
  22. elif args.model == 'fcos_pss_r18_3x':
  23. return FcosPSS_R18_3x_Config()
  24. else:
  25. raise NotImplementedError("No config for model: {}".format(args.model))
  26. # --------------- Base configuration ---------------
  27. class FcosBaseConfig(object):
  28. def __init__(self):
  29. # --------- Backbone ---------
  30. self.backbone = "resnet50"
  31. self.bk_norm = "FrozeBN"
  32. self.res5_dilation = False
  33. self.use_pretrained = True
  34. self.freeze_at = 1
  35. self.max_stride = 128
  36. self.out_stride = [8, 16, 32, 64, 128]
  37. # --------- Neck ---------
  38. self.neck = 'basic_fpn'
  39. self.fpn_p6_feat = True
  40. self.fpn_p7_feat = True
  41. self.fpn_p6_from_c5 = False
  42. # --------- Head ---------
  43. self.head = 'fcos_head'
  44. self.head_dim = 256
  45. self.num_cls_head = 4
  46. self.num_reg_head = 4
  47. self.head_act = 'relu'
  48. self.head_norm = 'GN'
  49. # --------- Post-process ---------
  50. self.train_topk = 1000
  51. self.train_conf_thresh = 0.05
  52. self.train_nms_thresh = 0.6
  53. self.test_topk = 100
  54. self.test_conf_thresh = 0.5
  55. self.test_nms_thresh = 0.45
  56. self.nms_class_agnostic = True
  57. # --------- Label Assignment ---------
  58. self.matcher = 'fcos_matcher'
  59. self.matcher_hpy = {'center_sampling_radius': 1.5,
  60. 'object_sizes_of_interest': [[-1, 64],
  61. [64, 128],
  62. [128, 256],
  63. [256, 512],
  64. [512, float('inf')]]
  65. }
  66. # --------- Loss weight ---------
  67. self.focal_loss_alpha = 0.25
  68. self.focal_loss_gamma = 2.0
  69. self.loss_cls_weight = 1.0
  70. self.loss_reg_weight = 1.0
  71. self.loss_ctn_weight = 1.0
  72. # --------- Optimizer ---------
  73. self.optimizer = 'sgd'
  74. self.batch_size_base = 16
  75. self.per_image_lr = 0.01 / 16
  76. self.bk_lr_ratio = 1.0 / 1.0
  77. self.momentum = 0.9
  78. self.weight_decay = 1e-4
  79. self.clip_max_norm = -1.0
  80. # --------- LR Scheduler ---------
  81. self.lr_scheduler = 'step'
  82. self.warmup = 'linear'
  83. self.warmup_iters = 500
  84. self.warmup_factor = 0.00066667
  85. # --------- Train epoch ---------
  86. self.max_epoch = 12 # 1x
  87. self.lr_epoch = [8, 11] # 1x
  88. self.eval_epoch = 2
  89. # --------- Data process ---------
  90. ## input size
  91. self.train_min_size = [800] # short edge of image
  92. self.train_max_size = 1333
  93. self.test_min_size = [800]
  94. self.test_max_size = 1333
  95. ## Pixel mean & std
  96. self.pixel_mean = [0.485, 0.456, 0.406]
  97. self.pixel_std = [0.229, 0.224, 0.225]
  98. ## Transforms
  99. self.box_format = 'xyxy'
  100. self.normalize_coords = False
  101. self.detr_style = False
  102. self.trans_config = [
  103. {'name': 'RandomHFlip'},
  104. {'name': 'RandomResize'},
  105. ]
  106. def print_config(self):
  107. config_dict = {key: value for key, value in self.__dict__.items() if not key.startswith('__')}
  108. for k, v in config_dict.items():
  109. print("{} : {}".format(k, v))
  110. # --------------- 1x scheduler ---------------
  111. class Fcos_R18_1x_Config(FcosBaseConfig):
  112. def __init__(self) -> None:
  113. super().__init__()
  114. ## Backbone
  115. self.backbone = "resnet18"
  116. class Fcos_R50_1x_Config(Fcos_R18_1x_Config):
  117. def __init__(self) -> None:
  118. super().__init__()
  119. self.backbone = "resnet50"
  120. # --------------- 3x scheduler ---------------
  121. class Fcos_R18_3x_Config(Fcos_R18_1x_Config):
  122. def __init__(self) -> None:
  123. super().__init__()
  124. # --------- Train epoch ---------
  125. self.max_epoch = 36 # 3x
  126. self.lr_epoch = [24, 33] # 3x
  127. self.eval_epoch = 2
  128. # --------- Data process ---------
  129. ## input size
  130. self.train_min_size = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800] # short edge of image
  131. self.train_max_size = 1333
  132. self.test_min_size = [800]
  133. self.test_max_size = 1333
  134. class Fcos_R50_3x_Config(Fcos_R18_3x_Config):
  135. def __init__(self) -> None:
  136. super().__init__()
  137. ## Backbone
  138. self.backbone = "resnet50"
  139. # --------------- RT-FCOS & 3x scheduler ---------------
  140. class FcosRT_R18_3x_Config(FcosBaseConfig):
  141. def __init__(self) -> None:
  142. super().__init__()
  143. ## Backbone
  144. self.backbone = "resnet18"
  145. self.max_stride = 32
  146. self.out_stride = [8, 16, 32]
  147. # --------- Neck ---------
  148. self.neck = 'basic_fpn'
  149. self.fpn_p6_feat = False
  150. self.fpn_p7_feat = False
  151. self.fpn_p6_from_c5 = False
  152. # --------- Head ---------
  153. self.head = 'fcos_rt_head'
  154. self.head_dim = 256
  155. self.num_cls_head = 4
  156. self.num_reg_head = 4
  157. self.head_act = 'relu'
  158. self.head_norm = 'GN'
  159. # --------- Post-process ---------
  160. self.train_topk = 1000
  161. self.train_conf_thresh = 0.05
  162. self.train_nms_thresh = 0.6
  163. self.test_topk = 100
  164. self.test_conf_thresh = 0.4
  165. self.test_nms_thresh = 0.45
  166. self.nms_class_agnostic = True
  167. # --------- Label Assignment ---------
  168. self.matcher = 'simota'
  169. self.matcher_hpy = {'soft_center_radius': 3.0,
  170. 'topk_candidates': 13}
  171. # --------- Loss weight ---------
  172. self.focal_loss_alpha = 0.25
  173. self.focal_loss_gamma = 2.0
  174. self.loss_cls_weight = 1.0
  175. self.loss_reg_weight = 2.0
  176. # --------- Train epoch ---------
  177. self.max_epoch = 36 # 3x
  178. self.lr_epoch = [24, 33] # 3x
  179. # --------- Data process ---------
  180. ## input size
  181. self.train_min_size = [256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608] # short edge of image
  182. self.train_max_size = 900
  183. self.test_min_size = [512]
  184. self.test_max_size = 736
  185. ## Pixel mean & std
  186. self.pixel_mean = [0.485, 0.456, 0.406]
  187. self.pixel_std = [0.229, 0.224, 0.225]
  188. ## Transforms
  189. self.box_format = 'xyxy'
  190. self.normalize_coords = False
  191. self.detr_style = False
  192. self.trans_config = [
  193. {'name': 'RandomHFlip'},
  194. {'name': 'RandomResize'},
  195. ]
  196. class FcosRT_R50_3x_Config(FcosRT_R18_3x_Config):
  197. def __init__(self) -> None:
  198. super().__init__()
  199. # --------- Backbone ---------
  200. self.backbone = "resnet50"
  201. # --------------- E2E-FCOS & 3x scheduler ---------------
  202. class FcosE2E_R18_3x_Config(FcosBaseConfig):
  203. def __init__(self) -> None:
  204. super().__init__()
  205. ## Backbone
  206. self.backbone = "resnet18"
  207. self.max_stride = 32
  208. self.out_stride = [8, 16, 32]
  209. # --------- Neck ---------
  210. self.neck = 'basic_fpn'
  211. self.fpn_p6_feat = False
  212. self.fpn_p7_feat = False
  213. self.fpn_p6_from_c5 = False
  214. # --------- Head ---------
  215. self.head = 'fcos_rt_head'
  216. self.head_dim = 256
  217. self.num_cls_head = 4
  218. self.num_reg_head = 4
  219. self.head_act = 'relu'
  220. self.head_norm = 'GN'
  221. # --------- Post-process ---------
  222. self.train_topk = 100
  223. self.train_conf_thresh = 0.05
  224. self.test_topk = 100
  225. self.test_conf_thresh = 0.4
  226. # --------- Label Assignment ---------
  227. self.matcher = 'simota'
  228. self.matcher_hpy = {'soft_center_radius': 3.0,
  229. 'topk_candidates': 13}
  230. # --------- Loss weight ---------
  231. self.focal_loss_alpha = 0.25
  232. self.focal_loss_gamma = 2.0
  233. self.loss_cls_weight = 1.0
  234. self.loss_reg_weight = 2.0
  235. # --------- Train epoch ---------
  236. self.max_epoch = 36 # 3x
  237. self.lr_epoch = [24, 33] # 3x
  238. # --------- Data process ---------
  239. ## input size
  240. self.train_min_size = [256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608] # short edge of image
  241. self.train_max_size = 900
  242. self.test_min_size = [512]
  243. self.test_max_size = 736
  244. ## Pixel mean & std
  245. self.pixel_mean = [0.485, 0.456, 0.406]
  246. self.pixel_std = [0.229, 0.224, 0.225]
  247. ## Transforms
  248. self.box_format = 'xyxy'
  249. self.normalize_coords = False
  250. self.detr_style = False
  251. self.trans_config = [
  252. {'name': 'RandomHFlip'},
  253. {'name': 'RandomResize'},
  254. ]
  255. # --------------- PSS-FCOS & 3x scheduler ---------------
  256. class FcosPSS_R18_3x_Config(FcosBaseConfig):
  257. def __init__(self) -> None:
  258. super().__init__()
  259. ## Backbone
  260. self.backbone = "resnet18"
  261. self.max_stride = 32
  262. self.out_stride = [8, 16, 32]
  263. # --------- Neck ---------
  264. self.neck = 'basic_fpn'
  265. self.fpn_p6_feat = False
  266. self.fpn_p7_feat = False
  267. self.fpn_p6_from_c5 = False
  268. # --------- Head ---------
  269. self.head = 'fcos_pss_head'
  270. self.head_dim = 256
  271. self.num_cls_head = 4
  272. self.num_reg_head = 4
  273. self.head_act = 'relu'
  274. self.head_norm = 'GN'
  275. # --------- Post-process ---------
  276. self.train_topk = 100
  277. self.train_conf_thresh = 0.05
  278. self.test_topk = 100
  279. self.test_conf_thresh = 0.4
  280. # --------- Label Assignment ---------
  281. self.matcher = 'simota'
  282. self.matcher_hpy = {'soft_center_radius': 3.0,
  283. 'topk_candidates': 13}
  284. # --------- Loss weight ---------
  285. self.focal_loss_alpha = 0.25
  286. self.focal_loss_gamma = 2.0
  287. self.loss_cls_weight = 1.0
  288. self.loss_reg_weight = 2.0
  289. self.loss_pss_weight = 1.0
  290. # --------- Train epoch ---------
  291. self.max_epoch = 36 # 3x
  292. self.lr_epoch = [24, 33] # 3x
  293. # --------- Data process ---------
  294. ## input size
  295. self.train_min_size = [256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608] # short edge of image
  296. self.train_max_size = 900
  297. self.test_min_size = [512]
  298. self.test_max_size = 736
  299. ## Pixel mean & std
  300. self.pixel_mean = [0.485, 0.456, 0.406]
  301. self.pixel_std = [0.229, 0.224, 0.225]
  302. ## Transforms
  303. self.box_format = 'xyxy'
  304. self.normalize_coords = False
  305. self.detr_style = False
  306. self.trans_config = [
  307. {'name': 'RandomHFlip'},
  308. {'name': 'RandomResize'},
  309. ]