# rtdetr_config.py
# Real-time Transformer-based Object Detector
  2. def build_rtdetr_config(args):
  3. if args.model == "rtdetr_r18":
  4. return RTDetrR18Config()
  5. elif args.model == "rtdetr_r50":
  6. return RTDetrR50Config()
  7. raise NotImplementedError("No config for model: {}".format(args.model))
# RT-DETR base config
  9. class RTDetrBaseConfig(object):
  10. def __init__(self) -> None:
  11. # ---------------- Model config ----------------
  12. self.out_stride = [8, 16, 32]
  13. self.max_stride = 32
  14. ## Backbone
  15. self.backbone = 'resnet18'
  16. self.backbone_norm = 'BN'
  17. self.pretrained_weight = 'imagenet1k_v1'
  18. self.pretrained = True
  19. self.freeze_at = 0
  20. self.freeze_stem_only = False
  21. ## Image Encoder - FPN
  22. self.fpn = 'hybrid_encoder'
  23. self.fpn_num_blocks = 3
  24. self.fpn_expand_ratio = 0.5
  25. self.fpn_act = 'silu'
  26. self.fpn_norm = 'BN'
  27. self.fpn_depthwise = False
  28. self.hidden_dim = 256
  29. self.en_num_heads = 8
  30. self.en_num_layers = 1
  31. self.en_ffn_dim = 1024
  32. self.en_dropout = 0.0
  33. self.en_act = 'gelu'
  34. ## Transformer Decoder
  35. self.transformer = 'rtdetr_transformer'
  36. self.de_num_heads = 8
  37. self.de_num_layers = 3
  38. self.de_ffn_dim = 1024
  39. self.de_dropout = 0.0
  40. self.de_act = 'relu'
  41. self.de_num_points = 4
  42. self.num_queries = 300
  43. self.learnt_init_query = False
  44. ## DN
  45. self.dn_num_denoising = 100
  46. self.dn_label_noise_ratio = 0.5
  47. self.dn_box_noise_scale = 1
  48. # ---------------- Post-process config ----------------
  49. ## Post process
  50. self.val_topk = 300
  51. self.val_conf_thresh = 0.001
  52. self.val_nms_thresh = 0.7
  53. self.test_topk = 300
  54. self.test_conf_thresh = 0.4
  55. self.test_nms_thresh = 0.5
  56. # ---------------- Assignment config ----------------
  57. ## Matcher
  58. self.cost_class = 2.0
  59. self.cost_bbox = 5.0
  60. self.cost_giou = 2.0
  61. ## Loss weight
  62. self.loss_cls = 1.0
  63. self.loss_box = 5.0
  64. self.loss_giou = 2.0
  65. # ---------------- ModelEMA config ----------------
  66. self.use_ema = True
  67. self.ema_decay = 0.9999
  68. self.ema_tau = 2000
  69. # ---------------- Optimizer config ----------------
  70. self.trainer = 'rtdetr'
  71. self.optimizer = 'adamw'
  72. self.per_image_lr = 0.0001 / 16
  73. self.base_lr = None # base_lr = per_image_lr * batch_size
  74. self.min_lr_ratio = 0.0
  75. self.backbone_lr_ratio = 0.1
  76. self.momentum = None
  77. self.weight_decay = 0.0001
  78. self.clip_max_norm = 0.1
  79. # ---------------- Lr Scheduler config ----------------
  80. self.warmup = 'linear'
  81. self.warmup_iters = 2000
  82. self.warmup_factor = 0.00066667
  83. self.lr_scheduler = "step"
  84. self.lr_epoch = [100]
  85. self.max_epoch = 72
  86. self.eval_epoch = 1
  87. # ---------------- Data process config ----------------
  88. self.aug_type = 'ssd'
  89. self.box_format = 'xywh'
  90. self.normalize_coords = True
  91. self.mosaic_prob = 0.0
  92. self.mixup_prob = 0.0
  93. self.copy_paste = 0.0
  94. self.multi_scale = [0.75, 1.25]
  95. ## Pixel mean & std
  96. self.pixel_mean = [123.675, 116.28, 103.53] # RGB format
  97. self.pixel_std = [58.395, 57.12, 57.375] # RGB format
  98. ## Transforms
  99. self.train_img_size = 640
  100. self.test_img_size = 640
  101. def print_config(self):
  102. config_dict = {key: value for key, value in self.__dict__.items() if not key.startswith('__')}
  103. for k, v in config_dict.items():
  104. print("{} : {}".format(k, v))
# RT-DETR-R18
  106. class RTDetrR18Config(RTDetrBaseConfig):
  107. def __init__(self) -> None:
  108. super().__init__()
  109. ## Backbone
  110. self.backbone = 'resnet18'
  111. self.backbone_norm = 'BN'
  112. self.pretrained_weight = 'imagenet1k_v1'
  113. self.pretrained = True
  114. self.freeze_at = -1
  115. self.freeze_stem_only = False
# RT-DETR-R50
  117. class RTDetrR50Config(RTDetrBaseConfig):
  118. def __init__(self) -> None:
  119. super().__init__()
  120. ## Backbone
  121. self.backbone = 'resnet50'
  122. self.backbone_norm = 'BN'
  123. self.pretrained_weight = 'imagenet1k_v1'
  124. self.pretrained = True
  125. self.freeze_at = -1
  126. self.freeze_stem_only = False
  127. ## Transformer Decoder
  128. self.de_num_layers = 6