basic.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. import numpy as np
  2. import copy
  3. import torch
  4. import torch.nn as nn
  5. import torch.nn.functional as F
  6. # ---------------------------- NMS ----------------------------
  7. ## basic NMS
  8. def nms(bboxes, scores, nms_thresh):
  9. """"Pure Python NMS."""
  10. x1 = bboxes[:, 0] #xmin
  11. y1 = bboxes[:, 1] #ymin
  12. x2 = bboxes[:, 2] #xmax
  13. y2 = bboxes[:, 3] #ymax
  14. areas = (x2 - x1) * (y2 - y1)
  15. order = scores.argsort()[::-1]
  16. keep = []
  17. while order.size > 0:
  18. i = order[0]
  19. keep.append(i)
  20. # compute iou
  21. xx1 = np.maximum(x1[i], x1[order[1:]])
  22. yy1 = np.maximum(y1[i], y1[order[1:]])
  23. xx2 = np.minimum(x2[i], x2[order[1:]])
  24. yy2 = np.minimum(y2[i], y2[order[1:]])
  25. w = np.maximum(1e-10, xx2 - xx1)
  26. h = np.maximum(1e-10, yy2 - yy1)
  27. inter = w * h
  28. iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-14)
  29. #reserve all the boundingbox whose ovr less than thresh
  30. inds = np.where(iou <= nms_thresh)[0]
  31. order = order[inds + 1]
  32. return keep
  33. ## class-agnostic NMS
  34. def multiclass_nms_class_agnostic(scores, labels, bboxes, nms_thresh):
  35. # nms
  36. keep = nms(bboxes, scores, nms_thresh)
  37. scores = scores[keep]
  38. labels = labels[keep]
  39. bboxes = bboxes[keep]
  40. return scores, labels, bboxes
  41. ## class-aware NMS
  42. def multiclass_nms_class_aware(scores, labels, bboxes, nms_thresh, num_classes):
  43. # nms
  44. keep = np.zeros(len(bboxes), dtype=np.int32)
  45. for i in range(num_classes):
  46. inds = np.where(labels == i)[0]
  47. if len(inds) == 0:
  48. continue
  49. c_bboxes = bboxes[inds]
  50. c_scores = scores[inds]
  51. c_keep = nms(c_bboxes, c_scores, nms_thresh)
  52. keep[inds[c_keep]] = 1
  53. keep = np.where(keep > 0)
  54. scores = scores[keep]
  55. labels = labels[keep]
  56. bboxes = bboxes[keep]
  57. return scores, labels, bboxes
  58. ## multi-class NMS
  59. def multiclass_nms(scores, labels, bboxes, nms_thresh, num_classes, class_agnostic=False):
  60. if class_agnostic:
  61. return multiclass_nms_class_agnostic(scores, labels, bboxes, nms_thresh)
  62. else:
  63. return multiclass_nms_class_aware(scores, labels, bboxes, nms_thresh, num_classes)
  64. # ----------------- MLP modules -----------------
  65. class MLP(nn.Module):
  66. def __init__(self, in_dim, hidden_dim, out_dim, num_layers):
  67. super().__init__()
  68. self.num_layers = num_layers
  69. h = [hidden_dim] * (num_layers - 1)
  70. self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([in_dim] + h, h + [out_dim]))
  71. def forward(self, x):
  72. for i, layer in enumerate(self.layers):
  73. x = nn.functional.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
  74. return x
  75. class FFN(nn.Module):
  76. def __init__(self, d_model=256, ffn_dim=1024, dropout=0., act_type='relu'):
  77. super().__init__()
  78. self.ffn_dim = ffn_dim
  79. self.linear1 = nn.Linear(d_model, self.ffn_dim)
  80. self.activation = get_activation(act_type)
  81. self.dropout2 = nn.Dropout(dropout)
  82. self.linear2 = nn.Linear(self.ffn_dim, d_model)
  83. self.dropout3 = nn.Dropout(dropout)
  84. self.norm = nn.LayerNorm(d_model)
  85. def forward(self, src):
  86. src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
  87. src = src + self.dropout3(src2)
  88. src = self.norm(src)
  89. return src
  90. # ----------------- Basic CNN Ops -----------------
  91. def get_conv2d(c1, c2, k, p, s, g, bias=False):
  92. conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, groups=g, bias=bias)
  93. return conv
  94. def get_activation(act_type=None):
  95. if act_type == 'relu':
  96. return nn.ReLU(inplace=True)
  97. elif act_type == 'lrelu':
  98. return nn.LeakyReLU(0.1, inplace=True)
  99. elif act_type == 'mish':
  100. return nn.Mish(inplace=True)
  101. elif act_type == 'silu':
  102. return nn.SiLU(inplace=True)
  103. elif act_type == 'gelu':
  104. return nn.GELU()
  105. elif act_type is None:
  106. return nn.Identity()
  107. else:
  108. raise NotImplementedError
  109. def get_norm(norm_type, dim):
  110. if norm_type == 'BN':
  111. return nn.BatchNorm2d(dim)
  112. elif norm_type == 'GN':
  113. return nn.GroupNorm(num_groups=32, num_channels=dim)
  114. elif norm_type is None:
  115. return nn.Identity()
  116. else:
  117. raise NotImplementedError
  118. def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
  119. """3x3 convolution with padding"""
  120. return nn.Conv2d(
  121. in_planes,
  122. out_planes,
  123. kernel_size=3,
  124. stride=stride,
  125. padding=dilation,
  126. groups=groups,
  127. bias=False,
  128. dilation=dilation,
  129. )
  130. def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
  131. """1x1 convolution"""
  132. return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
  133. class FrozenBatchNorm2d(torch.nn.Module):
  134. def __init__(self, n):
  135. super(FrozenBatchNorm2d, self).__init__()
  136. self.register_buffer("weight", torch.ones(n))
  137. self.register_buffer("bias", torch.zeros(n))
  138. self.register_buffer("running_mean", torch.zeros(n))
  139. self.register_buffer("running_var", torch.ones(n))
  140. def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
  141. missing_keys, unexpected_keys, error_msgs):
  142. num_batches_tracked_key = prefix + 'num_batches_tracked'
  143. if num_batches_tracked_key in state_dict:
  144. del state_dict[num_batches_tracked_key]
  145. super(FrozenBatchNorm2d, self)._load_from_state_dict(
  146. state_dict, prefix, local_metadata, strict,
  147. missing_keys, unexpected_keys, error_msgs)
  148. def forward(self, x):
  149. # move reshapes to the beginning
  150. # to make it fuser-friendly
  151. w = self.weight.reshape(1, -1, 1, 1)
  152. b = self.bias.reshape(1, -1, 1, 1)
  153. rv = self.running_var.reshape(1, -1, 1, 1)
  154. rm = self.running_mean.reshape(1, -1, 1, 1)
  155. eps = 1e-5
  156. scale = w * (rv + eps).rsqrt()
  157. bias = b - rm * scale
  158. return x * scale + bias
  159. class BasicConv(nn.Module):
  160. def __init__(self,
  161. in_dim, # in channels
  162. out_dim, # out channels
  163. kernel_size=1, # kernel size
  164. padding=0, # padding
  165. stride=1, # padding
  166. act_type :str = 'lrelu', # activation
  167. norm_type :str = 'BN', # normalization
  168. depthwise :bool = False
  169. ):
  170. super(BasicConv, self).__init__()
  171. add_bias = False if norm_type else True
  172. self.depthwise = depthwise
  173. if not depthwise:
  174. self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, g=1, bias=add_bias)
  175. self.norm = get_norm(norm_type, out_dim)
  176. else:
  177. self.conv1 = get_conv2d(in_dim, in_dim, k=kernel_size, p=padding, s=stride, g=1, bias=add_bias)
  178. self.norm1 = get_norm(norm_type, in_dim)
  179. self.conv2 = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, g=1, bias=add_bias)
  180. self.norm2 = get_norm(norm_type, out_dim)
  181. self.act = get_activation(act_type)
  182. def forward(self, x):
  183. if not self.depthwise:
  184. return self.act(self.norm(self.conv(x)))
  185. else:
  186. return self.act(self.norm2(self.conv2(self.norm1(self.conv1(x)))))
  187. # ----------------- CNN Modules -----------------
  188. class Bottleneck(nn.Module):
  189. def __init__(self,
  190. in_dim,
  191. out_dim,
  192. expand_ratio = 0.5,
  193. kernel_sizes = [3, 3],
  194. shortcut = True,
  195. act_type = 'silu',
  196. norm_type = 'BN',
  197. depthwise = False,):
  198. super(Bottleneck, self).__init__()
  199. inter_dim = int(out_dim * expand_ratio)
  200. paddings = [k // 2 for k in kernel_sizes]
  201. self.cv1 = BasicConv(in_dim, inter_dim,
  202. kernel_size=kernel_sizes[0], padding=paddings[0],
  203. act_type=act_type, norm_type=norm_type, depthwise=depthwise)
  204. self.cv2 = BasicConv(inter_dim, out_dim,
  205. kernel_size=kernel_sizes[1], padding=paddings[1],
  206. act_type=act_type, norm_type=norm_type, depthwise=depthwise)
  207. self.shortcut = shortcut and in_dim == out_dim
  208. def forward(self, x):
  209. h = self.cv2(self.cv1(x))
  210. return x + h if self.shortcut else h
  211. class RTCBlock(nn.Module):
  212. def __init__(self,
  213. in_dim,
  214. out_dim,
  215. num_blocks = 1,
  216. shortcut = False,
  217. act_type = 'silu',
  218. norm_type = 'BN',
  219. depthwise = False,):
  220. super(RTCBlock, self).__init__()
  221. self.inter_dim = out_dim // 2
  222. self.input_proj = BasicConv(in_dim, self.inter_dim * 2, kernel_size=1, act_type=act_type, norm_type=norm_type)
  223. self.m = nn.Sequential(*(
  224. Bottleneck(self.inter_dim, self.inter_dim, 1.0, [3, 3], shortcut, act_type, norm_type, depthwise)
  225. for _ in range(num_blocks)))
  226. self.output_proj = BasicConv((2 + num_blocks) * self.inter_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  227. def forward(self, x):
  228. # Input proj
  229. x1, x2 = torch.chunk(self.input_proj(x), 2, dim=1)
  230. out = list([x1, x2])
  231. # Bottlenecl
  232. out.extend(m(out[-1]) for m in self.m)
  233. # Output proj
  234. out = self.output_proj(torch.cat(out, dim=1))
  235. return out