basic.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. import numpy as np
  2. import torch
  3. import torch.nn as nn
  4. import torch.nn.functional as F
  5. # ---------------------------- NMS ----------------------------
  6. ## basic NMS
  7. def nms(bboxes, scores, nms_thresh):
  8. """"Pure Python NMS."""
  9. x1 = bboxes[:, 0] #xmin
  10. y1 = bboxes[:, 1] #ymin
  11. x2 = bboxes[:, 2] #xmax
  12. y2 = bboxes[:, 3] #ymax
  13. areas = (x2 - x1) * (y2 - y1)
  14. order = scores.argsort()[::-1]
  15. keep = []
  16. while order.size > 0:
  17. i = order[0]
  18. keep.append(i)
  19. # compute iou
  20. xx1 = np.maximum(x1[i], x1[order[1:]])
  21. yy1 = np.maximum(y1[i], y1[order[1:]])
  22. xx2 = np.minimum(x2[i], x2[order[1:]])
  23. yy2 = np.minimum(y2[i], y2[order[1:]])
  24. w = np.maximum(1e-10, xx2 - xx1)
  25. h = np.maximum(1e-10, yy2 - yy1)
  26. inter = w * h
  27. iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-14)
  28. #reserve all the boundingbox whose ovr less than thresh
  29. inds = np.where(iou <= nms_thresh)[0]
  30. order = order[inds + 1]
  31. return keep
  32. ## class-agnostic NMS
  33. def multiclass_nms_class_agnostic(scores, labels, bboxes, nms_thresh):
  34. # nms
  35. keep = nms(bboxes, scores, nms_thresh)
  36. scores = scores[keep]
  37. labels = labels[keep]
  38. bboxes = bboxes[keep]
  39. return scores, labels, bboxes
  40. ## class-aware NMS
  41. def multiclass_nms_class_aware(scores, labels, bboxes, nms_thresh, num_classes):
  42. # nms
  43. keep = np.zeros(len(bboxes), dtype=np.int32)
  44. for i in range(num_classes):
  45. inds = np.where(labels == i)[0]
  46. if len(inds) == 0:
  47. continue
  48. c_bboxes = bboxes[inds]
  49. c_scores = scores[inds]
  50. c_keep = nms(c_bboxes, c_scores, nms_thresh)
  51. keep[inds[c_keep]] = 1
  52. keep = np.where(keep > 0)
  53. scores = scores[keep]
  54. labels = labels[keep]
  55. bboxes = bboxes[keep]
  56. return scores, labels, bboxes
  57. ## multi-class NMS
  58. def multiclass_nms(scores, labels, bboxes, nms_thresh, num_classes, class_agnostic=False):
  59. if class_agnostic:
  60. return multiclass_nms_class_agnostic(scores, labels, bboxes, nms_thresh)
  61. else:
  62. return multiclass_nms_class_aware(scores, labels, bboxes, nms_thresh, num_classes)
  63. # ----------------- MLP modules -----------------
  64. class MLP(nn.Module):
  65. def __init__(self, in_dim, hidden_dim, out_dim, num_layers):
  66. super().__init__()
  67. self.num_layers = num_layers
  68. h = [hidden_dim] * (num_layers - 1)
  69. self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([in_dim] + h, h + [out_dim]))
  70. def forward(self, x):
  71. for i, layer in enumerate(self.layers):
  72. x = nn.functional.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
  73. return x
  74. class FFN(nn.Module):
  75. def __init__(self, d_model=256, ffn_dim=1024, dropout=0., act_type='relu'):
  76. super().__init__()
  77. self.ffn_dim = ffn_dim
  78. self.linear1 = nn.Linear(d_model, self.ffn_dim)
  79. self.activation = get_activation(act_type)
  80. self.dropout2 = nn.Dropout(dropout)
  81. self.linear2 = nn.Linear(self.ffn_dim, d_model)
  82. self.dropout3 = nn.Dropout(dropout)
  83. self.norm = nn.LayerNorm(d_model)
  84. def forward(self, src):
  85. src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
  86. src = src + self.dropout3(src2)
  87. src = self.norm(src)
  88. return src
  89. # ----------------- Basic CNN Ops -----------------
  90. def get_conv2d(c1, c2, k, p, s, g, bias=False):
  91. conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, groups=g, bias=bias)
  92. return conv
  93. def get_activation(act_type=None):
  94. if act_type == 'relu':
  95. return nn.ReLU(inplace=True)
  96. elif act_type == 'lrelu':
  97. return nn.LeakyReLU(0.1, inplace=True)
  98. elif act_type == 'mish':
  99. return nn.Mish(inplace=True)
  100. elif act_type == 'silu':
  101. return nn.SiLU(inplace=True)
  102. elif act_type == 'gelu':
  103. return nn.GELU()
  104. elif act_type is None:
  105. return nn.Identity()
  106. else:
  107. raise NotImplementedError
  108. def get_norm(norm_type, dim):
  109. if norm_type == 'BN':
  110. return nn.BatchNorm2d(dim)
  111. elif norm_type == 'GN':
  112. return nn.GroupNorm(num_groups=32, num_channels=dim)
  113. elif norm_type is None:
  114. return nn.Identity()
  115. else:
  116. raise NotImplementedError
  117. def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
  118. """3x3 convolution with padding"""
  119. return nn.Conv2d(
  120. in_planes,
  121. out_planes,
  122. kernel_size=3,
  123. stride=stride,
  124. padding=dilation,
  125. groups=groups,
  126. bias=False,
  127. dilation=dilation,
  128. )
  129. def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
  130. """1x1 convolution"""
  131. return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
  132. class FrozenBatchNorm2d(torch.nn.Module):
  133. def __init__(self, n):
  134. super(FrozenBatchNorm2d, self).__init__()
  135. self.register_buffer("weight", torch.ones(n))
  136. self.register_buffer("bias", torch.zeros(n))
  137. self.register_buffer("running_mean", torch.zeros(n))
  138. self.register_buffer("running_var", torch.ones(n))
  139. def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
  140. missing_keys, unexpected_keys, error_msgs):
  141. num_batches_tracked_key = prefix + 'num_batches_tracked'
  142. if num_batches_tracked_key in state_dict:
  143. del state_dict[num_batches_tracked_key]
  144. super(FrozenBatchNorm2d, self)._load_from_state_dict(
  145. state_dict, prefix, local_metadata, strict,
  146. missing_keys, unexpected_keys, error_msgs)
  147. def forward(self, x):
  148. # move reshapes to the beginning
  149. # to make it fuser-friendly
  150. w = self.weight.reshape(1, -1, 1, 1)
  151. b = self.bias.reshape(1, -1, 1, 1)
  152. rv = self.running_var.reshape(1, -1, 1, 1)
  153. rm = self.running_mean.reshape(1, -1, 1, 1)
  154. eps = 1e-5
  155. scale = w * (rv + eps).rsqrt()
  156. bias = b - rm * scale
  157. return x * scale + bias
  158. class BasicConv(nn.Module):
  159. def __init__(self,
  160. in_dim, # in channels
  161. out_dim, # out channels
  162. kernel_size=1, # kernel size
  163. padding=0, # padding
  164. stride=1, # padding
  165. act_type :str = 'lrelu', # activation
  166. norm_type :str = 'BN', # normalization
  167. ):
  168. super(BasicConv, self).__init__()
  169. add_bias = False if norm_type else True
  170. self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, g=1, bias=add_bias)
  171. self.norm = get_norm(norm_type, out_dim)
  172. self.act = get_activation(act_type)
  173. def forward(self, x):
  174. return self.act(self.norm(self.conv(x)))
  175. class DepthwiseConv(nn.Module):
  176. def __init__(self,
  177. in_dim, # in channels
  178. out_dim, # out channels
  179. kernel_size=1, # kernel size
  180. padding=0, # padding
  181. stride=1, # padding
  182. act_type :str = None, # activation
  183. norm_type :str = 'BN', # normalization
  184. ):
  185. super(DepthwiseConv, self).__init__()
  186. assert in_dim == out_dim
  187. add_bias = False if norm_type else True
  188. self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, g=out_dim, bias=add_bias)
  189. self.norm = get_norm(norm_type, out_dim)
  190. self.act = get_activation(act_type)
  191. def forward(self, x):
  192. return self.act(self.norm(self.conv(x)))
  193. class PointwiseConv(nn.Module):
  194. def __init__(self,
  195. in_dim, # in channels
  196. out_dim, # out channels
  197. act_type :str = 'lrelu', # activation
  198. norm_type :str = 'BN', # normalization
  199. ):
  200. super(DepthwiseConv, self).__init__()
  201. assert in_dim == out_dim
  202. add_bias = False if norm_type else True
  203. self.conv = get_conv2d(in_dim, out_dim, k=1, p=0, s=1, g=1, bias=add_bias)
  204. self.norm = get_norm(norm_type, out_dim)
  205. self.act = get_activation(act_type)
  206. def forward(self, x):
  207. return self.act(self.norm(self.conv(x)))
  208. # ----------------- CNN Modules -----------------
  209. class Bottleneck(nn.Module):
  210. def __init__(self,
  211. in_dim,
  212. out_dim,
  213. expand_ratio = 0.5,
  214. kernel_sizes = [3, 3],
  215. shortcut = True,
  216. act_type = 'silu',
  217. norm_type = 'BN',
  218. depthwise = False,):
  219. super(Bottleneck, self).__init__()
  220. inter_dim = int(out_dim * expand_ratio)
  221. if depthwise:
  222. self.cv1 = nn.Sequential(
  223. DepthwiseConv(in_dim, in_dim, kernel_size=kernel_sizes[0], padding=kernel_sizes[0]//2, act_type=act_type, norm_type=norm_type),
  224. PointwiseConv(in_dim, inter_dim, act_type=act_type, norm_type=norm_type),
  225. )
  226. self.cv2 = nn.Sequential(
  227. DepthwiseConv(inter_dim, inter_dim, kernel_size=kernel_sizes[1], padding=kernel_sizes[1]//2, act_type=act_type, norm_type=norm_type),
  228. PointwiseConv(inter_dim, out_dim, act_type=act_type, norm_type=norm_type),
  229. )
  230. else:
  231. self.cv1 = BasicConv(in_dim, inter_dim, kernel_size=kernel_sizes[0], padding=kernel_sizes[0]//2, act_type=act_type, norm_type=norm_type)
  232. self.cv2 = BasicConv(inter_dim, out_dim, kernel_size=kernel_sizes[1], padding=kernel_sizes[1]//2, act_type=act_type, norm_type=norm_type)
  233. self.shortcut = shortcut and in_dim == out_dim
  234. def forward(self, x):
  235. h = self.cv2(self.cv1(x))
  236. return x + h if self.shortcut else h
  237. class RTCBlock(nn.Module):
  238. def __init__(self,
  239. in_dim,
  240. out_dim,
  241. num_blocks = 1,
  242. shortcut = False,
  243. act_type = 'silu',
  244. norm_type = 'BN',
  245. depthwise = False,):
  246. super(RTCBlock, self).__init__()
  247. self.inter_dim = out_dim // 2
  248. self.input_proj = BasicConv(in_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  249. self.m = nn.Sequential(*(
  250. Bottleneck(self.inter_dim, self.inter_dim, 1.0, [3, 3], shortcut, act_type, norm_type, depthwise)
  251. for _ in range(num_blocks)))
  252. self.output_proj = BasicConv((2 + num_blocks) * self.inter_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  253. def forward(self, x):
  254. # Input proj
  255. x1, x2 = torch.chunk(self.input_proj(x), 2, dim=1)
  256. out = list([x1, x2])
  257. # Bottlenecl
  258. out.extend(m(out[-1]) for m in self.m)
  259. # Output proj
  260. out = self.output_proj(torch.cat(out, dim=1))
  261. return out