basic.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. import numpy as np
  2. import torch
  3. import torch.nn as nn
  4. # ---------------------------- NMS ----------------------------
  5. ## basic NMS
  6. def nms(bboxes, scores, nms_thresh):
  7. """"Pure Python NMS."""
  8. x1 = bboxes[:, 0] #xmin
  9. y1 = bboxes[:, 1] #ymin
  10. x2 = bboxes[:, 2] #xmax
  11. y2 = bboxes[:, 3] #ymax
  12. areas = (x2 - x1) * (y2 - y1)
  13. order = scores.argsort()[::-1]
  14. keep = []
  15. while order.size > 0:
  16. i = order[0]
  17. keep.append(i)
  18. # compute iou
  19. xx1 = np.maximum(x1[i], x1[order[1:]])
  20. yy1 = np.maximum(y1[i], y1[order[1:]])
  21. xx2 = np.minimum(x2[i], x2[order[1:]])
  22. yy2 = np.minimum(y2[i], y2[order[1:]])
  23. w = np.maximum(1e-10, xx2 - xx1)
  24. h = np.maximum(1e-10, yy2 - yy1)
  25. inter = w * h
  26. iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-14)
  27. #reserve all the boundingbox whose ovr less than thresh
  28. inds = np.where(iou <= nms_thresh)[0]
  29. order = order[inds + 1]
  30. return keep
  31. ## class-agnostic NMS
  32. def multiclass_nms_class_agnostic(scores, labels, bboxes, nms_thresh):
  33. # nms
  34. keep = nms(bboxes, scores, nms_thresh)
  35. scores = scores[keep]
  36. labels = labels[keep]
  37. bboxes = bboxes[keep]
  38. return scores, labels, bboxes
  39. ## class-aware NMS
  40. def multiclass_nms_class_aware(scores, labels, bboxes, nms_thresh, num_classes):
  41. # nms
  42. keep = np.zeros(len(bboxes), dtype=np.int32)
  43. for i in range(num_classes):
  44. inds = np.where(labels == i)[0]
  45. if len(inds) == 0:
  46. continue
  47. c_bboxes = bboxes[inds]
  48. c_scores = scores[inds]
  49. c_keep = nms(c_bboxes, c_scores, nms_thresh)
  50. keep[inds[c_keep]] = 1
  51. keep = np.where(keep > 0)
  52. scores = scores[keep]
  53. labels = labels[keep]
  54. bboxes = bboxes[keep]
  55. return scores, labels, bboxes
  56. ## multi-class NMS
  57. def multiclass_nms(scores, labels, bboxes, nms_thresh, num_classes, class_agnostic=False):
  58. if class_agnostic:
  59. return multiclass_nms_class_agnostic(scores, labels, bboxes, nms_thresh)
  60. else:
  61. return multiclass_nms_class_aware(scores, labels, bboxes, nms_thresh, num_classes)
  62. # ----------------- MLP modules -----------------
  63. class MLP(nn.Module):
  64. def __init__(self, in_dim, hidden_dim, out_dim, num_layers):
  65. super().__init__()
  66. self.num_layers = num_layers
  67. h = [hidden_dim] * (num_layers - 1)
  68. self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([in_dim] + h, h + [out_dim]))
  69. def forward(self, x):
  70. for i, layer in enumerate(self.layers):
  71. x = nn.functional.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
  72. return x
  73. class FFN(nn.Module):
  74. def __init__(self, d_model=256, mlp_ratio=4.0, dropout=0., act_type='relu'):
  75. super().__init__()
  76. self.fpn_dim = round(d_model * mlp_ratio)
  77. self.linear1 = nn.Linear(d_model, self.fpn_dim)
  78. self.activation = get_activation(act_type)
  79. self.dropout2 = nn.Dropout(dropout)
  80. self.linear2 = nn.Linear(self.fpn_dim, d_model)
  81. self.dropout3 = nn.Dropout(dropout)
  82. self.norm = nn.LayerNorm(d_model)
  83. def forward(self, src):
  84. src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
  85. src = src + self.dropout3(src2)
  86. src = self.norm(src)
  87. return src
  88. # ----------------- Basic CNN Ops -----------------
  89. def get_conv2d(c1, c2, k, p, s, g, bias=False):
  90. conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, groups=g, bias=bias)
  91. return conv
  92. def get_activation(act_type=None):
  93. if act_type == 'relu':
  94. return nn.ReLU(inplace=True)
  95. elif act_type == 'lrelu':
  96. return nn.LeakyReLU(0.1, inplace=True)
  97. elif act_type == 'mish':
  98. return nn.Mish(inplace=True)
  99. elif act_type == 'silu':
  100. return nn.SiLU(inplace=True)
  101. elif act_type == 'gelu':
  102. return nn.GELU()
  103. elif act_type is None:
  104. return nn.Identity()
  105. else:
  106. raise NotImplementedError
  107. def get_norm(norm_type, dim):
  108. if norm_type == 'BN':
  109. return nn.BatchNorm2d(dim)
  110. elif norm_type == 'GN':
  111. return nn.GroupNorm(num_groups=32, num_channels=dim)
  112. elif norm_type is None:
  113. return nn.Identity()
  114. else:
  115. raise NotImplementedError
  116. def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
  117. """3x3 convolution with padding"""
  118. return nn.Conv2d(
  119. in_planes,
  120. out_planes,
  121. kernel_size=3,
  122. stride=stride,
  123. padding=dilation,
  124. groups=groups,
  125. bias=False,
  126. dilation=dilation,
  127. )
  128. def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
  129. """1x1 convolution"""
  130. return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
  131. class FrozenBatchNorm2d(torch.nn.Module):
  132. def __init__(self, n):
  133. super(FrozenBatchNorm2d, self).__init__()
  134. self.register_buffer("weight", torch.ones(n))
  135. self.register_buffer("bias", torch.zeros(n))
  136. self.register_buffer("running_mean", torch.zeros(n))
  137. self.register_buffer("running_var", torch.ones(n))
  138. def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
  139. missing_keys, unexpected_keys, error_msgs):
  140. num_batches_tracked_key = prefix + 'num_batches_tracked'
  141. if num_batches_tracked_key in state_dict:
  142. del state_dict[num_batches_tracked_key]
  143. super(FrozenBatchNorm2d, self)._load_from_state_dict(
  144. state_dict, prefix, local_metadata, strict,
  145. missing_keys, unexpected_keys, error_msgs)
  146. def forward(self, x):
  147. # move reshapes to the beginning
  148. # to make it fuser-friendly
  149. w = self.weight.reshape(1, -1, 1, 1)
  150. b = self.bias.reshape(1, -1, 1, 1)
  151. rv = self.running_var.reshape(1, -1, 1, 1)
  152. rm = self.running_mean.reshape(1, -1, 1, 1)
  153. eps = 1e-5
  154. scale = w * (rv + eps).rsqrt()
  155. bias = b - rm * scale
  156. return x * scale + bias
  157. class BasicConv(nn.Module):
  158. def __init__(self,
  159. in_dim, # in channels
  160. out_dim, # out channels
  161. kernel_size=1, # kernel size
  162. padding=0, # padding
  163. stride=1, # padding
  164. act_type :str = 'lrelu', # activation
  165. norm_type :str = 'BN', # normalization
  166. ):
  167. super(BasicConv, self).__init__()
  168. add_bias = False if norm_type else True
  169. self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, g=1, bias=add_bias)
  170. self.norm = get_norm(norm_type, out_dim)
  171. self.act = get_activation(act_type)
  172. def forward(self, x):
  173. return self.act(self.norm(self.conv(x)))
  174. class DepthwiseConv(nn.Module):
  175. def __init__(self,
  176. in_dim, # in channels
  177. out_dim, # out channels
  178. kernel_size=1, # kernel size
  179. padding=0, # padding
  180. stride=1, # padding
  181. act_type :str = None, # activation
  182. norm_type :str = 'BN', # normalization
  183. ):
  184. super(DepthwiseConv, self).__init__()
  185. assert in_dim == out_dim
  186. add_bias = False if norm_type else True
  187. self.conv = get_conv2d(in_dim, out_dim, k=kernel_size, p=padding, s=stride, g=out_dim, bias=add_bias)
  188. self.norm = get_norm(norm_type, out_dim)
  189. self.act = get_activation(act_type)
  190. def forward(self, x):
  191. return self.act(self.norm(self.conv(x)))
  192. class PointwiseConv(nn.Module):
  193. def __init__(self,
  194. in_dim, # in channels
  195. out_dim, # out channels
  196. act_type :str = 'lrelu', # activation
  197. norm_type :str = 'BN', # normalization
  198. ):
  199. super(DepthwiseConv, self).__init__()
  200. assert in_dim == out_dim
  201. add_bias = False if norm_type else True
  202. self.conv = get_conv2d(in_dim, out_dim, k=1, p=0, s=1, g=1, bias=add_bias)
  203. self.norm = get_norm(norm_type, out_dim)
  204. self.act = get_activation(act_type)
  205. def forward(self, x):
  206. return self.act(self.norm(self.conv(x)))
  207. # ----------------- CNN Modules -----------------
  208. class Bottleneck(nn.Module):
  209. def __init__(self,
  210. in_dim,
  211. out_dim,
  212. expand_ratio = 0.5,
  213. kernel_sizes = [3, 3],
  214. shortcut = True,
  215. act_type = 'silu',
  216. norm_type = 'BN',
  217. depthwise = False,):
  218. super(Bottleneck, self).__init__()
  219. inter_dim = int(out_dim * expand_ratio)
  220. if depthwise:
  221. self.cv1 = nn.Sequential(
  222. DepthwiseConv(in_dim, in_dim, kernel_size=kernel_sizes[0], padding=kernel_sizes[0]//2, act_type=act_type, norm_type=norm_type),
  223. PointwiseConv(in_dim, inter_dim, act_type=act_type, norm_type=norm_type),
  224. )
  225. self.cv2 = nn.Sequential(
  226. DepthwiseConv(inter_dim, inter_dim, kernel_size=kernel_sizes[1], padding=kernel_sizes[1]//2, act_type=act_type, norm_type=norm_type),
  227. PointwiseConv(inter_dim, out_dim, act_type=act_type, norm_type=norm_type),
  228. )
  229. else:
  230. self.cv1 = BasicConv(in_dim, inter_dim, kernel_size=kernel_sizes[0], padding=kernel_sizes[0]//2, act_type=act_type, norm_type=norm_type)
  231. self.cv2 = BasicConv(inter_dim, out_dim, kernel_size=kernel_sizes[1], padding=kernel_sizes[1]//2, act_type=act_type, norm_type=norm_type)
  232. self.shortcut = shortcut and in_dim == out_dim
  233. def forward(self, x):
  234. h = self.cv2(self.cv1(x))
  235. return x + h if self.shortcut else h
  236. class RTCBlock(nn.Module):
  237. def __init__(self,
  238. in_dim,
  239. out_dim,
  240. num_blocks = 1,
  241. shortcut = False,
  242. act_type = 'silu',
  243. norm_type = 'BN',
  244. depthwise = False,):
  245. super(RTCBlock, self).__init__()
  246. self.inter_dim = out_dim // 2
  247. self.input_proj = BasicConv(in_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  248. self.m = nn.Sequential(*(
  249. Bottleneck(self.inter_dim, self.inter_dim, 1.0, [3, 3], shortcut, act_type, norm_type, depthwise)
  250. for _ in range(num_blocks)))
  251. self.output_proj = BasicConv((2 + num_blocks) * self.inter_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  252. def forward(self, x):
  253. # Input proj
  254. x1, x2 = torch.chunk(self.input_proj(x), 2, dim=1)
  255. out = list([x1, x2])
  256. # Bottlenecl
  257. out.extend(m(out[-1]) for m in self.m)
  258. # Output proj
  259. out = self.output_proj(torch.cat(out, dim=1))
  260. return out
  261. class RepVggBlock(nn.Module):
  262. def __init__(self, in_dim, out_dim, act_type='relu', norm_type='BN', alpha=False):
  263. super(RepVggBlock, self).__init__()
  264. self.in_dim = in_dim
  265. self.out_dim = out_dim
  266. self.conv1 = BasicConv(in_dim, out_dim, kernel_size=3, padding=1, act_type=None, norm_type=norm_type)
  267. self.conv2 = BasicConv(in_dim, out_dim, kernel_size=3, padding=1, act_type=None, norm_type=norm_type)
  268. self.act = get_activation(act_type)
  269. if alpha:
  270. self.alpha = nn.Parameter(torch.as_tensor([1.0]).float())
  271. else:
  272. self.alpha = None
  273. def forward(self, x):
  274. if hasattr(self, 'conv'):
  275. y = self.conv(x)
  276. else:
  277. if self.alpha:
  278. y = self.conv1(x) + self.alpha * self.conv2(x)
  279. else:
  280. y = self.conv1(x) + self.conv2(x)
  281. y = self.act(y)
  282. return y
  283. def convert_to_deploy(self):
  284. if not hasattr(self, 'conv'):
  285. self.conv = nn.Conv2d(
  286. self.in_dim,
  287. self.out_dim,
  288. kernel_size=3,
  289. stride=1,
  290. padding=1,
  291. groups=1)
  292. kernel, bias = self.get_equivalent_kernel_bias()
  293. # self.conv.weight.set_value(kernel)
  294. # self.conv.bias.set_value(bias)
  295. self.conv.weight.data = kernel
  296. self.conv.bias.data = bias
  297. self.__delattr__('conv1')
  298. self.__delattr__('conv2')
  299. def get_equivalent_kernel_bias(self):
  300. kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
  301. kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
  302. if self.alpha:
  303. return kernel3x3 + self.alpha * self._pad_1x1_to_3x3_tensor(
  304. kernel1x1), bias3x3 + self.alpha * bias1x1
  305. else:
  306. return kernel3x3 + self._pad_1x1_to_3x3_tensor(
  307. kernel1x1), bias3x3 + bias1x1
  308. def _pad_1x1_to_3x3_tensor(self, kernel1x1):
  309. if kernel1x1 is None:
  310. return 0
  311. else:
  312. return nn.functional.pad(kernel1x1, [1, 1, 1, 1])
  313. def _fuse_bn_tensor(self, branch):
  314. if branch is None:
  315. return 0, 0
  316. kernel = branch.conv.weight
  317. running_mean = branch.bn._mean
  318. running_var = branch.bn._variance
  319. gamma = branch.bn.weight
  320. beta = branch.bn.bias
  321. eps = branch.bn._epsilon
  322. std = (running_var + eps).sqrt()
  323. t = (gamma / std).reshape((-1, 1, 1, 1))
  324. return kernel * t, beta - running_mean * gamma / std
  325. class CSPRepLayer(nn.Module):
  326. def __init__(self,
  327. in_dim :int,
  328. out_dim :int,
  329. num_blocks :int = 3,
  330. expansion :float = 1.0,
  331. act_type :str ="silu",
  332. norm_type :str = 'BN'):
  333. super(CSPRepLayer, self).__init__()
  334. hidden_dim = int(out_dim * expansion)
  335. self.conv1 = BasicConv(
  336. in_dim, hidden_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  337. self.conv2 = BasicConv(
  338. in_dim, hidden_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  339. self.bottlenecks = nn.Sequential(*[
  340. RepVggBlock(
  341. hidden_dim, hidden_dim, act_type=act_type, norm_type=norm_type)
  342. for _ in range(num_blocks)
  343. ])
  344. if hidden_dim != out_dim:
  345. self.conv3 = BasicConv(hidden_dim, out_dim, kernel_size=1, act_type=act_type, norm_type=norm_type)
  346. else:
  347. self.conv3 = nn.Identity()
  348. def forward(self, x):
  349. x_1 = self.conv1(x)
  350. x_1 = self.bottlenecks(x_1)
  351. x_2 = self.conv2(x)
  352. return self.conv3(x_1 + x_2)