import numpy as np
import torch
import torch.nn as nn


# ---------------------------- 2D CNN ----------------------------
class SiLU(nn.Module):
    """export-friendly version of nn.SiLU()"""

    @staticmethod
    def forward(x):
        return x * torch.sigmoid(x)


def get_conv2d(c1, c2, k, p, s, d, g, bias=False):
    conv = nn.Conv2d(c1, c2, k, stride=s, padding=p, dilation=d, groups=g, bias=bias)

    return conv


def get_activation(act_type=None):
    if act_type == 'relu':
        return nn.ReLU(inplace=True)
    elif act_type == 'lrelu':
        return nn.LeakyReLU(0.1, inplace=True)
    elif act_type == 'mish':
        return nn.Mish(inplace=True)
    elif act_type == 'silu':
        return nn.SiLU(inplace=True)


def get_norm(norm_type, dim):
    if norm_type == 'BN':
        return nn.BatchNorm2d(dim)
    elif norm_type == 'GN':
        return nn.GroupNorm(num_groups=32, num_channels=dim)


## Basic conv layer
class Conv(nn.Module):
    def __init__(self,
                 c1,                # in channels
                 c2,                # out channels
                 k=1,               # kernel size
                 p=0,               # padding
                 s=1,               # stride
                 d=1,               # dilation
                 act_type='lrelu',  # activation
                 norm_type='BN',    # normalization
                 depthwise=False):
        super(Conv, self).__init__()
        convs = []
        add_bias = False if norm_type else True
        if depthwise:
            convs.append(get_conv2d(c1, c1, k=k, p=p, s=s, d=d, g=c1, bias=add_bias))  # depthwise conv
            if norm_type:
                convs.append(get_norm(norm_type, c1))
            if act_type:
                convs.append(get_activation(act_type))
            # pointwise conv
            convs.append(get_conv2d(c1, c2, k=1, p=0, s=1, d=d, g=1, bias=add_bias))
            if norm_type:
                convs.append(get_norm(norm_type, c2))
            if act_type:
                convs.append(get_activation(act_type))
        else:
            convs.append(get_conv2d(c1, c2, k=k, p=p, s=s, d=d, g=1, bias=add_bias))
            if norm_type:
                convs.append(get_norm(norm_type, c2))
            if act_type:
                convs.append(get_activation(act_type))

        self.convs = nn.Sequential(*convs)

    def forward(self, x):
        return self.convs(x)


# ---------------------------- YOLOv7 Modules ----------------------------
## ELAN-Block proposed by YOLOv7
class ELANBlock(nn.Module):
    def __init__(self, in_dim, out_dim, expand_ratio=0.5, depth=2.0,
                 act_type='silu', norm_type='BN', depthwise=False):
        super(ELANBlock, self).__init__()
        inter_dim = int(in_dim * expand_ratio)
        self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
        self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
        self.cv3 = nn.Sequential(*[
            Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
            for _ in range(round(depth))
        ])
        self.cv4 = nn.Sequential(*[
            Conv(inter_dim, inter_dim, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
            for _ in range(round(depth))
        ])
        self.out = Conv(inter_dim * 4, out_dim, k=1, act_type=act_type, norm_type=norm_type)

    def forward(self, x):
        x1 = self.cv1(x)
        x2 = self.cv2(x)
        x3 = self.cv3(x2)
        x4 = self.cv4(x3)

        out = self.out(torch.cat([x1, x2, x3, x4], dim=1))

        return out
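# Usage sketch (illustrative, not from the original file): ELANBlock preserves the
# spatial resolution and concatenates four inter_dim-channel branches before the
# final 1x1 projection, so the output has out_dim channels, e.g.
#
#   block = ELANBlock(in_dim=256, out_dim=512, expand_ratio=0.5, depth=2.0)
#   y = block(torch.randn(1, 256, 40, 40))  # -> torch.Size([1, 512, 40, 40])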
## PaFPN's ELAN-Block proposed by YOLOv7
class ELANBlockFPN(nn.Module):
    def __init__(self, in_dim, out_dim, expand_ratio=0.5, nbranch=4, depth=1,
                 act_type='silu', norm_type='BN', depthwise=False):
        super(ELANBlockFPN, self).__init__()
        # Basic parameters
        inter_dim = int(in_dim * expand_ratio)
        inter_dim2 = int(inter_dim * expand_ratio)
        # Network structure
        self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
        self.cv2 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
        self.cv3 = nn.ModuleList()
        for idx in range(round(nbranch)):
            if idx == 0:
                cvs = [Conv(inter_dim, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
            else:
                cvs = [Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise)]
            # deeper
            if round(depth) > 1:
                for _ in range(1, round(depth)):
                    cvs.append(Conv(inter_dim2, inter_dim2, k=3, p=1, act_type=act_type, norm_type=norm_type, depthwise=depthwise))
                self.cv3.append(nn.Sequential(*cvs))
            else:
                self.cv3.append(cvs[0])
        self.out = Conv(inter_dim * 2 + inter_dim2 * len(self.cv3), out_dim, k=1, act_type=act_type, norm_type=norm_type)

    def forward(self, x):
        x1 = self.cv1(x)
        x2 = self.cv2(x)
        inter_outs = [x1, x2]
        for m in self.cv3:
            y1 = inter_outs[-1]
            y2 = m(y1)
            inter_outs.append(y2)

        out = self.out(torch.cat(inter_outs, dim=1))

        return out


## DownSample Block proposed by YOLOv7
class DownSample(nn.Module):
    def __init__(self, in_dim, out_dim, act_type='silu', norm_type='BN', depthwise=False):
        super().__init__()
        inter_dim = out_dim // 2
        self.mp = nn.MaxPool2d((2, 2), 2)
        self.cv1 = Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type)
        self.cv2 = nn.Sequential(
            Conv(in_dim, inter_dim, k=1, act_type=act_type, norm_type=norm_type),
            Conv(inter_dim, inter_dim, k=3, p=1, s=2, act_type=act_type, norm_type=norm_type, depthwise=depthwise)
        )

    def forward(self, x):
        x1 = self.cv1(self.mp(x))
        x2 = self.cv2(x)
        out = torch.cat([x1, x2], dim=1)

        return out
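# Usage sketch (illustrative, not from the original file): DownSample halves the
# spatial resolution along two parallel paths (max-pool + 1x1 conv, and 1x1 conv +
# strided 3x3 conv) and concatenates them, giving 2 * (out_dim // 2) channels, e.g.
#
#   ds = DownSample(in_dim=256, out_dim=512)
#   y = ds(torch.randn(1, 256, 40, 40))  # -> torch.Size([1, 512, 20, 20])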
# ---------------------------- RepConv Modules ----------------------------
class RepConv(nn.Module):
    """
    RepVGG-style re-parameterizable convolution (https://arxiv.org/abs/2101.03697).
    Code adapted from https://github.com/WongKinYiu/yolov7/models/common.py
    """
    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, act_type='silu', deploy=False):
        super(RepConv, self).__init__()
        # -------------- Basic parameters --------------
        self.deploy = deploy
        self.groups = g
        self.in_channels = c1
        self.out_channels = c2

        # -------------- Network parameters --------------
        if deploy:
            self.rbr_reparam = nn.Conv2d(c1, c2, k, s, p, groups=g, bias=True)
        else:
            self.rbr_identity = (nn.BatchNorm2d(num_features=c1) if c2 == c1 and s == 1 else None)
            self.rbr_dense = nn.Sequential(
                nn.Conv2d(c1, c2, k, s, p, groups=g, bias=False),
                nn.BatchNorm2d(num_features=c2),
            )
            self.rbr_1x1 = nn.Sequential(
                nn.Conv2d(c1, c2, kernel_size=1, stride=s, bias=False),
                nn.BatchNorm2d(num_features=c2),
            )
        self.act = get_activation(act_type)

    def forward(self, inputs):
        if hasattr(self, "rbr_reparam"):
            return self.act(self.rbr_reparam(inputs))

        if self.rbr_identity is None:
            id_out = 0
        else:
            id_out = self.rbr_identity(inputs)

        return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out)

    def get_equivalent_kernel_bias(self):
        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense)
        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1)
        kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity)
        return (
            kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid,
            bias3x3 + bias1x1 + biasid,
        )

    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
        if kernel1x1 is None:
            return 0
        else:
            return nn.functional.pad(kernel1x1, [1, 1, 1, 1])

    def _fuse_bn_tensor(self, branch):
        if branch is None:
            return 0, 0
        if isinstance(branch, nn.Sequential):
            kernel = branch[0].weight
            running_mean = branch[1].running_mean
            running_var = branch[1].running_var
            gamma = branch[1].weight
            beta = branch[1].bias
            eps = branch[1].eps
        else:
            assert isinstance(branch, nn.BatchNorm2d)
            if not hasattr(self, "id_tensor"):
                input_dim = self.in_channels // self.groups
                kernel_value = np.zeros(
                    (self.in_channels, input_dim, 3, 3), dtype=np.float32
                )
                for i in range(self.in_channels):
                    kernel_value[i, i % input_dim, 1, 1] = 1
                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
            kernel = self.id_tensor
            running_mean = branch.running_mean
            running_var = branch.running_var
            gamma = branch.weight
            beta = branch.bias
            eps = branch.eps
        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape(-1, 1, 1, 1)
        return kernel * t, beta - running_mean * gamma / std

    def repvgg_convert(self):
        kernel, bias = self.get_equivalent_kernel_bias()
        return (
            kernel.detach().cpu().numpy(),
            bias.detach().cpu().numpy(),
        )

    def fuse_conv_bn(self, conv, bn):
        std = (bn.running_var + bn.eps).sqrt()
        bias = bn.bias - bn.running_mean * bn.weight / std

        t = (bn.weight / std).reshape(-1, 1, 1, 1)
        weights = conv.weight * t

        bn = nn.Identity()
        conv = nn.Conv2d(in_channels=conv.in_channels,
                         out_channels=conv.out_channels,
                         kernel_size=conv.kernel_size,
                         stride=conv.stride,
                         padding=conv.padding,
                         dilation=conv.dilation,
                         groups=conv.groups,
                         bias=True,
                         padding_mode=conv.padding_mode)

        conv.weight = torch.nn.Parameter(weights)
        conv.bias = torch.nn.Parameter(bias)

        return conv

    def fuse_repvgg_block(self):
        if self.deploy:
            return

        self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1])
        self.rbr_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1])
        rbr_1x1_bias = self.rbr_1x1.bias
        weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, [1, 1, 1, 1])

        # Fuse self.rbr_identity
        if (isinstance(self.rbr_identity, nn.BatchNorm2d)
                or isinstance(self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)):
            identity_conv_1x1 = nn.Conv2d(
                in_channels=self.in_channels,
                out_channels=self.out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                groups=self.groups,
                bias=False)
            identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to(self.rbr_1x1.weight.data.device)
            identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze().squeeze()
            identity_conv_1x1.weight.data.fill_(0.0)
            identity_conv_1x1.weight.data.fill_diagonal_(1.0)
            identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze(2).unsqueeze(3)

            identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity)
            bias_identity_expanded = identity_conv_1x1.bias
            weight_identity_expanded = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1])
        else:
            bias_identity_expanded = torch.nn.Parameter(torch.zeros_like(rbr_1x1_bias))
            weight_identity_expanded = torch.nn.Parameter(torch.zeros_like(weight_1x1_expanded))

        self.rbr_dense.weight = torch.nn.Parameter(
            self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded)
        self.rbr_dense.bias = torch.nn.Parameter(
            self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded)

        self.rbr_reparam = self.rbr_dense
        self.deploy = True

        if self.rbr_identity is not None:
            del self.rbr_identity
            self.rbr_identity = None

        if self.rbr_1x1 is not None:
            del self.rbr_1x1
            self.rbr_1x1 = None

        if self.rbr_dense is not None:
            del self.rbr_dense
            self.rbr_dense = None
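# The block below is an illustrative self-test sketch, not part of the original
# module: it only assumes the classes defined above and uses arbitrary tensor sizes.
# It prints the block output shapes and checks that RepConv produces (nearly)
# identical outputs before and after fuse_repvgg_block() in eval mode.
if __name__ == '__main__':
    x = torch.randn(1, 256, 40, 40)

    # ELAN blocks keep the spatial resolution; DownSample halves it
    print(ELANBlock(256, 512)(x).shape)      # -> torch.Size([1, 512, 40, 40])
    print(ELANBlockFPN(256, 256)(x).shape)   # -> torch.Size([1, 256, 40, 40])
    print(DownSample(256, 512)(x).shape)     # -> torch.Size([1, 512, 20, 20])

    # RepConv: training-time multi-branch vs. re-parameterized single 3x3 conv
    rep = RepConv(256, 256, k=3, s=1, p=1).eval()
    with torch.no_grad():
        y_train = rep(x)
        rep.fuse_repvgg_block()
        y_deploy = rep(x)
    print('max abs diff after re-parameterization:',
          (y_train - y_deploy).abs().max().item())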