```python
import torch.nn as nn


def get_activation(act_type=None):
    """Return an activation module by name; identity if act_type is None."""
    if act_type == 'relu':
        return nn.ReLU(inplace=True)
    elif act_type == 'lrelu':
        return nn.LeakyReLU(0.1, inplace=True)
    elif act_type == 'gelu':
        return nn.GELU()
    elif act_type == 'mish':
        return nn.Mish(inplace=True)
    elif act_type == 'silu':
        return nn.SiLU(inplace=True)
    elif act_type is None:
        return nn.Identity()
    else:
        raise ValueError(f'Unknown activation type: {act_type}')


class MLP(nn.Module):
    """Simple multi-layer perceptron with ReLU between hidden layers."""

    def __init__(self, in_dim, hidden_dim, out_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(
            nn.Linear(n, k) for n, k in zip([in_dim] + h, h + [out_dim])
        )

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            # ReLU after every layer except the last.
            x = nn.functional.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x


class FFN(nn.Module):
    """Transformer-style feed-forward block with pre- or post-LayerNorm."""

    def __init__(self, d_model=256, ffn_dim=1024, dropout=0., act_type='relu', pre_norm=False):
        super().__init__()
        # ----------- Basic parameters -----------
        self.pre_norm = pre_norm
        self.ffn_dim = ffn_dim
        # ----------- Network parameters -----------
        self.linear1 = nn.Linear(d_model, self.ffn_dim)
        self.activation = get_activation(act_type)
        self.dropout2 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(self.ffn_dim, d_model)
        self.dropout3 = nn.Dropout(dropout)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, src):
        if self.pre_norm:
            # Pre-norm: normalize the input to the FFN, but keep the residual
            # connection from the un-normalized input.
            src2 = self.linear2(self.dropout2(self.activation(self.linear1(self.norm(src)))))
            src = src + self.dropout3(src2)
        else:
            # Post-norm: apply the residual first, then normalize the sum.
            src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
            src = src + self.dropout3(src2)
            src = self.norm(src)
        return src
```
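For reference, here is a minimal usage sketch of the two modules above. The tensor shapes and hyperparameters are illustrative assumptions, not values taken from any particular model:

```python
import torch

# Hypothetical shapes: batch of 2 sequences, 100 tokens, d_model = 256.
x = torch.randn(2, 100, 256)

# FFN preserves the feature dimension, so output shape equals input shape.
ffn = FFN(d_model=256, ffn_dim=1024, dropout=0.1, act_type='silu', pre_norm=True)
print(ffn(x).shape)  # torch.Size([2, 100, 256])

# MLP maps in_dim -> hidden_dim -> ... -> out_dim on the last axis.
mlp = MLP(in_dim=256, hidden_dim=256, out_dim=4, num_layers=3)
print(mlp(x).shape)  # torch.Size([2, 100, 4])
```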