# mlp.py (1.9 KB)
  1. import torch.nn as nn
  2. def get_activation(act_type=None):
  3. if act_type == 'relu':
  4. return nn.ReLU(inplace=True)
  5. elif act_type == 'lrelu':
  6. return nn.LeakyReLU(0.1, inplace=True)
  7. elif act_type == 'gelu':
  8. return nn.GELU()
  9. elif act_type == 'mish':
  10. return nn.Mish(inplace=True)
  11. elif act_type == 'silu':
  12. return nn.SiLU(inplace=True)
  13. elif act_type is None:
  14. return nn.Identity()
  15. class MLP(nn.Module):
  16. def __init__(self, in_dim, hidden_dim, out_dim, num_layers):
  17. super().__init__()
  18. self.num_layers = num_layers
  19. h = [hidden_dim] * (num_layers - 1)
  20. self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([in_dim] + h, h + [out_dim]))
  21. def forward(self, x):
  22. for i, layer in enumerate(self.layers):
  23. x = nn.functional.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
  24. return x
  25. class FFN(nn.Module):
  26. def __init__(self, d_model=256, ffn_dim=1024, dropout=0., act_type='relu', pre_norm=False):
  27. super().__init__()
  28. # ----------- Basic parameters -----------
  29. self.pre_norm = pre_norm
  30. self.ffn_dim = ffn_dim
  31. # ----------- Network parameters -----------
  32. self.linear1 = nn.Linear(d_model, self.ffn_dim)
  33. self.activation = get_activation(act_type)
  34. self.dropout2 = nn.Dropout(dropout)
  35. self.linear2 = nn.Linear(self.ffn_dim, d_model)
  36. self.dropout3 = nn.Dropout(dropout)
  37. self.norm = nn.LayerNorm(d_model)
  38. def forward(self, src):
  39. if self.pre_norm:
  40. src = self.norm(src)
  41. src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
  42. src = src + self.dropout3(src2)
  43. else:
  44. src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
  45. src = src + self.dropout3(src2)
  46. src = self.norm(src)
  47. return src