mlp.py

import torch.nn as nn
import torch.nn.functional as F


def get_activation(act_type=None):
    """Return an activation module by name; None maps to an identity."""
    if act_type == 'relu':
        return nn.ReLU(inplace=True)
    elif act_type == 'lrelu':
        return nn.LeakyReLU(0.1, inplace=True)
    elif act_type == 'mish':
        return nn.Mish(inplace=True)
    elif act_type == 'silu':
        return nn.SiLU(inplace=True)
    elif act_type == 'gelu':
        return nn.GELU()
    elif act_type is None:
        return nn.Identity()
    else:
        raise NotImplementedError(f'Unsupported activation type: {act_type}')


# ----------------- MLP modules -----------------
class MLP(nn.Module):
    """Simple multi-layer perceptron with ReLU between hidden layers."""

    def __init__(self, in_dim, hidden_dim, out_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(
            nn.Linear(n, k) for n, k in zip([in_dim] + h, h + [out_dim]))

    def forward(self, x):
        for i, layer in enumerate(self.layers):
            # No activation after the final layer.
            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x


class FFN(nn.Module):
    """Feed-forward block with residual connection and LayerNorm."""

    def __init__(self, d_model=256, ffn_dim=1024, dropout=0., act_type='relu'):
        super().__init__()
        self.ffn_dim = ffn_dim
        self.linear1 = nn.Linear(d_model, self.ffn_dim)
        self.activation = get_activation(act_type)
        self.dropout2 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(self.ffn_dim, d_model)
        self.dropout3 = nn.Dropout(dropout)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, src):
        # Expand to ffn_dim, apply activation and dropout, project back to d_model.
        src2 = self.linear2(self.dropout2(self.activation(self.linear1(src))))
        # Residual connection followed by layer normalization.
        src = src + self.dropout3(src2)
        src = self.norm(src)
        return src
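

# Minimal usage sketch (illustrative, not part of the original module).
# The tensor shapes and constructor arguments below are assumptions for a quick smoke test.
if __name__ == '__main__':
    import torch

    x = torch.randn(2, 100, 256)  # hypothetical (batch, tokens, d_model) input
    mlp = MLP(in_dim=256, hidden_dim=512, out_dim=4, num_layers=3)
    ffn = FFN(d_model=256, ffn_dim=1024, dropout=0.1, act_type='relu')
    print(mlp(x).shape)  # torch.Size([2, 100, 4])
    print(ffn(x).shape)  # torch.Size([2, 100, 256])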