@@ -210,7 +210,7 @@ class TransformerEncoderLayer(nn.Module):
     def __init__(self,
                  d_model   :int   = 256,
                  num_heads :int   = 8,
-                 mlp_ratio :float = 4.0,
+                 ffn_dim   :int   = 1024,
                  dropout   :float = 0.1,
                  act_type  :str   = "relu",
                  ):
@@ -218,7 +218,7 @@ class TransformerEncoderLayer(nn.Module):
         # ----------- Basic parameters -----------
         self.d_model = d_model
         self.num_heads = num_heads
-        self.mlp_ratio = mlp_ratio
+        self.ffn_dim = ffn_dim
         self.dropout = dropout
         self.act_type = act_type
         # ----------- Basic parameters -----------
@@ -228,7 +228,7 @@ class TransformerEncoderLayer(nn.Module):
         self.norm = nn.LayerNorm(d_model)

         # Feedforward Network
-        self.ffn = FFN(d_model, mlp_ratio, dropout, act_type)
+        self.ffn = FFN(d_model, ffn_dim, dropout, act_type)

     def with_pos_embed(self, tensor, pos):
         return tensor if pos is None else tensor + pos
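
The hunks above replace the float mlp_ratio hyperparameter with an explicit integer ffn_dim, so the feed-forward hidden width is passed directly rather than derived as int(d_model * mlp_ratio). Below is a minimal sketch of an FFN module compatible with the new call site FFN(d_model, ffn_dim, dropout, act_type); only the signature comes from this diff, while the body and the act_type mapping are assumptions:

import torch
import torch.nn as nn

class FFN(nn.Module):
    def __init__(self, d_model=256, ffn_dim=1024, dropout=0.1, act_type="relu"):
        super().__init__()
        act = {"relu": nn.ReLU, "gelu": nn.GELU}[act_type]  # assumed act_type mapping
        self.linear1 = nn.Linear(d_model, ffn_dim)   # expand: d_model -> ffn_dim
        self.act = act()
        self.dropout1 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(ffn_dim, d_model)   # project back: ffn_dim -> d_model
        self.dropout2 = nn.Dropout(dropout)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x):
        # expand -> activate -> project, then residual add & LayerNorm
        out = self.linear2(self.dropout1(self.act(self.linear1(x))))
        return self.norm(x + self.dropout2(out))

With the defaults (d_model=256, ffn_dim=1024), FFN()(torch.randn(2, 100, 256)) returns a tensor of shape (2, 100, 256), the same layer the old mlp_ratio=4.0 default produced, since 256 * 4.0 == 1024.
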
@@ -259,7 +259,7 @@ class TransformerEncoder(nn.Module):
                  d_model        :int   = 256,
                  num_heads      :int   = 8,
                  num_layers     :int   = 1,
-                 mlp_ratio      :float = 4.0,
+                 ffn_dim        :int   = 1024,
                  pe_temperature :float = 10000.,
                  dropout        :float = 0.1,
                  act_type       :str   = "relu",
@@ -269,14 +269,14 @@ class TransformerEncoder(nn.Module):
         self.d_model = d_model
         self.num_heads = num_heads
         self.num_layers = num_layers
-        self.mlp_ratio = mlp_ratio
+        self.ffn_dim = ffn_dim
         self.dropout = dropout
         self.act_type = act_type
         self.pe_temperature = pe_temperature
         self.pos_embed = None
         # ----------- Basic parameters -----------
         self.encoder_layers = get_clones(
-            TransformerEncoderLayer(d_model, num_heads, mlp_ratio, dropout, act_type), num_layers)
+            TransformerEncoderLayer(d_model, num_heads, ffn_dim, dropout, act_type), num_layers)

     def build_2d_sincos_position_embedding(self, device, w, h, embed_dim=256, temperature=10000.):
         assert embed_dim % 4 == 0, \
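
For context, the unchanged build_2d_sincos_position_embedding seen above asserts embed_dim % 4 == 0 because the channels split into four groups: sin and cos over x, and sin and cos over y. A standard sketch of such a builder, written as a free function for brevity; only the signature and the assertion appear in this diff, the body is an assumption:

import torch

def build_2d_sincos_position_embedding(device, w, h, embed_dim=256, temperature=10000.):
    assert embed_dim % 4 == 0
    grid_w = torch.arange(int(w), dtype=torch.float32)
    grid_h = torch.arange(int(h), dtype=torch.float32)
    grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij")
    pos_dim = embed_dim // 4
    # frequency ladder, analogous to the 1D transformer position encoding
    omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim
    omega = 1. / (temperature ** omega)
    out_w = grid_w.flatten()[..., None] @ omega[None]   # [w*h, pos_dim]
    out_h = grid_h.flatten()[..., None] @ omega[None]   # [w*h, pos_dim]
    # [1, w*h, embed_dim]: sin/cos over x, then sin/cos over y
    pos_embed = torch.cat([torch.sin(out_w), torch.cos(out_w),
                           torch.sin(out_h), torch.cos(out_h)], dim=1)[None]
    return pos_embed.to(device)
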
@@ -339,7 +339,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
                  num_heads  :int   = 8,
                  num_levels :int   = 3,
                  num_points :int   = 4,
-                 mlp_ratio  :float = 4.0,
+                 ffn_dim    :int   = 1024,
                  dropout    :float = 0.1,
                  act_type   :str   = "relu",
                  ):
@@ -349,7 +349,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         self.num_heads = num_heads
         self.num_levels = num_levels
         self.num_points = num_points
-        self.mlp_ratio = mlp_ratio
+        self.ffn_dim = ffn_dim
         self.dropout = dropout
         self.act_type = act_type
         # ---------------- Network parameters ----------------
@@ -362,7 +362,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         self.dropout2 = nn.Dropout(dropout)
         self.norm2 = nn.LayerNorm(d_model)
         ## FFN
-        self.ffn = FFN(d_model, mlp_ratio, dropout, act_type)
+        self.ffn = FFN(d_model, ffn_dim, dropout, act_type)

     def with_pos_embed(self, tensor, pos):
         return tensor if pos is None else tensor + pos
@@ -403,7 +403,7 @@ class DeformableTransformerDecoder(nn.Module):
                  num_layers :int   = 1,
                  num_levels :int   = 3,
                  num_points :int   = 4,
-                 mlp_ratio  :float = 4.0,
+                 ffn_dim    :int   = 1024,
                  dropout    :float = 0.1,
                  act_type   :str   = "relu",
                  return_intermediate :bool = False,
@@ -413,13 +413,13 @@ class DeformableTransformerDecoder(nn.Module):
         self.d_model = d_model
         self.num_heads = num_heads
         self.num_layers = num_layers
-        self.mlp_ratio = mlp_ratio
+        self.ffn_dim = ffn_dim
         self.dropout = dropout
         self.act_type = act_type
         self.pos_embed = None
         # ----------- Network parameters -----------
         self.decoder_layers = get_clones(
-            DeformableTransformerDecoderLayer(d_model, num_heads, num_levels, num_points, mlp_ratio, dropout, act_type), num_layers)
+            DeformableTransformerDecoderLayer(d_model, num_heads, num_levels, num_points, ffn_dim, dropout, act_type), num_layers)
         self.num_layers = num_layers
         self.return_intermediate = return_intermediate
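
Configs migrating to this change can convert directly: the equivalent of an old mlp_ratio setting is ffn_dim = int(d_model * mlp_ratio), so d_model=256 with mlp_ratio=4.0 maps to ffn_dim=1024, which is exactly the new default in every signature touched above.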