junxiaoyao
/
YOLO-Tutorial-v2


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
							import torch
import torch.nn as nn

try:
    from .modules import ConvModule
except:
    from  modules import ConvModule


# Convolutional Network
class ConvNet(nn.Module):
    """Small four-stage convolutional image classifier.

    Structure (as built in ``__init__``):

    * ``layer_1``: two 3x3 ``ConvModule`` blocks, the first with stride 2.
    * ``layer_2`` / ``layer_3``: a 2x2 max-pool followed by two 3x3 ``ConvModule`` blocks.
    * ``layer_4``: two 3x3 ``ConvModule`` blocks with no further downsampling.
    * head: either global average pooling + linear layer, or flatten + linear layer.

    Channel widths of the four stages are stored in ``self.layer_dims`` as
    ``[hidden_dim, hidden_dim * 2, hidden_dim * 4, hidden_dim * 4]``.
    """

    def __init__(self,
                 img_size      :int = 224,
                 in_dim        :int = 3,
                 hidden_dim    :int = 16,
                 num_classes   :int = 10,
                 act_type      :str = "relu",
                 norm_type     :str = "bn",
                 depthwise     :bool = False,
                 use_adavgpool :bool = True,
                 ) -> None:
        """Build the network.

        Args:
            img_size: Side length of the (square) input image. Only used to size
                the linear head when ``use_adavgpool`` is False.
            in_dim: Number of input channels.
            hidden_dim: Channel width of the first stage; later stages use
                multiples of it (see ``self.layer_dims``).
            num_classes: Number of output logits.
            act_type: Activation identifier forwarded to every ``ConvModule``.
            norm_type: Normalization identifier forwarded to every ``ConvModule``.
            depthwise: Forwarded to every ``ConvModule``.
            use_adavgpool: If True, pool the final feature map to 1x1 before the
                linear head (input size independent). If False, the feature map
                is flattened directly, so inputs must be ``img_size`` x ``img_size``.
        """
        super().__init__()
        # ---------- Basic parameters ----------
        self.img_size      = img_size
        self.num_classes   = num_classes
        self.act_type      = act_type
        self.norm_type     = norm_type
        self.use_adavgpool = use_adavgpool
        self.layer_dims    = [hidden_dim, hidden_dim * 2, hidden_dim * 4, hidden_dim * 4]

        def conv_pair(c_in: int, c_out: int, stride: int = 1) -> list:
            """Return two 3x3 ConvModules; only the first changes channels/stride."""
            return [
                ConvModule(c_in, c_out,
                           kernel_size=3, padding=1, stride=stride,
                           act_type=act_type, norm_type=norm_type, depthwise=depthwise),
                ConvModule(c_out, c_out,
                           kernel_size=3, padding=1, stride=1,
                           act_type=act_type, norm_type=norm_type, depthwise=depthwise),
            ]

        # ---------- Model parameters ----------
        # Submodule names and their order inside each nn.Sequential are kept
        # stable so existing state_dict keys continue to match.
        c1, c2, c3, c4 = self.layer_dims
        self.layer_1 = nn.Sequential(*conv_pair(in_dim, c1, stride=2))
        self.layer_2 = nn.Sequential(nn.MaxPool2d(kernel_size=2, stride=2),
                                     *conv_pair(c1, c2))
        self.layer_3 = nn.Sequential(nn.MaxPool2d(kernel_size=2, stride=2),
                                     *conv_pair(c2, c3))
        self.layer_4 = nn.Sequential(*conv_pair(c3, c4))

        if use_adavgpool:
            self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
            self.fc      = nn.Linear(c4, num_classes)
        else:
            self.avgpool = None
            # Spatial size after the three downsampling steps. A stride-2 3x3
            # conv with padding 1 produces ceil(S / 2), and each 2x2 max-pool
            # floors a halving. `img_size // 8` is wrong when img_size % 8 == 7
            # (e.g. 31 -> 16 -> 8 -> 4, not 3), which made the linear layer
            # reject the flattened features.
            # NOTE(review): assumes ConvModule passes kernel_size/padding/stride
            # to a standard convolution -- confirm against modules.ConvModule.
            feat_size = (img_size + 1) // 2 // 2 // 2
            fc_in_dim = feat_size ** 2 * c4  # N = Co x Ho x Wo
            self.fc   = nn.Linear(fc_in_dim, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Input:
            x : (torch.Tensor) -> [B, C, H, W]
        Output:
            x : (torch.Tensor) -> [B, Nc], Nc is the number of the object categories.
        """
        # [B, C_in, H, W]   -> [B, C1, H/2, W/2]
        x = self.layer_1(x)
        # [B, C1, H/2, W/2] -> [B, C2, H/4, W/4]
        x = self.layer_2(x)
        # [B, C2, H/4, W/4] -> [B, C3, H/8, W/8]
        x = self.layer_3(x)
        # [B, C3, H/8, W/8] -> [B, C3, H/8, W/8]
        x = self.layer_4(x)

        # Optional global pooling: [B, C3, H/8, W/8] -> [B, C3, 1, 1]
        if self.use_adavgpool:
            x = self.avgpool(x)

        # reshape [B, Co, Ho, Wo] to [B, N], N = Co x Ho x Wo
        x = x.flatten(1)
        x = self.fc(x)

        return x


if __name__ == "__main__":
    # Smoke test: run one random batch through the network and print the
    # shape of the result.
    batch_size, channels, side = 8, 3, 28

    # Random input batch laid out as [B, C, H, W]
    x = torch.randn(batch_size, channels, side, side)

    # Build the ConvNet using the flatten + fc head (adaptive pooling disabled)
    settings = dict(
        img_size=side,
        in_dim=channels,
        hidden_dim=16,
        num_classes=10,
        act_type='relu',
        norm_type='bn',
        depthwise=False,
        use_adavgpool=False,
    )
    model = ConvNet(**settings)

    # Inference
    output = model(x)
    print(output.shape)