In [1]:
import torch
import torch.nn as nn

In [2]:
def conv_example(in_channel, kernel):
    """Naive single-channel 2D cross-correlation (stride 1, no padding).

    Slides ``kernel`` over ``in_channel`` and, at each position, sums the
    element-wise product of the kernel and the window underneath it. The
    output size is derived from the argument shapes instead of being fixed
    to the 28x28 / 5x5 case.

    Args:
        in_channel: 2D tensor of shape (H, W), e.g. (28, 28).
        kernel: 2D tensor of shape (KH, KW), e.g. (5, 5).

    Returns:
        Tensor of shape (H - KH + 1, W - KW + 1), e.g. (24, 24).

    Raises:
        ValueError: if either argument is not 2D, or the kernel does not fit
            inside the input.
    """
    if in_channel.dim() != 2 or kernel.dim() != 2:
        raise ValueError(
            f"expected 2D tensors, got shapes {tuple(in_channel.shape)} "
            f"and {tuple(kernel.shape)}"
        )
    in_h, in_w = in_channel.shape
    k_h, k_w = kernel.shape
    out_h = in_h - k_h + 1
    out_w = in_w - k_w + 1
    if out_h <= 0 or out_w <= 0:
        raise ValueError(
            f"kernel {tuple(kernel.shape)} does not fit inside input "
            f"{tuple(in_channel.shape)}"
        )
    output = torch.zeros(out_h, out_w)
    for h in range(out_h):
        for w in range(out_w):
            # Window of the input currently covered by the kernel.
            window = in_channel[h: h + k_h, w: w + k_w]
            output[h, w] = (window * kernel).sum()
    return output

In [4]:
# Single-channel 5x5 convolution without bias, used as the reference implementation.
m = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 5), bias=False)

In [5]:
# One random single-channel 28x28 input with leading batch and channel axes.
x = torch.randn((1, 1, 28, 28))

In [6]:
# Reference result produced by PyTorch's own convolution layer.
re = m(x)
re.size()

torch.Size([1, 1, 24, 24])

In [7]:
# Kernel layout: (out_channels, in_channels, kernel_height, kernel_width).
m.weight.size()

torch.Size([1, 1, 5, 5])

In [8]:
# Index away the singleton batch/channel axes so the naive routine gets plain 2D tensors.
re1 = conv_example(x[0, 0], m.weight[0, 0])

In [9]:
# Shape of the hand-computed result.
re1.size()

torch.Size([24, 24])

In [12]:
# The hand-written convolution should match nn.Conv2d element-wise (tolerance 1e-3).
((re - re1).abs() < 0.001).all()

tensor(True)

In [13]:
# Multi-channel case: 3 input channels, 4 output channels, 5x5 kernel.
m1 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=(5, 5))
m1.weight.size()

torch.Size([4, 3, 5, 5])

In [14]:
# A batch of 10 random 3-channel 28x28 inputs pushed through the multi-channel convolution.
x1 = torch.randn((10, 3, 28, 28))
m1(x1).size()

torch.Size([10, 4, 24, 24])

In [15]:
# 2x2 max pooling with stride 2 halves each spatial dimension.
p = nn.MaxPool2d(kernel_size=2, stride=2)
p(x1).size()

torch.Size([10, 3, 14, 14])

In [16]:
### Implementation of the convolutional neural network
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt


# Fix the global RNG so the train/validation split is reproducible.
torch.manual_seed(12046)

# The 60,000 MNIST training images are split into 50,000 for training and
# 10,000 for validation; the official test split is loaded separately.
dataset = datasets.MNIST(
    root='./mnist',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
train_set, val_set = random_split(dataset, lengths=[50000, 10000])
test_set = datasets.MNIST(
    root='./mnist',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

# Every split is served in shuffled mini-batches of 500 samples.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=500, shuffle=True)
    for split in (train_set, val_set, test_set)
)

In [23]:
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Two conv -> ReLU -> max-pool stages are followed by two fully connected
    layers that emit 10 class logits.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 20 channels with a 5x5 kernel, then 2x2 down-sampling.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: 20 -> 40 channels with a 5x5 kernel, then 2x2 down-sampling.
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head on the flattened (40, 4, 4) feature map.
        self.fc1 = nn.Linear(in_features=40 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=10)

    def forward(self, x):
        """Map inputs of shape (B, 1, 28, 28) to logits of shape (B, 10)."""
        batch_size = len(x)
        feats = F.relu(self.conv1(x))                      # (B, 20, 24, 24)
        feats = self.pool1(feats)                          # (B, 20, 12, 12)
        feats = F.relu(self.conv2(feats))                  # (B, 40, 8, 8)
        feats = self.pool2(feats)                          # (B, 40, 4, 4)
        hidden = F.relu(self.fc1(feats.view(batch_size, -1)))  # (B, 120)
        return self.fc2(hidden)                            # (B, 10)


model = CNN()

In [24]:
# Number of mini-batches drawn from each data loader when estimating loss and accuracy.
eval_iters = 10


def estimate_loss(model):
    """Estimate loss and accuracy of ``model`` on the train, val and test loaders.

    The model is switched to evaluation mode while the metrics are computed.
    Afterwards it is returned to whichever mode it was in on entry, even if
    the evaluation raises, so calling this on a model already in evaluation
    mode no longer flips it back to training mode.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to class logits.

    Returns:
        dict: ``{'train': ..., 'val': ..., 'test': ...}`` where each value is
        the ``{'loss': ..., 'acc': ...}`` dict returned by ``_loss``.
    """
    was_training = model.training
    # Switch the model to evaluation mode.
    model.eval()
    try:
        stats = {
            'train': _loss(model, train_loader),
            'val': _loss(model, val_loader),
            'test': _loss(model, test_loader),
        }
    finally:
        # Restore the mode the caller had selected.
        model.train(was_training)
    return stats

 
@torch.no_grad()
def _loss(model, dataloader):
 # 估算模型效果
 loss = []
 acc = []
 data_iter = iter(dataloader)
 for t in range(eval_iters):
 inputs, labels = next(data_iter)
 # inputs: (500, 1, 28, 28)
 # labels: (500)
 B, C, H, W = inputs.shape
 #logits = model(inputs.view(B, -1))
 logits = model(inputs)
 loss.append(F.cross_entropy(logits, labels))
 # preds = torch.argmax(F.softmax(logits, dim=-1), dim=-1)
 preds = torch.argmax(logits, dim=-1)
 acc.append((preds == labels).sum() / B)
 re = {
 'loss': torch.tensor(loss).mean().item(),
 'acc': torch.tensor(acc).mean().item()
 }
 return re

In [25]:
def train_model(model, optimizer, epochs=10, penalty=False):
    """Train ``model`` on ``train_loader`` and print loss estimates after each epoch.

    Args:
        model: ``nn.Module`` mapping a batch of inputs to class logits.
        optimizer: optimizer that updates ``model``'s parameters.
        epochs: number of full passes over ``train_loader``.
        penalty: when True, add ``0.001 * L1 + 0.002 * squared-L2`` of all
            model parameters to the loss that is back-propagated.

    Returns:
        list[float]: cross-entropy loss of every training batch, recorded
        before any penalty term is added.
    """
    lossi = []
    for e in range(epochs):
        # Ensure layers such as Dropout run in training mode even if the
        # caller handed over a model that was left in evaluation mode.
        model.train()
        for inputs, labels in train_loader:
            logits = model(inputs)
            loss = F.cross_entropy(logits, labels)
            lossi.append(loss.item())
            if penalty:
                # reshape (unlike view) also works for non-contiguous parameters.
                w = torch.cat([p.reshape(-1) for p in model.parameters()])
                loss += 0.001 * w.abs().sum() + 0.002 * w.square().sum()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        stats = estimate_loss(model)
        train_loss = f'{stats["train"]["loss"]:.3f}'
        val_loss = f'{stats["val"]["loss"]:.3f}'
        test_loss = f'{stats["test"]["loss"]:.3f}'
        print(f'epoch {e} train {train_loss} val {val_loss} test {test_loss}')
    return lossi

In [26]:
# Train the baseline CNN with Adam; the per-batch loss history is discarded.
_ = train_model(model, optimizer=optim.Adam(params=model.parameters(), lr=0.01))

epoch 0 train 0.064 val 0.068 test 0.070
epoch 1 train 0.038 val 0.048 test 0.036
epoch 2 train 0.032 val 0.036 test 0.039
epoch 3 train 0.028 val 0.039 test 0.035
epoch 4 train 0.017 val 0.046 test 0.027
epoch 5 train 0.018 val 0.035 test 0.039
epoch 6 train 0.018 val 0.053 test 0.047
epoch 7 train 0.010 val 0.042 test 0.037
epoch 8 train 0.007 val 0.044 test 0.035
epoch 9 train 0.018 val 0.065 test 0.053


In [27]:
# Estimate loss and accuracy of the trained model on the train, val and test loaders.
estimate_loss(model)

{'train': {'loss': 0.02370004542171955, 'acc': 0.9912000894546509},
 'val': {'loss': 0.05751935765147209, 'acc': 0.9878000020980835},
 'test': {'loss': 0.06482766568660736, 'acc': 0.984000027179718}}

In [29]:
class CNN2(nn.Module):
    """Convolutional classifier with layer normalization and dropout.

    Same layout as the plain two-stage CNN, except that each convolution
    output is layer-normalized before the ReLU and dropout is applied to the
    hidden fully connected activations.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: conv -> layer norm over the whole (20, 24, 24) map -> pool.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.ln1 = nn.LayerNorm(normalized_shape=[20, 24, 24])
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Stage 2: conv -> layer norm over the whole (40, 8, 8) map -> pool.
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5)
        self.ln2 = nn.LayerNorm(normalized_shape=[40, 8, 8])
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head with dropout between the two linear layers.
        self.fc1 = nn.Linear(in_features=40 * 4 * 4, out_features=120)
        self.dp = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(in_features=120, out_features=10)

    def forward(self, x):
        """Map inputs of shape (B, 1, 28, 28) to logits of shape (B, 10)."""
        batch_size = len(x)
        feats = F.relu(self.ln1(self.conv1(x)))            # (B, 20, 24, 24)
        feats = self.pool1(feats)                          # (B, 20, 12, 12)
        feats = F.relu(self.ln2(self.conv2(feats)))        # (B, 40, 8, 8)
        feats = self.pool2(feats)                          # (B, 40, 4, 4)
        hidden = F.relu(self.fc1(feats.view(batch_size, -1)))  # (B, 120)
        hidden = self.dp(hidden)
        return self.fc2(hidden)                            # (B, 10)


model2 = CNN2()

In [30]:
# Train the layer-norm + dropout variant with the same optimizer settings as the baseline.
_ = train_model(model2, optimizer=optim.Adam(params=model2.parameters(), lr=0.01))

epoch 0 train 0.084 val 0.084 test 0.079
epoch 1 train 0.051 val 0.052 test 0.040
epoch 2 train 0.026 val 0.045 test 0.038
epoch 3 train 0.035 val 0.050 test 0.046
epoch 4 train 0.034 val 0.055 test 0.037
epoch 5 train 0.024 val 0.035 test 0.031
epoch 6 train 0.014 val 0.039 test 0.028
epoch 7 train 0.018 val 0.042 test 0.031
epoch 8 train 0.017 val 0.040 test 0.042
epoch 9 train 0.012 val 0.043 test 0.029
