In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
%matplotlib inline


# Seed PyTorch's global random number generator for reproducibility
torch.manual_seed(12046)

<torch._C.Generator at 0x1388443b0>

In [2]:
# Prepare the data: the MNIST training split, with images converted to tensors
dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# Carve the training split into 50000 training and 10000 validation samples;
# the train=False split is used as the test set
train_set, val_set = random_split(dataset, lengths=[50000, 10000])
test_set = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
# Last expression of the cell: display the size of each split
len(train_set), len(val_set), len(test_set)

(50000, 10000, 10000)

In [3]:
# Build the data loaders: every split is served in shuffled batches of 500
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=500, shuffle=True)
    for split in (train_set, val_set, test_set)
)

In [4]:
class CNN(nn.Module):
    """
    Small convolutional classifier: two (5x5 convolution -> ReLU -> 2x2 max-pool)
    stages followed by two fully connected layers that produce 10 logits.
    """

    def __init__(self):
        super().__init__()
        # Convolution / pooling stages
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully connected layers on the flattened 40 x 4 x 4 feature map
        self.fc1 = nn.Linear(in_features=40 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=10)

    def forward(self, x):
        '''
        Forward pass
        Parameters
        ----
        x : torch.FloatTensor of shape (B, 1, 28, 28)

        Returns
        ----
        Unnormalised class scores (logits) of shape (B, 10)
        '''
        batch_size = x.size(0)                      # x: (B,  1, 28, 28)
        features = F.relu(self.conv1(x))            # (B, 20, 24, 24)
        features = self.pool1(features)             # (B, 20, 12, 12)
        features = F.relu(self.conv2(features))     # (B, 40,  8,  8)
        features = self.pool2(features)             # (B, 40,  4,  4)
        flat = features.view(batch_size, -1)        # (B, 40 * 4 * 4)
        hidden = F.relu(self.fc1(flat))             # (B, 120)
        return self.fc2(hidden)                     # (B, 10)

model = CNN()

In [5]:
# Number of batches _loss() evaluates per data loader
eval_iters = 10

def estimate_loss(model):
    """
    Evaluate the model on the train, validation and test loaders.

    The model is switched to evaluation mode while the metrics are computed
    and is put back into whatever mode (training or evaluation) it was in
    before the call, even if the evaluation raises.

    Parameters
    ----
    model : torch.nn.Module, the model to evaluate

    Returns
    ----
    dict with keys 'train', 'val' and 'test'; each value is the dict
    returned by _loss() for the corresponding module-level data loader
    """
    # Remember the caller's mode so evaluation does not silently change it
    was_training = model.training
    # Switch the model to evaluation mode
    model.eval()
    try:
        return {
            'train': _loss(model, train_loader),
            'val': _loss(model, val_loader),
            'test': _loss(model, test_loader),
        }
    finally:
        # Restore the previous mode (training mode when called from a training loop)
        model.train(was_training)

@torch.no_grad()
def _loss(model, data_loader):
    """
    计算模型在不同数据集下面的评估指标
    """
    loss = []
    accuracy = []
    data_iter = iter(data_loader)
    for k in range(eval_iters):
        inputs, labels = next(data_iter)
        B = inputs.shape[0]
        logits = model(inputs)
        # 计算模型损失
        loss.append(F.cross_entropy(logits, labels))
        # 计算预测的准确率
        _, predicted = torch.max(logits, 1)
        accuracy.append((predicted == labels).sum() / B)
    re = {
        'loss': torch.tensor(loss).mean().item(),
        'accuracy': torch.tensor(accuracy).mean().item()
    }
    return re

In [6]:
def train_cnn(model, optimizer, data_loader, epochs=10, penalty=()):
    """
    Train the model and print evaluation metrics after every epoch.

    Parameters
    ----
    model : torch.nn.Module, the model to train
    optimizer : optimizer that updates the model's parameters
    data_loader : iterable yielding (inputs, labels) batches
    epochs : int, number of passes over data_loader
    penalty : iterable of callables; each is called with the model and its
        return value is added to the cross-entropy loss before backpropagation

    Returns
    ----
    list of float, the cross-entropy loss of every training batch
    (recorded before any penalty term is added)
    """
    # Make sure layers such as dropout and batch normalization run in
    # training mode from the very first batch
    model.train()
    lossi = []
    splits = ('train', 'val', 'test')
    for epoch in range(epochs):
        for inputs, labels in data_loader:
            optimizer.zero_grad()
            logits = model(inputs)
            loss = F.cross_entropy(logits, labels)
            lossi.append(loss.item())
            # Add the penalty terms (out of place, so the recorded loss tensor is not modified)
            for p in penalty:
                loss = loss + p(model)
            loss.backward()
            optimizer.step()
        # Evaluate the model and print the results
        metrics = estimate_loss(model)
        loss_report = ', '.join(f'{s} loss {metrics[s]["loss"]:.4f}' for s in splits)
        acc_report = ', '.join(f'{s} acc {metrics[s]["accuracy"]:.4f}' for s in splits)
        print(f'epoch {epoch:>2}: {loss_report}')
        print(f'{"":>10}{acc_report}')
    return lossi

In [7]:
# Maps a model label (e.g. 'cnn') to the per-batch loss history returned by train_cnn()
stats = {}

In [8]:
# Train the baseline CNN for 5 epochs with Adam (lr=0.01) and keep its loss history
stats['cnn'] = train_cnn(model, optim.Adam(model.parameters(), lr=0.01), train_loader, epochs=5)

epoch  0: train loss 0.0558, val loss 0.0485, test loss 0.0498
          train acc 0.9798, val acc 0.9834, test acc 0.9856
epoch  1: train loss 0.0419, val loss 0.0455, test loss 0.0342
          train acc 0.9860, val acc 0.9854, test acc 0.9892
epoch  2: train loss 0.0303, val loss 0.0396, test loss 0.0305
          train acc 0.9914, val acc 0.9878, test acc 0.9886
epoch  3: train loss 0.0209, val loss 0.0335, test loss 0.0359
          train acc 0.9942, val acc 0.9898, test acc 0.9894
epoch  4: train loss 0.0193, val loss 0.0402, test loss 0.0344
          train acc 0.9930, val acc 0.9876, test acc 0.9902


In [9]:
# Add batch normalization layers and dropout to the model
class CNN2(nn.Module):
    """
    Same layout as CNN, with a BatchNorm2d layer after each convolution
    (applied before the ReLU) and dropout between the two fully connected layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5)
        # Batch normalization layer
        self.bn1 = nn.BatchNorm2d(num_features=20)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=40, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(num_features=40)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=40 * 4 * 4, out_features=120)
        # Dropout
        self.dropout = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(in_features=120, out_features=10)

    def forward(self, x):
        """
        Forward pass: maps x of shape (B, 1, 28, 28) to logits of shape (B, 10).
        """
        batch_size = x.size(0)                          # x: (B,  1, 28, 28)
        features = self.conv1(x)                        # (B, 20, 24, 24)
        features = F.relu(self.bn1(features))
        features = self.pool1(features)                 # (B, 20, 12, 12)
        features = self.conv2(features)                 # (B, 40,  8,  8)
        features = F.relu(self.bn2(features))
        features = self.pool2(features)                 # (B, 40,  4,  4)
        flat = features.view(batch_size, -1)            # (B, 40 * 4 * 4)
        hidden = self.dropout(F.relu(self.fc1(flat)))   # (B, 120)
        return self.fc2(hidden)                         # (B, 10)

model2 = CNN2()

In [10]:
# Train the batch-norm + dropout variant with the same settings as the baseline CNN
stats['cnn2'] = train_cnn(model2, optim.Adam(model2.parameters(), lr=0.01), train_loader, epochs=5)

epoch  0: train loss 0.0586, val loss 0.0594, test loss 0.0566
          train acc 0.9816, val acc 0.9840, test acc 0.9834
epoch  1: train loss 0.0436, val loss 0.0416, test loss 0.0417
          train acc 0.9860, val acc 0.9874, test acc 0.9876
epoch  2: train loss 0.0276, val loss 0.0389, test loss 0.0455
          train acc 0.9910, val acc 0.9890, test acc 0.9852
epoch  3: train loss 0.0215, val loss 0.0373, test loss 0.0306
          train acc 0.9938, val acc 0.9888, test acc 0.9910
epoch  4: train loss 0.0304, val loss 0.0390, test loss 0.0403
          train acc 0.9900, val acc 0.9868, test acc 0.9870
