|
|
@@ -0,0 +1,433 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 1,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "<torch._C.Generator at 0x7fdbc884f330>"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 1,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "import torch\n",
|
|
|
+ "import torch.nn as nn\n",
|
|
|
+ "import torch.nn.functional as F\n",
|
|
|
+ "import torch.optim as optim\n",
|
|
|
+ "from torch.utils.data import DataLoader, random_split\n",
|
|
|
+ "from torchvision import datasets\n",
|
|
|
+ "import torchvision.transforms as transforms\n",
|
|
|
+ "import matplotlib.pyplot as plt\n",
|
|
|
+ "%matplotlib inline\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "torch.manual_seed(12046)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "(50000, 10000, 10000)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# 准备数据\n",
|
|
|
+ "dataset = datasets.MNIST(root='./mnist', train=True, download=True, transform=transforms.ToTensor())\n",
|
|
|
+ "# 将数据划分成训练集、验证集、测试集\n",
|
|
|
+ "train_set, val_set = random_split(dataset, [50000, 10000])\n",
|
|
|
+ "test_set = datasets.MNIST(root='./mnist', train=False, download=True, transform=transforms.ToTensor())\n",
|
|
|
+ "len(train_set), len(val_set), len(test_set)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# 构建数据读取器\n",
|
|
|
+ "train_loader = DataLoader(train_set, batch_size=500, shuffle=True)\n",
|
|
|
+ "val_loader = DataLoader(val_set, batch_size=500, shuffle=True)\n",
|
|
|
+ "test_loader = DataLoader(test_set, batch_size=500, shuffle=True)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "eval_iters = 10\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "def estimate_loss(model):\n",
|
|
|
+ " re = {}\n",
|
|
|
+ " # 将模型切换为评估模式\n",
|
|
|
+ " model.eval()\n",
|
|
|
+ " re['train'] = _loss(model, train_loader)\n",
|
|
|
+ " re['val'] = _loss(model, val_loader)\n",
|
|
|
+ " re['test'] = _loss(model, test_loader)\n",
|
|
|
+ " # 将模型切换为训练模式\n",
|
|
|
+ " model.train()\n",
|
|
|
+ " return re\n",
|
|
|
+ "\n",
|
|
|
+ " \n",
|
|
|
+ "@torch.no_grad()\n",
|
|
|
+ "def _loss(model, dataloader):\n",
|
|
|
+ " # 估算模型效果\n",
|
|
|
+ " loss = []\n",
|
|
|
+ " acc = []\n",
|
|
|
+ " data_iter = iter(dataloader)\n",
|
|
|
+ " for t in range(eval_iters):\n",
|
|
|
+ " inputs, labels = next(data_iter)\n",
|
|
|
+ " # inputs: (500, 1, 28, 28)\n",
|
|
|
+ " # labels: (500)\n",
|
|
|
+ " B, C, H, W = inputs.shape\n",
|
|
|
+ " #logits = model(inputs.view(B, -1))\n",
|
|
|
+ " logits = model(inputs)\n",
|
|
|
+ " loss.append(F.cross_entropy(logits, labels))\n",
|
|
|
+ " # preds = torch.argmax(F.softmax(logits, dim=-1), dim=-1)\n",
|
|
|
+ " preds = torch.argmax(logits, dim=-1)\n",
|
|
|
+ " acc.append((preds == labels).sum() / B)\n",
|
|
|
+ " re = {\n",
|
|
|
+ " 'loss': torch.tensor(loss).mean().item(),\n",
|
|
|
+ " 'acc': torch.tensor(acc).mean().item()\n",
|
|
|
+ " }\n",
|
|
|
+ " return re"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def train_model(model, optimizer, epochs=10, penalty=False):\n",
|
|
|
+ " lossi = []\n",
|
|
|
+ " for e in range(epochs):\n",
|
|
|
+ " for data in train_loader:\n",
|
|
|
+ " inputs, labels = data\n",
|
|
|
+ " #B, C, H, W = inputs.shape\n",
|
|
|
+ " #logits = model(inputs.view(B, -1))\n",
|
|
|
+ " logits = model(inputs)\n",
|
|
|
+ " loss = F.cross_entropy(logits, labels)\n",
|
|
|
+ " lossi.append(loss.item())\n",
|
|
|
+ " if penalty:\n",
|
|
|
+ " w = torch.cat([p.view(-1) for p in model.parameters()])\n",
|
|
|
+ " loss += 0.001 * w.abs().sum() + 0.002 * w.square().sum()\n",
|
|
|
+ " optimizer.zero_grad()\n",
|
|
|
+ " loss.backward()\n",
|
|
|
+ " optimizer.step()\n",
|
|
|
+ " stats = estimate_loss(model)\n",
|
|
|
+ " train_loss = f'{stats[\"train\"][\"loss\"]:.3f}'\n",
|
|
|
+ " val_loss = f'{stats[\"val\"][\"loss\"]:.3f}'\n",
|
|
|
+ " test_loss = f'{stats[\"test\"][\"loss\"]:.3f}'\n",
|
|
|
+ " print(f'epoch {e} train {train_loss} val {val_loss} test {test_loss}')\n",
|
|
|
+ " return lossi"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "(torch.Size([1, 4, 28, 28]), torch.Size([1, 4, 28, 28]))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# 卷积层的几个小技巧\n",
|
|
|
+ "channels = torch.randint(1, 10, (1,))\n",
|
|
|
+ "conv1 = nn.Conv2d(channels, channels, (3, 3), stride=1, padding=1)\n",
|
|
|
+ "x = torch.randn(1, channels, 28, 28)\n",
|
|
|
+ "x.shape, conv1(x).shape"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 11,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "class ResidualBlockSimplified(nn.Module):\n",
|
|
|
+ " \n",
|
|
|
+ " def __init__(self, channels):\n",
|
|
|
+ " super().__init__()\n",
|
|
|
+ " self.conv1 = nn.Conv2d(channels, channels, (3, 3), stride=1, padding=1)\n",
|
|
|
+ " self.conv2 = nn.Conv2d(channels, channels, (3, 3), stride=1, padding=1)\n",
|
|
|
+ " \n",
|
|
|
+ " def forward(self, x):\n",
|
|
|
+ " inputs = x\n",
|
|
|
+ " x = F.relu(self.conv1(x))\n",
|
|
|
+ " x = self.conv2(x)\n",
|
|
|
+ " # 残差连接\n",
|
|
|
+ " out = x + inputs\n",
|
|
|
+ " out = F.relu(out)\n",
|
|
|
+ " return out"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 12,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "torch.Size([1, 3, 28, 28])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 12,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "model = ResidualBlockSimplified(3)\n",
|
|
|
+ "x = torch.randn(1, 3, 28, 28)\n",
|
|
|
+ "model(x).shape"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "(torch.Size([1, 2, 28, 28]), torch.Size([1, 2, 28, 28]))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# 这两个卷积操作的输出是一样形状的\n",
|
|
|
+ "stride = torch.randint(1, 10, (1,))\n",
|
|
|
+ "in_channels = torch.randint(1, 10, (1,))\n",
|
|
|
+ "out_channels = torch.randint(1, 10, (1,))\n",
|
|
|
+ "conv1 = nn.Conv2d(in_channels, out_channels, (3, 3), stride=stride, padding=1)\n",
|
|
|
+ "conv2 = nn.Conv2d(in_channels, out_channels, (1, 1), stride=stride, padding=0)\n",
|
|
|
+ "x = torch.randn(1, in_channels, 28, 28)\n",
|
|
|
+ "conv1(x).shape, conv2(x).shape"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 21,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "class ResidualBlock(nn.Module):\n",
|
|
|
+ " \n",
|
|
|
+ " def __init__(self, in_channels, out_channels, stride=1):\n",
|
|
|
+ " super().__init__()\n",
|
|
|
+ " self.conv1 = nn.Conv2d(in_channels, out_channels, (3, 3), stride=stride, padding=1)\n",
|
|
|
+ " self.bn1 = nn.BatchNorm2d(out_channels)\n",
|
|
|
+ " self.conv2 = nn.Conv2d(out_channels, out_channels, (3, 3), stride=1, padding=1)\n",
|
|
|
+ " self.bn2 = nn.BatchNorm2d(out_channels)\n",
|
|
|
+ " self.downsample = None\n",
|
|
|
+ " if stride != 1 or in_channels != out_channels:\n",
|
|
|
+ " self.downsample = nn.Conv2d(in_channels, out_channels, (1, 1), stride=stride, padding=0)\n",
|
|
|
+ " self.bn3 = nn.BatchNorm2d(out_channels)\n",
|
|
|
+ " \n",
|
|
|
+ " def forward(self, x):\n",
|
|
|
+ " inputs = x\n",
|
|
|
+ " x = F.relu(self.bn1(self.conv1(x)))\n",
|
|
|
+ " x = self.bn2(self.conv2(x))\n",
|
|
|
+ " if self.downsample is not None:\n",
|
|
|
+ " inputs = self.bn3(self.downsample(inputs))\n",
|
|
|
+ " out = x + inputs\n",
|
|
|
+ " out = F.relu(out)\n",
|
|
|
+ " return out"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 23,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "torch.Size([1, 5, 4, 4])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 23,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "model = ResidualBlock(3, 5, 2)\n",
|
|
|
+ "x = torch.randn(1, 3, 7, 7)\n",
|
|
|
+ "model(x).shape"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 24,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "class ResNet(nn.Module):\n",
|
|
|
+ " \n",
|
|
|
+ " def __init__(self):\n",
|
|
|
+ " super().__init__()\n",
|
|
|
+ " self.block1 = ResidualBlock(1, 20)\n",
|
|
|
+ " self.block2 = ResidualBlock(20, 40, stride=2)\n",
|
|
|
+ " self.block3 = ResidualBlock(40, 60, stride=2)\n",
|
|
|
+ " self.block4 = ResidualBlock(60, 80, stride=2)\n",
|
|
|
+ " self.block5 = ResidualBlock(80, 100, stride=2)\n",
|
|
|
+ " self.block6 = ResidualBlock(100, 120, stride=2)\n",
|
|
|
+ " self.fc = nn.Linear(120, 10)\n",
|
|
|
+ " \n",
|
|
|
+ " def forward(self, x):\n",
|
|
|
+ " # x : (B, 1, 28, 28)\n",
|
|
|
+ " B = x.shape[0]\n",
|
|
|
+ " x = self.block1(x) # (B, 20, 28, 28)\n",
|
|
|
+ " x = self.block2(x) # (B, 40, 14, 14)\n",
|
|
|
+ " x = self.block3(x) # (B, 60, 7, 7)\n",
|
|
|
+ " x = self.block4(x) # (B, 80, 4, 4)\n",
|
|
|
+ " x = self.block5(x) # (B, 100, 2, 2)\n",
|
|
|
+ " x = self.block6(x) # (B, 120, 1, 1)\n",
|
|
|
+ " x = self.fc(x.view(B, -1))\n",
|
|
|
+ " return x"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 25,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "torch.Size([10, 10])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 25,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "model = ResNet()\n",
|
|
|
+ "x = torch.randn(10, 1, 28, 28)\n",
|
|
|
+ "model(x).shape"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 26,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "epoch 0 train 0.055 val 0.057 test 0.058\n",
|
|
|
+ "epoch 1 train 0.038 val 0.046 test 0.047\n",
|
|
|
+ "epoch 2 train 0.024 val 0.040 test 0.037\n",
|
|
|
+ "epoch 3 train 0.023 val 0.048 test 0.041\n",
|
|
|
+ "epoch 4 train 0.045 val 0.067 test 0.067\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "_ = train_model(model, optim.Adam(model.parameters(), lr=0.01), epochs=5)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 27,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "{'train': {'loss': 0.05183951184153557, 'acc': 0.9844000935554504},\n",
|
|
|
+ " 'val': {'loss': 0.07237933576107025, 'acc': 0.9765999913215637},\n",
|
|
|
+ " 'test': {'loss': 0.06279060989618301, 'acc': 0.9815999865531921}}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 27,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "estimate_loss(model)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.8.5"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 4
|
|
|
+}
|