Browse Source

finish chapter 9

Gen TANG 2 years ago
parent
commit
239fa8708b

+ 294 - 0
ch09_cnn/cnn.ipynb

@@ -0,0 +1,294 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<torch._C.Generator at 0x1388443b0>"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "from torch.utils.data import DataLoader, random_split\n",
+    "from torchvision import datasets\n",
+    "import torchvision.transforms as transforms\n",
+    "import matplotlib.pyplot as plt\n",
+    "%matplotlib inline\n",
+    "\n",
+    "\n",
+    "torch.manual_seed(12046)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(50000, 10000, 10000)"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 准备数据\n",
+    "dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())\n",
+    "# 将数据划分成训练集、验证集、测试集\n",
+    "train_set, val_set = random_split(dataset, [50000, 10000])\n",
+    "test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())\n",
+    "len(train_set), len(val_set), len(test_set)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 构建数据读取器\n",
+    "train_loader = DataLoader(train_set, batch_size=500, shuffle=True)\n",
+    "val_loader = DataLoader(val_set, batch_size=500, shuffle=True)\n",
+    "test_loader = DataLoader(test_set, batch_size=500, shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CNN(nn.Module):\n",
+    "    \n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.conv1 = nn.Conv2d(1, 20, (5, 5))\n",
+    "        self.pool1 = nn.MaxPool2d(2, 2)\n",
+    "        self.conv2 = nn.Conv2d(20, 40, (5, 5))\n",
+    "        self.pool2 = nn.MaxPool2d(2, 2)\n",
+    "        self.fc1 = nn.Linear(40 * 4 * 4, 120)\n",
+    "        self.fc2 = nn.Linear(120, 10)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        B = x.shape[0]                        # (B,  1, 28, 28)\n",
+    "        x = self.pool1(F.relu(self.conv1(x))) # (B, 20, 12, 12)\n",
+    "        x = self.pool2(F.relu(self.conv2(x))) # (B, 40,  4,  4)\n",
+    "        x = x.view(B, -1)                     # (B, 40 * 4 * 4)\n",
+    "        x = F.relu(self.fc1(x))               # (B, 120)\n",
+    "        x = self.fc2(x)                       # (B, 10)\n",
+    "        return x\n",
+    "\n",
+    "model = CNN()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eval_iters = 10\n",
+    "\n",
+    "def estimate_loss(model):\n",
+    "    re = {}\n",
+    "    # 将模型切换至评估模式\n",
+    "    model.eval()\n",
+    "    re['train'] = _loss(model, train_loader)\n",
+    "    re['val'] = _loss(model, val_loader)\n",
+    "    re['test'] = _loss(model, test_loader)\n",
+    "    # 将模型切换至训练模式\n",
+    "    model.train()\n",
+    "    return re\n",
+    "\n",
+    "@torch.no_grad()\n",
+    "def _loss(model, data_loader):\n",
+    "    \"\"\"\n",
+    "    计算模型在不同数据集下面的评估指标\n",
+    "    \"\"\"\n",
+    "    loss = []\n",
+    "    accuracy = []\n",
+    "    data_iter = iter(data_loader)\n",
+    "    for k in range(eval_iters):\n",
+    "        inputs, labels = next(data_iter)\n",
+    "        B = inputs.shape[0]\n",
+    "        logits = model(inputs)\n",
+    "        # 计算模型损失\n",
+    "        loss.append(F.cross_entropy(logits, labels))\n",
+    "        # 计算预测的准确率\n",
+    "        _, predicted = torch.max(logits, 1)\n",
+    "        accuracy.append((predicted == labels).sum() / B)\n",
+    "    re = {\n",
+    "        'loss': torch.tensor(loss).mean().item(),\n",
+    "        'accuracy': torch.tensor(accuracy).mean().item()\n",
+    "    }\n",
+    "    return re"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def train_cnn(model, optimizer, data_loader, epochs=10, penalty=[]):\n",
+    "    lossi = []\n",
+    "    for epoch in range(epochs):\n",
+    "        for i, data in enumerate(data_loader, 0):\n",
+    "            inputs, labels = data\n",
+    "            optimizer.zero_grad()\n",
+    "            logits = model(inputs)\n",
+    "            loss = F.cross_entropy(logits, labels)\n",
+    "            lossi.append(loss.item())\n",
+    "            # 增加惩罚项\n",
+    "            for p in penalty:\n",
+    "                loss += p(model)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "        # 评估模型,并输出结果\n",
+    "        stats = estimate_loss(model)\n",
+    "        train_loss = f'train loss {stats[\"train\"][\"loss\"]:.4f}'\n",
+    "        val_loss = f'val loss {stats[\"val\"][\"loss\"]:.4f}'\n",
+    "        test_loss = f'test loss {stats[\"test\"][\"loss\"]:.4f}'\n",
+    "        print(f'epoch {epoch:>2}: {train_loss}, {val_loss}, {test_loss}')\n",
+    "        train_acc = f'train acc {stats[\"train\"][\"accuracy\"]:.4f}'\n",
+    "        val_acc = f'val acc {stats[\"val\"][\"accuracy\"]:.4f}'\n",
+    "        test_acc = f'test acc {stats[\"test\"][\"accuracy\"]:.4f}'\n",
+    "        print(f'{\"\":>10}{train_acc}, {val_acc}, {test_acc}')\n",
+    "    return lossi"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stats = {}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "epoch  0: train loss 0.0558, val loss 0.0485, test loss 0.0498\n",
+      "          train acc 0.9798, val acc 0.9834, test acc 0.9856\n",
+      "epoch  1: train loss 0.0419, val loss 0.0455, test loss 0.0342\n",
+      "          train acc 0.9860, val acc 0.9854, test acc 0.9892\n",
+      "epoch  2: train loss 0.0303, val loss 0.0396, test loss 0.0305\n",
+      "          train acc 0.9914, val acc 0.9878, test acc 0.9886\n",
+      "epoch  3: train loss 0.0209, val loss 0.0335, test loss 0.0359\n",
+      "          train acc 0.9942, val acc 0.9898, test acc 0.9894\n",
+      "epoch  4: train loss 0.0193, val loss 0.0402, test loss 0.0344\n",
+      "          train acc 0.9930, val acc 0.9876, test acc 0.9902\n"
+     ]
+    }
+   ],
+   "source": [
+    "stats['cnn'] = train_cnn(model, optim.Adam(model.parameters(), lr=0.01), train_loader, epochs=5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class CNN2(nn.Module):\n",
+    "    \n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.conv1 = nn.Conv2d(1, 20, (5, 5))\n",
+    "        self.bn1 = nn.BatchNorm2d(20)\n",
+    "        self.pool1 = nn.MaxPool2d(2, 2)\n",
+    "        self.conv2 = nn.Conv2d(20, 40, (5, 5))\n",
+    "        self.bn2 = nn.BatchNorm2d(40)\n",
+    "        self.pool2 = nn.MaxPool2d(2, 2)\n",
+    "        self.fc1 = nn.Linear(40 * 4 * 4, 120)\n",
+    "        self.dropout = nn.Dropout(0.2)\n",
+    "        self.fc2 = nn.Linear(120, 10)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        B = x.shape[0]              # (B,  1, 28, 28)\n",
+    "        x = self.bn1(self.conv1(x)) # (B, 20, 24, 24)\n",
+    "        x = self.pool1(F.relu(x))   # (B, 20, 12, 12)\n",
+    "        x = self.bn2(self.conv2(x)) # (B, 40,  8,  8)\n",
+    "        x = self.pool2(F.relu(x))   # (B, 40,  4,  4)\n",
+    "        x = x.view(B, -1)           # (B, 40 * 4 * 4)\n",
+    "        x = F.relu(self.fc1(x))     # (B, 120)\n",
+    "        x = self.dropout(x)\n",
+    "        x = self.fc2(x)             # (B, 10)\n",
+    "        return x\n",
+    "\n",
+    "model2 = CNN2()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "epoch  0: train loss 0.0586, val loss 0.0594, test loss 0.0566\n",
+      "          train acc 0.9816, val acc 0.9840, test acc 0.9834\n",
+      "epoch  1: train loss 0.0436, val loss 0.0416, test loss 0.0417\n",
+      "          train acc 0.9860, val acc 0.9874, test acc 0.9876\n",
+      "epoch  2: train loss 0.0276, val loss 0.0389, test loss 0.0455\n",
+      "          train acc 0.9910, val acc 0.9890, test acc 0.9852\n",
+      "epoch  3: train loss 0.0215, val loss 0.0373, test loss 0.0306\n",
+      "          train acc 0.9938, val acc 0.9888, test acc 0.9910\n",
+      "epoch  4: train loss 0.0304, val loss 0.0390, test loss 0.0403\n",
+      "          train acc 0.9900, val acc 0.9868, test acc 0.9870\n"
+     ]
+    }
+   ],
+   "source": [
+    "stats['cnn2'] = train_cnn(model2, optim.Adam(model2.parameters(), lr=0.01), train_loader, epochs=5)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 212 - 0
ch09_cnn/conv_example.ipynb

@@ -0,0 +1,212 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m = nn.Conv2d(2, 1, (2, 2), stride=1, padding=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "k1 = torch.tensor(\n",
+    "    [[1, -2],\n",
+    "     [2, 0]]\n",
+    ").float()\n",
+    "k2 = torch.tensor(\n",
+    "    [[-3, 4],\n",
+    "    [5, 3]]\n",
+    ").float()\n",
+    "kernel = torch.stack((k1, k2), dim=0).unsqueeze(0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m.weight = nn.Parameter(kernel)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m.bias = nn.Parameter(torch.tensor([0]).float())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "i1 = torch.tensor(\n",
+    "    [[1, 2, 3],\n",
+    "     [0, 2, -1],\n",
+    "     [2, 3, 1]]\n",
+    ").float()\n",
+    "i2 = torch.tensor(\n",
+    "    [[1, 3, -2],\n",
+    "     [2, 4, 0],\n",
+    "     [3, 3, 1]]\n",
+    ").float()\n",
+    "image = torch.stack((i1, i2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([[[ 1.,  2.,  3.],\n",
+       "          [ 0.,  2., -1.],\n",
+       "          [ 2.,  3.,  1.]],\n",
+       " \n",
+       "         [[ 1.,  3., -2.],\n",
+       "          [ 2.,  4.,  0.],\n",
+       "          [ 3.,  3.,  1.]]]),\n",
+       " tensor([[[28.,  3.],\n",
+       "          [34., 16.]]], grad_fn=<SqueezeBackward1>))"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "image, m(image)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Conv2dSimple:\n",
+    "    \n",
+    "    def __init__(self, in_channel, out_channel, kernel_size, stride=1, padding=0):\n",
+    "        self.stride = stride\n",
+    "        self.padding = padding\n",
+    "        self.out_channel = out_channel\n",
+    "        self.kernel_size = kernel_size\n",
+    "        self.weight = torch.randn((out_channel, in_channel) + kernel_size)\n",
+    "        self.bias = torch.randn(out_channel)\n",
+    "        \n",
+    "    def __call__(self, x):\n",
+    "        data = x.unsqueeze(0) if len(x.shape) == 3 else x\n",
+    "        # B表示批量大小,I表示in_channel个数,H表示高度,W表示宽度\n",
+    "        B, I, H, W = data.shape\n",
+    "        h_step = (H + 2 * self.padding - self.kernel_size[0]) // self.stride + 1\n",
+    "        w_step = (W + 2 * self.padding - self.kernel_size[1]) // self.stride + 1\n",
+    "        # 定义输出的形状\n",
+    "        output = torch.zeros(B, self.out_channel, h_step, w_step)\n",
+    "        # 在图像的边缘增加0\n",
+    "        data = F.pad(data, (self.padding,) * 4)\n",
+    "        for i in range(h_step):\n",
+    "            h_begin = i * self.stride\n",
+    "            h_end = h_begin + self.kernel_size[0]\n",
+    "            for j in range(w_step):\n",
+    "                w_begin = j * self.stride\n",
+    "                w_end = w_begin + self.kernel_size[1]\n",
+    "                inputs = data[:, :, h_begin: h_end, w_begin: w_end].unsqueeze(1)\n",
+    "                # inputs的形状:     (B,           1, I, kernel_size[0], kernel_size[1])\n",
+    "                # self.weight的形状:(   out_channel, I, kernel_size[0], kernel_size[1])\n",
+    "                # linear_out的形状:  (B, out_channel)\n",
+    "                # bias的形状:        (   out_channel)\n",
+    "                linear_out = (inputs * self.weight).sum((-3, -2, -1))\n",
+    "                output[:, :, i, j] = linear_out + self.bias\n",
+    "        self.out = output.squeeze(0) if len(x.shape) == 3 else output\n",
+    "        return self.out\n",
+    "    \n",
+    "    def parameters(self):\n",
+    "        return [self.weight, self.bias]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = Conv2dSimple(2, 1, (2, 2), stride=1, padding=0)\n",
+    "t.weight = nn.Parameter(kernel)\n",
+    "t.bias = nn.Parameter(torch.tensor([0]).float())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([[[ 1.,  2.,  3.],\n",
+       "          [ 0.,  2., -1.],\n",
+       "          [ 2.,  3.,  1.]],\n",
+       " \n",
+       "         [[ 1.,  3., -2.],\n",
+       "          [ 2.,  4.,  0.],\n",
+       "          [ 3.,  3.,  1.]]]),\n",
+       " tensor([[[28.,  3.],\n",
+       "          [34., 16.]]], grad_fn=<SqueezeBackward1>))"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "image, t(image)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

BIN
ch09_cnn/data/MNIST/raw/t10k-images-idx3-ubyte


BIN
ch09_cnn/data/MNIST/raw/t10k-images-idx3-ubyte.gz


BIN
ch09_cnn/data/MNIST/raw/t10k-labels-idx1-ubyte


BIN
ch09_cnn/data/MNIST/raw/t10k-labels-idx1-ubyte.gz


BIN
ch09_cnn/data/MNIST/raw/train-images-idx3-ubyte


BIN
ch09_cnn/data/MNIST/raw/train-images-idx3-ubyte.gz


BIN
ch09_cnn/data/MNIST/raw/train-labels-idx1-ubyte


BIN
ch09_cnn/data/MNIST/raw/train-labels-idx1-ubyte.gz


File diff suppressed because it is too large
+ 363 - 0
ch09_cnn/mlp.ipynb


File diff suppressed because it is too large
+ 62 - 0
ch09_cnn/mnist.ipynb


Some files were not shown because too many files changed in this diff