{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.optim as optim\n", "from torch.utils.data import DataLoader, random_split\n", "from torchvision import datasets\n", "import torchvision.transforms as transforms\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "\n", "torch.manual_seed(12046)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(50000, 10000, 10000)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 准备数据\n", "dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())\n", "# 将数据划分成训练集、验证集、测试集\n", "train_set, val_set = random_split(dataset, [50000, 10000])\n", "test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())\n", "len(train_set), len(val_set), len(test_set)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# 构建数据读取器\n", "train_loader = DataLoader(train_set, batch_size=500, shuffle=True)\n", "val_loader = DataLoader(val_set, batch_size=500, shuffle=True)\n", "test_loader = DataLoader(test_set, batch_size=500, shuffle=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([500, 1, 28, 28]), torch.Size([500]), torch.Size([500, 784]))" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 获取一个批量的数据\n", "x, y = next(iter(train_loader))\n", "x.shape, y.shape, x.view(x.shape[0], -1).shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# 两种常见的实现方式\n", "## 自由度更大的实现方式\n", "class MLP(nn.Module):\n", " \n", " def __init__(self):\n", " super().__init__()\n", " self.hidden1 = nn.Linear(784, 30)\n", " self.hidden2 = nn.Linear(30, 20)\n", " self.out = nn.Linear(20, 10)\n", "\n", " def forward(self, x):\n", " '''\n", " 多层感知器的向前传播\n", " 参数\n", " ----\n", " x :torch.FloatTensor,形状为(B, 784),其中B表示批量数据的大小\n", " '''\n", " x = F.sigmoid(self.hidden1(x)) # (B, 30)\n", " x = F.sigmoid(self.hidden2(x)) # (B, 20)\n", " x = self.out(x) # (B, 10)\n", " return x\n", "\n", "model = MLP()\n", "\n", "## 更简洁的实现方式\n", "model = nn.Sequential(\n", " nn.Linear(784, 30), nn.Sigmoid(),\n", " nn.Linear( 30, 20), nn.Sigmoid(),\n", " nn.Linear( 20, 10)\n", ")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "eval_iters = 10\n", "\n", "def estimate_loss(model):\n", " re = {}\n", " # 将模型切换至评估模式\n", " model.eval()\n", " re['train'] = _loss(model, train_loader)\n", " re['val'] = _loss(model, val_loader)\n", " re['test'] = _loss(model, test_loader)\n", " # 将模型切换至训练模式\n", " model.train()\n", " return re\n", "\n", "@torch.no_grad()\n", "def _loss(model, data_loader):\n", " \"\"\"\n", " 计算模型在不同数据集下面的评估指标\n", " \"\"\"\n", " loss = []\n", " accuracy = []\n", " data_iter = iter(data_loader)\n", " for k in range(eval_iters):\n", " inputs, labels = next(data_iter)\n", " B, C, H, W = inputs.shape\n", " # 将数据转换成模型输入要求的形状\n", " # 也可以用如下的命令来完成:inputs.view(-1, 784)\n", " logits = model(inputs.view(B, -1))\n", " # 计算模型损失\n", " loss.append(F.cross_entropy(logits, labels))\n", " # 计算预测的准确率\n", " _, predicted = torch.max(logits, 1)\n", " accuracy.append((predicted == labels).sum() / B)\n", " re = {\n", " 'loss': torch.tensor(loss).mean().item(),\n", " 'accuracy': torch.tensor(accuracy).mean().item()\n", " }\n", " return re" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def train_mlp(model, optimizer, data_loader, epochs=10, penalty=[]):\n", " lossi = []\n", " for epoch in range(epochs):\n", " for i, data in enumerate(data_loader, 0):\n", " inputs, labels = data\n", " optimizer.zero_grad()\n", " B, C, H, W = inputs.shape\n", " # 将数据转换成模型输入要求的形状\n", " # 也可以用如下的命令来完成:inputs.view(-1, 784)\n", " logits = model(inputs.view(B, -1))\n", " loss = F.cross_entropy(logits, labels)\n", " lossi.append(loss.item())\n", " # 增加惩罚项\n", " for p in penalty:\n", " loss += p(model)\n", " loss.backward()\n", " optimizer.step()\n", " # 评估模型,并输出结果\n", " stats = estimate_loss(model)\n", " train_loss = f'train loss {stats[\"train\"][\"loss\"]:.4f}'\n", " val_loss = f'val loss {stats[\"val\"][\"loss\"]:.4f}'\n", " test_loss = f'test loss {stats[\"test\"][\"loss\"]:.4f}'\n", " print(f'epoch {epoch:>2}: {train_loss}, {val_loss}, {test_loss}')\n", " train_acc = f'train acc {stats[\"train\"][\"accuracy\"]:.4f}'\n", " val_acc = f'val acc {stats[\"val\"][\"accuracy\"]:.4f}'\n", " test_acc = f'test acc {stats[\"test\"][\"accuracy\"]:.4f}'\n", " print(f'{\"\":>10}{train_acc}, {val_acc}, {test_acc}')\n", " return lossi" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "stats = {}" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 0: train loss 2.3778, val loss 2.3783, test loss 2.3740\n", " train acc 0.1050, val acc 0.1042, test acc 0.1018\n", "epoch 1: train loss 2.3652, val loss 2.3679, test loss 2.3587\n", " train acc 0.0998, val acc 0.1054, test acc 0.0996\n", "epoch 2: train loss 2.3579, val loss 2.3572, test loss 2.3564\n", " train acc 0.0958, val acc 0.1048, test acc 0.1018\n", "epoch 3: train loss 2.3514, val loss 2.3545, test loss 2.3452\n", " train acc 0.0972, val acc 0.0988, test acc 0.0960\n", "epoch 4: train loss 2.3378, val loss 2.3427, test loss 2.3362\n", " train acc 0.0906, val acc 0.0944, test acc 0.0936\n", "epoch 5: train loss 2.3339, val loss 2.3347, test loss 2.3252\n", " train acc 0.0892, val acc 0.0966, test acc 0.0926\n", "epoch 6: train loss 2.3239, val loss 2.3264, test loss 2.3260\n", " train acc 0.0952, val acc 0.0940, test acc 0.0946\n", "epoch 7: train loss 2.3136, val loss 2.3153, test loss 2.3201\n", " train acc 0.1096, val acc 0.1084, test acc 0.1008\n", "epoch 8: train loss 2.3263, val loss 2.3140, test loss 2.3160\n", " train acc 0.1066, val acc 0.1206, test acc 0.1144\n", "epoch 9: train loss 2.3150, val loss 2.3177, test loss 2.3136\n", " train acc 0.1236, val acc 0.1174, test acc 0.1218\n" ] } ], "source": [ "# 模型参数初始化\n", "# nn.init下面的函数都自动跑在torch.no_grad()的模式下\n", "for m in model:\n", " if isinstance(m, nn.Linear):\n", " nn.init.xavier_normal_(m.weight, gain=nn.init.calculate_gain('sigmoid'))\n", " nn.init.zeros_(m.bias)\n", "\n", "# 使用最经典的标准随机梯度下降法\n", "stats['mlp'] = train_mlp(model, optim.SGD(model.parameters(), lr=0.001), train_loader)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 0: train loss 2.3180, val loss 2.3065, test loss 2.3033\n", " train acc 0.1066, val acc 0.1070, test acc 0.1136\n", "epoch 1: train loss 2.2322, val loss 2.2220, test loss 2.2221\n", " train acc 0.1354, val acc 0.1302, test acc 0.1322\n", "epoch 2: train loss 2.1533, val loss 2.1500, test loss 2.1490\n", " train acc 0.1596, val acc 0.1600, test acc 0.1590\n", "epoch 3: train loss 2.1046, val loss 2.0999, test loss 2.0873\n", " train acc 0.1816, val acc 0.1822, test acc 0.1810\n", "epoch 4: train loss 2.0356, val loss 2.0395, test loss 2.0476\n", " train acc 0.2152, val acc 0.2214, test acc 0.2074\n", "epoch 5: train loss 1.9938, val loss 1.9933, test loss 1.9784\n", " train acc 0.2594, val acc 0.2526, test acc 0.2622\n", "epoch 6: train loss 1.9594, val loss 1.9515, test loss 1.9479\n", " train acc 0.2754, val acc 0.2894, test acc 0.2916\n", "epoch 7: train loss 1.9213, val loss 1.9103, test loss 1.9047\n", " train acc 0.3144, val acc 0.3194, test acc 0.3194\n", "epoch 8: train loss 1.8772, val loss 1.8644, test loss 1.8592\n", " train acc 0.3436, val acc 0.3560, test acc 0.3520\n", "epoch 9: train loss 1.8342, val loss 1.8432, test loss 1.8242\n", " train acc 0.3646, val acc 0.3688, test acc 0.3692\n" ] } ], "source": [ "# 使用更高效的激活函数搭建模型\n", "model1 = nn.Sequential(\n", " nn.Linear(784, 30), nn.ReLU(),\n", " nn.Linear( 30, 20), nn.ReLU(),\n", " nn.Linear( 20, 10)\n", ")\n", "\n", "# 模型参数初始化\n", "# nn.init下面的函数都自动跑在torch.no_grad()的模式下\n", "for m in model1:\n", " if isinstance(m, nn.Linear):\n", " nn.init.xavier_normal_(m.weight, gain=nn.init.calculate_gain('relu'))\n", " nn.init.zeros_(m.bias)\n", " \n", "stats['mlp_relu'] = train_mlp(model1, optim.SGD(model1.parameters(), lr=0.001), train_loader)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 0: train loss 2.2938, val loss 2.2973, test loss 2.2647\n", " train acc 0.1716, val acc 0.1746, test acc 0.1806\n", "epoch 1: train loss 2.1120, val loss 2.1006, test loss 2.0767\n", " train acc 0.2590, val acc 0.2646, test acc 0.2808\n", "epoch 2: train loss 1.9710, val loss 1.9687, test loss 1.9488\n", " train acc 0.3556, val acc 0.3530, test acc 0.3668\n", "epoch 3: train loss 1.8826, val loss 1.8678, test loss 1.8574\n", " train acc 0.4090, val acc 0.4240, test acc 0.4240\n", "epoch 4: train loss 1.7792, val loss 1.7858, test loss 1.7711\n", " train acc 0.4660, val acc 0.4664, test acc 0.4734\n", "epoch 5: train loss 1.7369, val loss 1.7219, test loss 1.7008\n", " train acc 0.4790, val acc 0.4942, test acc 0.5104\n", "epoch 6: train loss 1.6658, val loss 1.6496, test loss 1.6290\n", " train acc 0.5246, val acc 0.5264, test acc 0.5470\n", "epoch 7: train loss 1.6208, val loss 1.6038, test loss 1.5784\n", " train acc 0.5484, val acc 0.5470, test acc 0.5704\n", "epoch 8: train loss 1.5519, val loss 1.5578, test loss 1.5207\n", " train acc 0.5686, val acc 0.5724, test acc 0.5930\n", "epoch 9: train loss 1.4973, val loss 1.4873, test loss 1.4516\n", " train acc 0.5956, val acc 0.5970, test acc 0.6148\n" ] } ], "source": [ "# 在模型中增加归一化层,可加速训练过程\n", "model2 = nn.Sequential(\n", " nn.Linear(784, 30, bias=False), nn.LayerNorm(30), nn.ReLU(),\n", " nn.Linear( 30, 20, bias=False), nn.LayerNorm(20), nn.ReLU(),\n", " nn.Linear( 20, 10)\n", ")\n", "\n", "# 模型参数初始化\n", "# nn.init下面的函数都自动跑在torch.no_grad()的模式下\n", "for m in model2:\n", " if isinstance(m, nn.Linear):\n", " nn.init.xavier_normal_(m.weight, gain=nn.init.calculate_gain('relu'))\n", "\n", "stats['mlp_relu_layer_norm'] = train_mlp(model2, optim.SGD(model2.parameters(), lr=0.001), train_loader)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# 展示3种模型的损失下降曲线\n", "fig = plt.figure(figsize=(10, 6), dpi=80)\n", "# 解决中文显示的问题\n", "plt.rcParams['font.sans-serif'] = ['SimHei']\n", "plt.rcParams['axes.unicode_minus'] = False\n", "plt.rcParams.update({'font.size': 13})\n", "plt.ylabel('模型损失', fontsize=18)\n", "style = ['k', 'r-.', 'g--']\n", "for i, l in enumerate(['mlp', 'mlp_relu', 'mlp_relu_layer_norm']):\n", " _l = torch.tensor(stats[l]).view(-1, 10).mean(1)\n", " plt.plot(_l.numpy(), style[i], label=l)\n", "legend = plt.legend(shadow=True)\n", "plt.savefig(\"mnist_mlp_loss.png\", dpi=200)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 0: train loss 0.2021, val loss 0.2017, test loss 0.2014\n", " train acc 0.9420, val acc 0.9372, test acc 0.9430\n", "epoch 1: train loss 0.1160, val loss 0.1818, test loss 0.1695\n", " train acc 0.9644, val acc 0.9454, test acc 0.9488\n", "epoch 2: train loss 0.1054, val loss 0.1394, test loss 0.1379\n", " train acc 0.9694, val acc 0.9556, test acc 0.9596\n", "epoch 3: train loss 0.1051, val loss 0.1511, test loss 0.1387\n", " train acc 0.9658, val acc 0.9534, test acc 0.9584\n", "epoch 4: train loss 0.0837, val loss 0.1261, test loss 0.1376\n", " train acc 0.9770, val acc 0.9624, test acc 0.9620\n", "epoch 5: train loss 0.0682, val loss 0.1265, test loss 0.1214\n", " train acc 0.9800, val acc 0.9622, test acc 0.9656\n", "epoch 6: train loss 0.0871, val loss 0.1341, test loss 0.1344\n", " train acc 0.9722, val acc 0.9602, test acc 0.9588\n", "epoch 7: train loss 0.0652, val loss 0.1211, test loss 0.1327\n", " train acc 0.9812, val acc 0.9596, test acc 0.9612\n", "epoch 8: train loss 0.0558, val loss 0.1403, test loss 0.1296\n", " train acc 0.9798, val acc 0.9594, test acc 0.9620\n", "epoch 9: train loss 0.0461, val loss 0.1123, test loss 0.1212\n", " train acc 0.9868, val acc 0.9670, test acc 0.9676\n" ] } ], "source": [ "# 模型会遭遇比较严重的过拟合问题\n", "model2 = nn.Sequential(\n", " nn.Linear(784, 30, bias=False), nn.LayerNorm(30), nn.ReLU(),\n", " nn.Linear( 30, 20, bias=False), nn.LayerNorm(20), nn.ReLU(),\n", " nn.Linear( 20, 10)\n", ")\n", "\n", "_ = train_mlp(model2, optim.Adam(model2.parameters(), lr=0.01), train_loader)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tensor([-0.7424, 0.6917, -1.3143, 1.0491, 0.8641]) tensor([-0.0000, 0.0000, -0.0000, 2.0982, 0.0000])\n", "tensor([-0.7424, 0.6917, -1.3143, 1.0491, 0.8641]) tensor([-0.7424, 0.6917, -1.3143, 1.0491, 0.8641])\n", "tensor([-0.7424, 0.6917, -1.3143, 1.0491, 0.8641]) tensor([-0.0000, 1.3833, -0.0000, 0.0000, 0.0000])\n" ] } ], "source": [ "# 展示模型评估模式和训练模式的差别\n", "m = nn.Dropout(0.5)\n", "x = torch.randn(5)\n", "# 创建之后,模型处于训练模式\n", "print(x, m(x))\n", "# 模型处于评估模式\n", "m.eval()\n", "print(x, m(x))\n", "# 模型处于训练模式\n", "m.train()\n", "print(x, m(x))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 0: train loss 0.2624, val loss 0.2747, test loss 0.2760\n", " train acc 0.9218, val acc 0.9192, test acc 0.9218\n", "epoch 1: train loss 0.2212, val loss 0.2136, test loss 0.2235\n", " train acc 0.9366, val acc 0.9396, test acc 0.9404\n", "epoch 2: train loss 0.1815, val loss 0.1791, test loss 0.2103\n", " train acc 0.9482, val acc 0.9474, test acc 0.9390\n", "epoch 3: train loss 0.1586, val loss 0.1938, test loss 0.2001\n", " train acc 0.9564, val acc 0.9434, test acc 0.9466\n", "epoch 4: train loss 0.1583, val loss 0.1809, test loss 0.1635\n", " train acc 0.9536, val acc 0.9468, test acc 0.9502\n", "epoch 5: train loss 0.1400, val loss 0.1830, test loss 0.1716\n", " train acc 0.9602, val acc 0.9460, test acc 0.9550\n", "epoch 6: train loss 0.1354, val loss 0.1662, test loss 0.1716\n", " train acc 0.9608, val acc 0.9516, test acc 0.9508\n", "epoch 7: train loss 0.1332, val loss 0.1840, test loss 0.1861\n", " train acc 0.9590, val acc 0.9438, test acc 0.9486\n", "epoch 8: train loss 0.1373, val loss 0.1424, test loss 0.1560\n", " train acc 0.9596, val acc 0.9588, test acc 0.9526\n", "epoch 9: train loss 0.1117, val loss 0.1732, test loss 0.1595\n", " train acc 0.9670, val acc 0.9464, test acc 0.9556\n", "epoch 10: train loss 0.1116, val loss 0.1371, test loss 0.1651\n", " train acc 0.9670, val acc 0.9552, test acc 0.9532\n", "epoch 11: train loss 0.1279, val loss 0.1457, test loss 0.1572\n", " train acc 0.9610, val acc 0.9580, test acc 0.9566\n", "epoch 12: train loss 0.1144, val loss 0.1527, test loss 0.1515\n", " train acc 0.9652, val acc 0.9550, test acc 0.9574\n", "epoch 13: train loss 0.1085, val loss 0.1437, test loss 0.1498\n", " train acc 0.9690, val acc 0.9586, test acc 0.9580\n", "epoch 14: train loss 0.1001, val loss 0.1362, test loss 0.1349\n", " train acc 0.9680, val acc 0.9586, test acc 0.9600\n", "epoch 15: train loss 0.1121, val loss 0.1467, test loss 0.1469\n", " train acc 0.9680, val acc 0.9568, test acc 0.9590\n", "epoch 16: train loss 0.0910, val loss 0.1464, test loss 0.1343\n", " train acc 0.9740, val acc 0.9552, test acc 0.9620\n", "epoch 17: train loss 0.1044, val loss 0.1427, test loss 0.1461\n", " train acc 0.9704, val acc 0.9568, test acc 0.9592\n", "epoch 18: train loss 0.1026, val loss 0.1363, test loss 0.1446\n", " train acc 0.9698, val acc 0.9602, test acc 0.9624\n", "epoch 19: train loss 0.0943, val loss 0.1459, test loss 0.1502\n", " train acc 0.9704, val acc 0.9584, test acc 0.9578\n", "epoch 20: train loss 0.1010, val loss 0.1588, test loss 0.1324\n", " train acc 0.9720, val acc 0.9514, test acc 0.9626\n", "epoch 21: train loss 0.0872, val loss 0.1464, test loss 0.1656\n", " train acc 0.9740, val acc 0.9560, test acc 0.9574\n", "epoch 22: train loss 0.0980, val loss 0.1465, test loss 0.1466\n", " train acc 0.9718, val acc 0.9578, test acc 0.9616\n", "epoch 23: train loss 0.0872, val loss 0.1393, test loss 0.1491\n", " train acc 0.9730, val acc 0.9608, test acc 0.9588\n", "epoch 24: train loss 0.0765, val loss 0.1320, test loss 0.1550\n", " train acc 0.9750, val acc 0.9602, test acc 0.9596\n", "epoch 25: train loss 0.0883, val loss 0.1512, test loss 0.1551\n", " train acc 0.9726, val acc 0.9578, test acc 0.9608\n", "epoch 26: train loss 0.0854, val loss 0.1431, test loss 0.1439\n", " train acc 0.9726, val acc 0.9612, test acc 0.9602\n", "epoch 27: train loss 0.0971, val loss 0.1391, test loss 0.1444\n", " train acc 0.9722, val acc 0.9632, test acc 0.9610\n", "epoch 28: train loss 0.0919, val loss 0.1372, test loss 0.1503\n", " train acc 0.9718, val acc 0.9590, test acc 0.9598\n", "epoch 29: train loss 0.0959, val loss 0.1511, test loss 0.1547\n", " train acc 0.9688, val acc 0.9558, test acc 0.9586\n", "epoch 30: train loss 0.0868, val loss 0.1463, test loss 0.1641\n", " train acc 0.9722, val acc 0.9564, test acc 0.9596\n", "epoch 31: train loss 0.1052, val loss 0.1520, test loss 0.1596\n", " train acc 0.9690, val acc 0.9572, test acc 0.9592\n", "epoch 32: train loss 0.0793, val loss 0.1439, test loss 0.1480\n", " train acc 0.9750, val acc 0.9574, test acc 0.9624\n", "epoch 33: train loss 0.0890, val loss 0.1309, test loss 0.1544\n", " train acc 0.9732, val acc 0.9612, test acc 0.9592\n", "epoch 34: train loss 0.0957, val loss 0.1493, test loss 0.1446\n", " train acc 0.9696, val acc 0.9550, test acc 0.9610\n", "epoch 35: train loss 0.0800, val loss 0.1468, test loss 0.1516\n", " train acc 0.9756, val acc 0.9586, test acc 0.9606\n", "epoch 36: train loss 0.0762, val loss 0.1422, test loss 0.1455\n", " train acc 0.9762, val acc 0.9610, test acc 0.9626\n", "epoch 37: train loss 0.0721, val loss 0.1383, test loss 0.1488\n", " train acc 0.9816, val acc 0.9608, test acc 0.9604\n", "epoch 38: train loss 0.0758, val loss 0.1478, test loss 0.1715\n", " train acc 0.9784, val acc 0.9560, test acc 0.9564\n", "epoch 39: train loss 0.0749, val loss 0.1454, test loss 0.1443\n", " train acc 0.9770, val acc 0.9580, test acc 0.9638\n", "epoch 40: train loss 0.0822, val loss 0.1413, test loss 0.1595\n", " train acc 0.9762, val acc 0.9578, test acc 0.9584\n", "epoch 41: train loss 0.0744, val loss 0.1474, test loss 0.1552\n", " train acc 0.9756, val acc 0.9606, test acc 0.9602\n", "epoch 42: train loss 0.0954, val loss 0.1454, test loss 0.1458\n", " train acc 0.9728, val acc 0.9594, test acc 0.9640\n", "epoch 43: train loss 0.0602, val loss 0.1450, test loss 0.1573\n", " train acc 0.9800, val acc 0.9576, test acc 0.9622\n", "epoch 44: train loss 0.0716, val loss 0.1474, test loss 0.1482\n", " train acc 0.9774, val acc 0.9590, test acc 0.9602\n", "epoch 45: train loss 0.0791, val loss 0.1450, test loss 0.1539\n", " train acc 0.9754, val acc 0.9578, test acc 0.9608\n", "epoch 46: train loss 0.0719, val loss 0.1531, test loss 0.1411\n", " train acc 0.9784, val acc 0.9584, test acc 0.9644\n", "epoch 47: train loss 0.0720, val loss 0.1354, test loss 0.1408\n", " train acc 0.9790, val acc 0.9586, test acc 0.9652\n", "epoch 48: train loss 0.0713, val loss 0.1395, test loss 0.1497\n", " train acc 0.9786, val acc 0.9636, test acc 0.9640\n", "epoch 49: train loss 0.0713, val loss 0.1462, test loss 0.1616\n", " train acc 0.9768, val acc 0.9622, test acc 0.9574\n" ] } ], "source": [ "# 加入dropout之后会减轻过拟合的问题\n", "model3 = nn.Sequential(\n", " nn.Linear(784, 30, bias=False), nn.LayerNorm(30), nn.ReLU(), nn.Dropout(0.2),\n", " nn.Linear( 30, 20, bias=False), nn.LayerNorm(20), nn.ReLU(), nn.Dropout(0.2),\n", " nn.Linear( 20, 10)\n", ")\n", "\n", "_ = train_mlp(model3, optim.Adam(model3.parameters(), lr=0.01), train_loader, epochs=50)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# 定义l1和l2惩罚项\n", "def l1_loss(model, weight):\n", " w = torch.cat([p.view(-1) for p in model.parameters()])\n", " return weight * torch.abs(w).sum()\n", "\n", "def l2_loss(model, weight):\n", " w = torch.cat([p.view(-1) for p in model.parameters()])\n", " return weight * torch.square(w).sum()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch 0: train loss 0.2159, val loss 0.2189, test loss 0.2249\n", " train acc 0.9408, val acc 0.9360, test acc 0.9366\n", "epoch 1: train loss 0.1815, val loss 0.1961, test loss 0.1797\n", " train acc 0.9482, val acc 0.9438, test acc 0.9462\n", "epoch 2: train loss 0.1400, val loss 0.1786, test loss 0.1547\n", " train acc 0.9626, val acc 0.9452, test acc 0.9560\n", "epoch 3: train loss 0.1465, val loss 0.1666, test loss 0.1588\n", " train acc 0.9582, val acc 0.9510, test acc 0.9554\n", "epoch 4: train loss 0.1555, val loss 0.1726, test loss 0.1639\n", " train acc 0.9530, val acc 0.9502, test acc 0.9518\n", "epoch 5: train loss 0.1397, val loss 0.1606, test loss 0.1441\n", " train acc 0.9588, val acc 0.9490, test acc 0.9558\n", "epoch 6: train loss 0.1506, val loss 0.1638, test loss 0.1609\n", " train acc 0.9538, val acc 0.9494, test acc 0.9568\n", "epoch 7: train loss 0.1317, val loss 0.1556, test loss 0.1438\n", " train acc 0.9634, val acc 0.9544, test acc 0.9578\n", "epoch 8: train loss 0.1663, val loss 0.1790, test loss 0.1916\n", " train acc 0.9554, val acc 0.9472, test acc 0.9446\n", "epoch 9: train loss 0.1314, val loss 0.1408, test loss 0.1347\n", " train acc 0.9632, val acc 0.9586, test acc 0.9618\n" ] } ], "source": [ "# 使用l2惩罚性\n", "model2 = nn.Sequential(\n", " nn.Linear(784, 30, bias=False), nn.LayerNorm(30), nn.ReLU(),\n", " nn.Linear( 30, 20, bias=False), nn.LayerNorm(20), nn.ReLU(),\n", " nn.Linear( 20, 10)\n", ")\n", "\n", "p2 = lambda m: l2_loss(m, 0.001)\n", "_ = train_mlp(model2, optim.Adam(model2.parameters(), lr=0.01), train_loader, penalty=[p2])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }