class Linear(torch.nn.Module):
    """Univariate linear regression model ``y = a * x + b``.

    The model subclasses ``torch.nn.Module`` so that PyTorch's high-level
    utilities (e.g. the optimizers in ``torch.optim``) can discover its
    parameters through ``model.parameters()``.
    """

    def __init__(self):
        """Register the two learnable scalars: slope ``a`` and intercept ``b``.

        Both are 0-dim tensors initialised to zero.
        """
        super().__init__()
        # Registration order (a, then b) is the order in which
        # model.parameters() yields them.
        self.register_parameter('a', torch.nn.Parameter(torch.zeros(())))
        self.register_parameter('b', torch.nn.Parameter(torch.zeros(())))

    def forward(self, x):
        """Compute the model prediction from the current parameter estimates.

        Parameters
        ----
        x : torch.Tensor, the input variable x

        Returns
        ----
        y_pred : torch.Tensor, the model prediction ``a * x + b``
        """
        scaled = self.a * x
        y_pred = scaled + self.b
        return y_pred

    def string(self):
        """Return the fitted line as text, with both coefficients rounded to two decimals."""
        slope = self.a.item()
        intercept = self.b.item()
        return f'y = {slope:.2f} * x + {intercept:.2f}'
def mse(y, y_pred):
    """Mean squared error loss.

    Parameters
    ----
    y : torch.Tensor, observed targets
    y_pred : torch.Tensor, model predictions

    Returns
    ----
    torch.Tensor, 0-dim tensor holding the mean of the squared residuals
    """
    return (y - y_pred).pow(2).mean()


def run_sgd(model, optimizer, x, y, batch_size, n_steps=20, verbose=False):
    """Run mini-batch gradient descent and record the loss after every step.

    Batches are taken sequentially (no shuffling): step ``t`` uses the slice
    starting at ``(t * batch_size) % len(x)``, so the data is cycled through
    once the end is reached.

    Parameters
    ----
    model : torch.nn.Module, the model to train (updated in place); it must
        also provide a ``string()`` method when ``verbose`` is True
    optimizer : torch.optim.Optimizer, optimizer built on ``model.parameters()``
    x : torch.Tensor, input variable
    y : torch.Tensor, target variable, aligned with ``x``
    batch_size : int, number of samples used per step
    n_steps : int, number of optimisation steps
    verbose : bool, print the batch loss and the current model after each step

    Returns
    ----
    stats : dict with two lists of floats, one entry per step:
        'batch_loss' - loss on the current batch, measured BEFORE the update
        'total_loss' - loss on the whole data set, measured AFTER the update

    Raises
    ----
    ValueError : if ``batch_size`` is not positive or ``x`` is empty
    """
    if batch_size <= 0:
        raise ValueError(f'batch_size must be positive, got {batch_size}')
    n_samples = len(x)
    if n_samples == 0:
        raise ValueError('x must contain at least one sample')

    stats = {'batch_loss': [], 'total_loss': []}
    for t in range(n_steps):
        # Select the data of the current batch.
        ix = (t * batch_size) % n_samples
        xx = x[ix: ix + batch_size]
        yy = y[ix: ix + batch_size]
        # Loss on the current batch.
        loss = mse(yy, model(xx))
        # Clear the gradients left over from the previous step.
        optimizer.zero_grad()
        # Compute the gradient of the loss.
        loss.backward()
        # Update the parameter estimates.
        optimizer.step()
        # Evaluate on the whole data set. This is monitoring only, so autograd
        # tracking is switched off to avoid building an unused graph.
        with torch.no_grad():
            total_loss = mse(y, model(x))
        stats['batch_loss'].append(loss.item())
        stats['total_loss'].append(total_loss.item())
        if verbose:
            # Note: `loss` is the loss on the current batch only, so this
            # number fluctuates a lot from step to step.
            print(f'Step {t + 1}, Loss: {loss: .2f}; Result: {model.string()}')
    return stats


# Amount of data used in each batch.
batch_size = 20
# Step size of the optimisation algorithm.
learning_rate = 0.1

# Run 1: train a fresh model and print the progress of every step.
model = Linear()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
first_run_stats = run_sgd(model, optimizer, x, y, batch_size, verbose=True)

# Run 2: train another fresh model silently, keeping the batch loss and the
# whole-data loss of every step for plotting.
model = Linear()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
stats = run_sgd(model, optimizer, x, y, batch_size)
# Configure text rendering before any artist is created, so every text element
# of the figure is built under the same settings.
# The labels below are Chinese. 'SimHei' stays first, followed by other common
# CJK families, so the figure still renders the labels on machines where
# SimHei is not installed.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Heiti TC',
    'Noto Sans CJK SC',
    'WenQuanYi Micro Hei',
    'Arial Unicode MS',
]
# With a CJK font active, draw the minus sign as an ASCII hyphen.
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams.update({'font.size': 13})

# One figure holding a single axes.
fig, ax = plt.subplots(figsize=(6, 6), dpi=100)
# x axis: training step, y axis: model loss.
ax.set_xlabel('训练步数', fontsize=18)
ax.set_ylabel('模型损失', fontsize=18)
# Solid black line: loss on each batch; red dash-dot line: loss on all data.
ax.plot(stats['batch_loss'], 'k', label='批次损失')
ax.plot(stats['total_loss'], 'r-.', label='整体损失')
legend = ax.legend(shadow=True)
# Save before plt.show().
fig.savefig("sgd.png", dpi=200)
plt.show()