فهرست منبع

chapter 6 finished

Gen TANG 2 سال پیش
والد
کامیت
7ba1f68077
3 فایل تغییر یافته به همراه 688 افزوده شده و 0 حذف شده
  1. 184 0
      ch06-optimizer/gradient_descent.ipynb
  2. 340 0
      ch06-optimizer/pytorch_tutorial.ipynb
  3. 164 0
      ch06-optimizer/stochastic_gradient_descent.ipynb

+ 184 - 0
ch06-optimizer/gradient_descent.ipynb

@@ -0,0 +1,184 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "# 固定随机种子,使得运行结果可以稳定复现\n",
+    "torch.manual_seed(1024)\n",
+    "# 产生训练用的数据\n",
+    "x_origin = torch.linspace(100, 300, 200)\n",
+    "# 将变量X归一化,否则梯度下降法很容易不稳定\n",
+    "x = (x_origin - torch.mean(x_origin)) / torch.std(x_origin)\n",
+    "epsilon = torch.randn(x.shape)\n",
+    "y = 10 * x + 5 + epsilon"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 为了使用PyTorch的高层封装函数,我们通过继承Module类来定义函数\n",
+    "class Linear(torch.nn.Module):\n",
+    "    def __init__(self):\n",
+    "        \"\"\"\n",
+    "        定义线性回归模型的参数:a, b\n",
+    "        \"\"\"\n",
+    "        super().__init__()\n",
+    "        self.a = torch.nn.Parameter(torch.zeros(()))\n",
+    "        self.b = torch.nn.Parameter(torch.zeros(()))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        \"\"\"\n",
+    "        根据当前的参数估计值,得到模型的预测结果\n",
+    "        参数\n",
+    "        ----\n",
+    "        x :torch.tensor,变量x\n",
+    "        返回\n",
+    "        ----\n",
+    "        y_pred :torch.tensor,模型预测值\n",
+    "        \"\"\"\n",
+    "        return self.a * x + self.b\n",
+    "\n",
+    "    def string(self):\n",
+    "        \"\"\"\n",
+    "        输出当前模型的结果\n",
+    "        \"\"\"\n",
+    "        return f'y = {self.a.item():.2f} * x + {self.b.item():.2f}'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step 1, Loss:  125.25; Result: y = 1.98 * x + 1.02\n",
+      "Step 2, Loss:  80.72; Result: y = 3.56 * x + 1.83\n",
+      "Step 3, Loss:  52.16; Result: y = 4.83 * x + 2.49\n",
+      "Step 4, Loss:  33.84; Result: y = 5.85 * x + 3.01\n",
+      "Step 5, Loss:  22.10; Result: y = 6.66 * x + 3.42\n",
+      "Step 6, Loss:  14.57; Result: y = 7.31 * x + 3.76\n",
+      "Step 7, Loss:  9.74; Result: y = 7.83 * x + 4.03\n",
+      "Step 8, Loss:  6.64; Result: y = 8.25 * x + 4.24\n",
+      "Step 9, Loss:  4.66; Result: y = 8.59 * x + 4.41\n",
+      "Step 10, Loss:  3.38; Result: y = 8.85 * x + 4.55\n",
+      "Step 11, Loss:  2.56; Result: y = 9.07 * x + 4.66\n",
+      "Step 12, Loss:  2.04; Result: y = 9.24 * x + 4.74\n",
+      "Step 13, Loss:  1.71; Result: y = 9.38 * x + 4.81\n",
+      "Step 14, Loss:  1.49; Result: y = 9.49 * x + 4.87\n",
+      "Step 15, Loss:  1.35; Result: y = 9.58 * x + 4.91\n",
+      "Step 16, Loss:  1.26; Result: y = 9.65 * x + 4.95\n",
+      "Step 17, Loss:  1.21; Result: y = 9.71 * x + 4.98\n",
+      "Step 18, Loss:  1.17; Result: y = 9.75 * x + 5.00\n",
+      "Step 19, Loss:  1.15; Result: y = 9.79 * x + 5.02\n",
+      "Step 20, Loss:  1.13; Result: y = 9.82 * x + 5.03\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 定义模型\n",
+    "model = Linear()\n",
+    "# 确定最优化算法\n",
+    "learning_rate = 0.1\n",
+    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)\n",
+    "\n",
+    "for t in range(20):\n",
+    "    # 根据当前的参数估计值,得到模型的预测结果\n",
+    "    # 也就是调用forward函数\n",
+    "    y_pred = model(x)\n",
+    "    # 计算损失函数\n",
+    "    loss = (y - y_pred).pow(2).mean()\n",
+    "    # 将上一次的梯度清零\n",
+    "    optimizer.zero_grad()\n",
+    "    # 计算损失函数的梯度\n",
+    "    loss.backward()\n",
+    "    # 迭代更新模型参数的估计值\n",
+    "    optimizer.step()\n",
+    "    print(f'Step {t + 1}, Loss: {loss: .2f}; Result: {model.string()}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Step 1, Loss:  125.25; Result: y = 1.98 * x + 1.02\n",
+      "Step 2, Loss:  80.72; Result: y = 3.56 * x + 1.83\n",
+      "Step 3, Loss:  52.16; Result: y = 4.83 * x + 2.49\n",
+      "Step 4, Loss:  33.84; Result: y = 5.85 * x + 3.01\n",
+      "Step 5, Loss:  22.10; Result: y = 6.66 * x + 3.42\n",
+      "Step 6, Loss:  14.57; Result: y = 7.31 * x + 3.76\n",
+      "Step 7, Loss:  9.74; Result: y = 7.83 * x + 4.03\n",
+      "Step 8, Loss:  6.64; Result: y = 8.25 * x + 4.24\n",
+      "Step 9, Loss:  4.66; Result: y = 8.59 * x + 4.41\n",
+      "Step 10, Loss:  3.38; Result: y = 8.85 * x + 4.55\n",
+      "Step 11, Loss:  2.56; Result: y = 9.07 * x + 4.66\n",
+      "Step 12, Loss:  2.04; Result: y = 9.24 * x + 4.74\n",
+      "Step 13, Loss:  1.71; Result: y = 9.38 * x + 4.81\n",
+      "Step 14, Loss:  1.49; Result: y = 9.49 * x + 4.87\n",
+      "Step 15, Loss:  1.35; Result: y = 9.58 * x + 4.91\n",
+      "Step 16, Loss:  1.26; Result: y = 9.65 * x + 4.95\n",
+      "Step 17, Loss:  1.21; Result: y = 9.71 * x + 4.98\n",
+      "Step 18, Loss:  1.17; Result: y = 9.75 * x + 5.00\n",
+      "Step 19, Loss:  1.15; Result: y = 9.79 * x + 5.02\n",
+      "Step 20, Loss:  1.13; Result: y = 9.82 * x + 5.03\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 利用代码实现PyTorch封装的梯度下降法\n",
+    "model = Linear()\n",
+    "for t in range(20):\n",
+    "    # 根据当前的参数估计值,得到模型的预测结果\n",
+    "    # 也就是调用forward函数\n",
+    "    y_pred = model(x)\n",
+    "    # 计算损失函数\n",
+    "    loss = (y - y_pred).pow(2).mean()\n",
+    "    # 计算损失函数的梯度\n",
+    "    loss.backward()\n",
+    "    with torch.no_grad():\n",
+    "        for param in model.parameters():\n",
+    "            # 迭代更新模型参数的估计值,等同于optimizer.step()\n",
+    "            param -= learning_rate * param.grad\n",
+    "            # 将梯度清零,等同于optimizer.zero_grad()\n",
+    "            param.grad = None\n",
+    "    print(f'Step {t + 1}, Loss: {loss: .2f}; Result: {model.string()}')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

+ 340 - 0
ch06-optimizer/pytorch_tutorial.ipynb

@@ -0,0 +1,340 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: torch in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (2.0.1)\n",
+      "Requirement already satisfied: jinja2 in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from torch) (2.11.2)\n",
+      "Requirement already satisfied: filelock in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from torch) (3.0.12)\n",
+      "Requirement already satisfied: sympy in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from torch) (1.6.2)\n",
+      "Requirement already satisfied: typing-extensions in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from torch) (3.7.4.3)\n",
+      "Requirement already satisfied: networkx in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from torch) (2.5)\n",
+      "Requirement already satisfied: MarkupSafe>=0.23 in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from jinja2->torch) (1.1.1)\n",
+      "Requirement already satisfied: mpmath>=0.19 in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from sympy->torch) (1.1.0)\n",
+      "Requirement already satisfied: decorator>=4.3.0 in /Users/tgbaggio/opt/anaconda3/lib/python3.8/site-packages (from networkx->torch) (4.4.2)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 安装PyTorch\n",
+    "!pip install torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[0., 0., 0.],\n",
+      "        [0., 0., 0.]])\n",
+      "tensor([[1., 1., 1.],\n",
+      "        [1., 1., 1.]])\n",
+      "tensor([[0.8090, 0.7935, 0.2099, 0.9279],\n",
+      "        [0.8136, 0.7422, 0.4769, 0.4955],\n",
+      "        [0.3602, 0.1178, 0.7852, 0.0228]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "\n",
+    "# 创建tensor\n",
+    "## 使用tensor封装的函数创建tensor\n",
+    "zeros = torch.zeros(2, 3)\n",
+    "print(zeros)\n",
+    "\n",
+    "ones = torch.ones(2, 3)\n",
+    "print(ones)\n",
+    "\n",
+    "torch.manual_seed(1024)\n",
+    "random = torch.rand(3, 4)\n",
+    "print(random)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[2, 3, 4],\n",
+      "        [1, 0, 1]])\n",
+      "tensor([[2, 3, 4],\n",
+      "        [1, 0, 1]])\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "tensor(True)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 创建tensor\n",
+    "## 从Python对象创建\n",
+    "data = [[2, 3, 4], [1, 0, 1]]\n",
+    "t_data = torch.tensor(data)\n",
+    "print(t_data)\n",
+    "\n",
+    "## 从Numpy对象创建\n",
+    "import numpy as np\n",
+    "\n",
+    "n_data = np.array(data)\n",
+    "tn_data = torch.from_numpy(n_data)\n",
+    "print(tn_data)\n",
+    "\n",
+    "## Numpy bridge,也就是对numpy对象的改变会传导到tensor\n",
+    "n_data += 1\n",
+    "torch.all(torch.from_numpy(n_data) == tn_data)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([3, 4])\n",
+      "torch.Size([1, 3, 4])\n",
+      "torch.Size([3, 4])\n",
+      "tensor(True)\n",
+      "False\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 变换tensor维度\n",
+    "\n",
+    "## 增加或减少数据的维度\n",
+    "a = torch.rand(3, 4)\n",
+    "print(a.shape)\n",
+    "## 增加维度\n",
+    "b = a.unsqueeze(0)\n",
+    "print(b.shape)\n",
+    "## 减少维度\n",
+    "c = b.squeeze(0)\n",
+    "print(c.shape)\n",
+    "## 数据相同,但是维度不同\n",
+    "print(torch.all(c.eq(b)))\n",
+    "print(c.shape == b.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) torch.Size([10])\n",
+      "tensor([[0, 1, 2, 3, 4],\n",
+      "        [5, 6, 7, 8, 9]])\n",
+      "tensor([[0, 5],\n",
+      "        [1, 6],\n",
+      "        [2, 7],\n",
+      "        [3, 8],\n",
+      "        [4, 9]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 变换tensor形状\n",
+    "data = torch.tensor(range(0, 10))\n",
+    "print(data, data.shape)\n",
+    "view1 = data.view(2, 5)\n",
+    "print(view1)\n",
+    "transpose1 = view1.T\n",
+    "print(transpose1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "True False\n"
+     ]
+    },
+    {
+     "ename": "RuntimeError",
+     "evalue": "view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m                              Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-6-a26f66520012>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m## 非毗邻存储(contiguous)的对象不能进行view操作\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mview1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_contiguous\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtranspose1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_contiguous\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mview2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtranspose1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;31mRuntimeError\u001b[0m: view size is not compatible with input tensor's size and stride (at least one dimension spans across two contiguous subspaces). Use .reshape(...) instead."
+     ]
+    }
+   ],
+   "source": [
+    "## 非毗邻存储(contiguous)的对象不能进行view操作\n",
+    "print(view1.is_contiguous(), transpose1.is_contiguous())\n",
+    "view2 = transpose1.view(1, 10)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[2., 2.],\n",
+      "        [2., 2.]])\n",
+      "tensor([[ 2.,  4.],\n",
+      "        [ 8., 16.]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 逐元素操作(element-wise operations)\n",
+    "twos = torch.ones(2, 2) * 2\n",
+    "print(twos)\n",
+    "\n",
+    "powers = twos ** torch.tensor([[1, 2], [3, 4]])\n",
+    "print(powers)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor([[1, 2, 3],\n",
+      "        [4, 5, 6]])\n",
+      "tensor([1, 2, 3])\n",
+      "tensor([[ 1,  4,  9],\n",
+      "        [ 4, 10, 18]])\n",
+      "torch.Size([4, 5, 3, 2])\n"
+     ]
+    }
+   ],
+   "source": [
+    "## tensor广播,tensor broadcasting\n",
+    "a = torch.tensor(range(1, 7)).view(2, 3)\n",
+    "b = torch.tensor(range(1, 4)).view(   3)\n",
+    "print(a)\n",
+    "print(b)\n",
+    "print(a * b)\n",
+    "\n",
+    "## 关于广播,更复杂的例子\n",
+    "a =     torch.ones(4, 1, 3, 2)\n",
+    "b = a * torch.rand(   5, 1, 2)\n",
+    "print(b.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([3, 5])\n",
+      "torch.Size([5, 8, 3, 5])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 矩阵运算\n",
+    "mat1 = torch.randn(3, 4)\n",
+    "mat2 = torch.randn(4, 5)\n",
+    "re = mat1 @ mat2\n",
+    "print(re.shape)\n",
+    "## 矩阵运算的广播\n",
+    "mat1 = torch.randn(5, 1, 3, 4)\n",
+    "mat2 = torch.randn(   8, 4, 5)\n",
+    "re = mat1 @ mat2\n",
+    "print(re.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([])\n",
+      "torch.Size([3])\n",
+      "torch.Size([10, 3])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# 向量运算\n",
+    "# vector x vector\n",
+    "vec1 = torch.randn(3)\n",
+    "vec2 = torch.randn(3)\n",
+    "print((vec1 @ vec2).shape)\n",
+    "# matrix x vector\n",
+    "mat = torch.randn(3, 4)\n",
+    "vec = torch.randn(4)\n",
+    "print((mat @ vec).shape)\n",
+    "# batched matrix x broadcasted vector\n",
+    "mat = torch.randn(10, 3, 4)\n",
+    "vec = torch.randn(4)\n",
+    "print((mat @ vec).shape)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

تفاوت این فایل نمایش داده نمی‌شود زیرا این فایل بسیار بزرگ است
+ 164 - 0
ch06-optimizer/stochastic_gradient_descent.ipynb


برخی فایل‌ها در این مقایسهٔ diff نمایش داده نمی‌شوند زیرا تعداد فایل‌ها بسیار زیاد است