
update comment for ch08

Gen TANG 2 years ago
commit 83d73ee042

+ 12 - 0
ch08_mlp/README.md

@@ -0,0 +1,12 @@
+
+|Code|Description|
+|---|---|
+|[utils.py](utils.py)| Defines the building blocks of the multilayer perceptron, such as the linear model and the Sigmoid function |
+|[perceptron.ipynb](perceptron.ipynb)| Shows the computational graph of the perceptron model |
+|[logit_regression.ipynb](logit_regression.ipynb)| Rebuilds logistic regression as a neural network and trains the model |
+|[mlp.ipynb](mlp.ipynb)| Builds the multilayer perceptron and demonstrates its universality |
+|[saturated_activation_function.ipynb](saturated_activation_function.ipynb)| Uses the computational graph to illustrate dead neurons |
+|[activation_monitoring.ipynb](activation_monitoring.ipynb)| Monitors the model's training |
+|[activation_functions.ipynb](activation_functions.ipynb)| Commonly used activation functions |
+|[initialization.ipynb](initialization.ipynb)| Optimization schemes for parameter initialization |
+|[normalization.ipynb](normalization.ipynb)| Normalization layers |

+ 4 - 4
ch08_mlp/activation_functions.ipynb

@@ -45,7 +45,7 @@
     "x = torch.linspace(-10, 10, 1000)\n",
     "x.requires_grad = True\n",
     "y = torch.sigmoid(x)\n",
-    "draw_graph(x, y).savefig(\"sigmoid.png\", dpi=200)"
+    "draw_graph(x, y).savefig('sigmoid.png', dpi=200)"
    ]
   },
   {
@@ -71,7 +71,7 @@
     "x = torch.linspace(-10, 10, 1000)\n",
     "x.requires_grad = True\n",
     "y = torch.tanh(x)\n",
-    "draw_graph(x, y).savefig(\"tanh.png\", dpi=200)"
+    "draw_graph(x, y).savefig('tanh.png', dpi=200)"
    ]
   },
   {
@@ -97,7 +97,7 @@
     "x = torch.linspace(-10, 10, 1000)\n",
     "x.requires_grad = True\n",
     "y = torch.nn.functional.relu(x)\n",
-    "draw_graph(x, y).savefig(\"relu.png\", dpi=200)"
+    "draw_graph(x, y).savefig('relu.png', dpi=200)"
    ]
   },
   {
@@ -158,7 +158,7 @@
     "    legends.append(key)\n",
     "func_ax.legend(legends, shadow=True)\n",
     "gradient_ax.legend(legends, shadow=True)\n",
-    "fig.savefig(\"relu_family.png\", dpi=200)"
+    "fig.savefig('relu_family.png', dpi=200)"
    ]
   }
  ],

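Each cell in this file follows the same pattern: build a grid of inputs with gradient tracking enabled, apply the activation, and plot the function alongside its derivative. For readers skimming the diff, here is a minimal self-contained sketch of that pattern; `draw_activation` is an illustrative stand-in, not the repository's `draw_graph` helper:

```python
import torch
import matplotlib.pyplot as plt

def draw_activation(func, name):
    # evaluate the activation on a dense grid, tracking gradients
    x = torch.linspace(-10, 10, 1000, requires_grad=True)
    y = func(x)
    # for an elementwise function, backward on y.sum() leaves the
    # pointwise derivative dy/dx in x.grad
    y.sum().backward()
    fig, ax = plt.subplots()
    ax.plot(x.detach(), y.detach(), label=name)
    ax.plot(x.detach(), x.grad, label=f'd {name}/dx')
    ax.legend(shadow=True)
    return fig

draw_activation(torch.sigmoid, 'sigmoid').savefig('sigmoid_sketch.png', dpi=200)
```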
+ 10 - 8
ch08_mlp/activation_monitoring.ipynb

@@ -39,7 +39,7 @@
     "from sklearn.preprocessing import StandardScaler\n",
     "import numpy as np\n",
     "\n",
-    "\n",
+    "# Generate the training data and standardize it\n",
     "np.random.seed(12046)\n",
     "data = make_moons(n_samples=2000, noise=.05)\n",
     "scaler = StandardScaler()\n",
@@ -55,6 +55,7 @@
     "def train_model(max_steps):\n",
     "    batch_size = 2000\n",
     "    lossi = []\n",
+    "    # Record the update magnitude of each layer's parameters\n",
     "    udi = {}\n",
     "    x, y = torch.tensor(data[0]).float(), torch.tensor(data[1])\n",
     "    _prob = torch.ones(x.shape[0]) / x.shape[0]\n",
@@ -113,7 +114,7 @@
     }
    ],
    "source": [
-    "# Model loss and parameter update magnitudes at initialization\n",
+    "# Model loss and each layer's parameter update magnitudes at initialization\n",
     "train_model(1)"
    ]
   },
@@ -148,6 +149,7 @@
     "        t = layer.out\n",
     "        # The activation function 'overheats' (saturates) when its output is above 0.99 or below 0.01\n",
     "        saturation = ((t - 0.5).abs() > 0.49).float().mean()\n",
+    "        # Distribution of the activation outputs\n",
     "        hy, hx = torch.histogram(t, density=True)\n",
     "        plt.plot(hx[:-1].detach(), hy.detach())\n",
     "        layer_name = f'layer {i} ({layer.__class__.__name__})'\n",
@@ -155,7 +157,7 @@
     "        legends.append(f'{layer_name}: {stats}')\n",
     "plt.legend(legends, shadow=True)\n",
     "plt.title('Distribution of activation outputs', fontsize=18)\n",
-    "plt.savefig(\"activation_distribution.png\", dpi=200)\n",
+    "plt.savefig('activation_distribution.png', dpi=200)\n",
     "plt.show()"
    ]
   },
@@ -195,7 +197,7 @@
     "        legends.append(f'{layer_name}: {stats}')\n",
     "plt.legend(legends, shadow=True)\n",
     "plt.title('Gradient distribution of the linear outputs', fontsize=18)\n",
-    "plt.savefig(\"linear_grad_distribution.png\", dpi=200)\n",
+    "plt.savefig('linear_grad_distribution.png', dpi=200)\n",
     "plt.show()"
    ]
   },
@@ -238,7 +240,7 @@
     "        # Only inspect the weight parameter, i.e. w\n",
     "        p = layer.parameters()[0]\n",
     "        g = p.grad\n",
-    "        # Compute the ratio of the gradient's standard deviation to the parameter's\n",
+    "        # Ratio of the gradient's standard deviation to the parameter's\n",
     "        grad_ratio = g.std() / p.std()\n",
     "        hy, hx = torch.histogram(g, density=True)\n",
     "        ax.plot(hx[:-1].detach(), hy.detach())\n",
@@ -248,7 +250,7 @@
     "        print(f'{layer_name}: {stats}')\n",
     "ax.legend(legends, shadow=True)\n",
     "ax.set_title('Gradient distribution of the weight parameters', fontsize=18)\n",
-    "fig.savefig(\"weight_grad_distribution.png\", dpi=200)"
+    "fig.savefig('weight_grad_distribution.png', dpi=200)"
    ]
   },
   {
@@ -270,7 +272,7 @@
    ],
    "source": [
     "ax.set_xlim([-0.05, 0.05])\n",
-    "fig.savefig(\"weight_grad_distribution_zoom.png\", dpi=200)\n",
+    "fig.savefig('weight_grad_distribution_zoom.png', dpi=200)\n",
     "fig"
    ]
   },
@@ -312,7 +314,7 @@
     "# The ideal reference line\n",
     "plt.plot([0, len(ud[0])], [-3, -3], 'k--')\n",
     "plt.legend(legends, shadow=True)\n",
-    "plt.savefig(\"weights_grad_ratio.png\", dpi=200)\n",
+    "plt.savefig('weights_grad_ratio.png', dpi=200)\n",
     "plt.show()"
    ]
   }

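About the dashed reference line at -3 plotted in the last cell: a common rule of thumb is that the per-step update magnitude, lr * grad.std() / param.std(), should sit around 1e-3, i.e. near -3 on a log10 scale. A minimal sketch of how such ratios can be recorded during training; the names `lr` and `ud` are illustrative rather than the notebook's exact ones:

```python
import torch

lr = 0.1   # assumed learning rate
ud = []    # one entry per step: log10 update-to-data ratio of each parameter

def record_update_ratios(parameters):
    # call once per training step, right after the parameter update
    with torch.no_grad():
        ud.append([(lr * p.grad.std() / p.std()).log10().item()
                   for p in parameters if p.grad is not None])
```

Ratios far below -3 suggest the parameters barely move; ratios far above it suggest unstable updates.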
+ 7 - 5
ch08_mlp/initialization.ipynb

@@ -26,7 +26,7 @@
     "# Effect of the logits' scale on the cross entropy\n",
     "clz_num = 2\n",
     "num = 1000\n",
-    "## Standard normal distribution\n",
+    "## When the logits follow a standard normal distribution, the cross entropy is small\n",
     "logits = torch.randn(num, clz_num)\n",
     "y = torch.randint(clz_num, (num,))\n",
     "F.cross_entropy(logits, y)"
@@ -49,7 +49,7 @@
     }
    ],
    "source": [
-    "## Scale the logits up by a factor of 10\n",
+    "## Scaling the logits up by a factor of 10 makes the cross entropy large\n",
     "logits = torch.randn(num, clz_num) * 10\n",
     "y = torch.randint(clz_num, (num,))\n",
     "F.cross_entropy(logits, y)"
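The point of the two cells above: with C balanced classes and zero-centered, unit-scale logits, the initial cross entropy stays on the order of the uniform baseline ln(C) (about 0.69 for C = 2); scaling the logits by 10 makes the untrained model confidently wrong on roughly half the samples, which inflates the loss. A quick self-contained check:

```python
import math
import torch
import torch.nn.functional as F

clz_num, num = 2, 1000
y = torch.randint(clz_num, (num,))
# unit-scale logits: loss stays near the uniform baseline ln(2) ~= 0.693
print(F.cross_entropy(torch.randn(num, clz_num), y), math.log(clz_num))
# 10x larger logits: confident random guesses, much larger loss
print(F.cross_entropy(torch.randn(num, clz_num) * 10, y))
```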
@@ -61,7 +61,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Improving the parameter initialization\n",
     "from utils import Linear, Sigmoid, Sequential\n",
     "import matplotlib.pyplot as plt\n",
     "%matplotlib inline\n",
@@ -109,7 +108,7 @@
     "from sklearn.preprocessing import StandardScaler\n",
     "import numpy as np\n",
     "\n",
-    "\n",
+    "# Generate the training data and standardize it\n",
     "np.random.seed(12046)\n",
     "data = make_moons(n_samples=2000, noise=.05)\n",
     "scaler = StandardScaler()\n",
@@ -125,6 +124,7 @@
     "def train_model(max_steps):\n",
     "    batch_size = 2000\n",
     "    lossi = []\n",
+    "    # Record the update magnitude of each layer's parameters\n",
     "    udi = {}\n",
     "    x, y = torch.tensor(data[0]).float(), torch.tensor(data[1])\n",
     "    _prob = torch.ones(x.shape[0]) / x.shape[0]\n",
@@ -217,6 +217,7 @@
     "        t = layer.out\n",
     "        # The activation function 'overheats' (saturates) when its output is above 0.99 or below 0.01\n",
     "        saturation = ((t - 0.5).abs() > 0.49).float().mean()\n",
+    "        # Distribution of the activation outputs\n",
     "        hy, hx = torch.histogram(t, density=True)\n",
     "        plt.plot(hx[:-1].detach(), hy.detach())\n",
     "        layer_name = f'layer {i} ({layer.__class__.__name__})'\n",
@@ -372,6 +373,7 @@
     "def layer_stats(func, calculate_gain):\n",
     "    \"\"\"\n",
     "    Run only the forward pass and record the distribution of each layer's output\n",
+    "    Ideally, the variance of each layer's output should stay stable\n",
     "    Parameters\n",
     "    ----\n",
     "    func : the activation function\n",
@@ -386,7 +388,7 @@
     "        in_features, _ = l.weight.shape\n",
     "        # The preliminary correction\n",
     "        l.weight *= 1 / in_features ** 0.5\n",
-    "        # Refine further\n",
+    "        # Refine further using the activation function's gain\n",
     "        l.weight *= calculate_gain\n",
     "        x = func(l(x))\n",
     "        # Record the distribution of the output\n",

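The two scaling steps in the last hunk amount to Xavier-style initialization: dividing by sqrt(in_features) keeps the variance of the linear output close to that of the input, and the gain then compensates for the shrinkage the activation introduces. PyTorch exposes the gain factor as `torch.nn.init.calculate_gain`; a minimal sketch with illustrative layer sizes:

```python
import torch

torch.manual_seed(0)
in_features = 100
x = torch.randn(1000, in_features)
w = torch.randn(in_features, in_features)
w *= 1 / in_features ** 0.5                # preliminary correction: unit-variance pre-activations
w *= torch.nn.init.calculate_gain('tanh')  # gain ~= 5/3 offsets tanh's variance shrinkage
h = torch.tanh(x @ w)
print(x.std().item(), h.std().item())      # the two should be of the same order
```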
File diff suppressed because it is too large
+ 61 - 89
ch08_mlp/logit_regression.ipynb


File diff suppressed because it is too large
+ 12 - 12
ch08_mlp/mlp.ipynb


File diff suppressed because it is too large
+ 2 - 1
ch08_mlp/normalization.ipynb


+ 16 - 16
ch08_mlp/utils.py

@@ -1,7 +1,7 @@
 # -*- coding: UTF-8 -*-
-"""
-This script defines the components of the multilayer perceptron, such as the linear model and the Sigmoid function
-"""
+'''
+Defines the building blocks of the multilayer perceptron, such as the linear model and the Sigmoid function
+'''
 
 
 import torch
@@ -12,10 +12,10 @@ import numpy as np
 class Linear:
     
     def __init__(self, in_features, out_features, bias=True):
-        """
+        '''
         模型参数初始化
-        需要注意的是,此次做参数初始化的优化
-        """
+        Note that the parameter-initialization optimization is deliberately left out here
+        '''
         self.weight = torch.randn((in_features, out_features))
         self.bias = torch.randn(out_features) if bias else None
         
@@ -26,11 +26,11 @@ class Linear:
         return self.out
     
     def parameters(self):
-        """
+        '''
         Return the linear model's parameters, mainly for iterative updates
         Since tensors are PyTorch's basic unit of computation,
         we only need to gather the parameters into a list
-        """
+        '''
         if self.bias is not None:
             return [self.weight, self.bias]
         return [self.weight]
@@ -43,9 +43,9 @@ class Sigmoid:
         return self.out
     
     def parameters(self):
-        """
+        '''
         The Sigmoid function has no model parameters
-        """
+        '''
         return []
 
 
@@ -56,9 +56,9 @@ class Tanh:
         return self.out
     
     def parameters(self):
-        """
+        '''
         The Tanh function has no model parameters
-        """
+        '''
         return []
 
 
@@ -74,15 +74,15 @@ class Sequential:
         return self.out
     
     def parameters(self):
-        """
+        '''
         Simply gather the parameters of every layer into a list
-        """
+        '''
         return [p for layer in self.layers for p in layer.parameters()]
     
     def predict_proba(self, x):
-        """
+        '''
         Compute the model's output probabilities for data visualization
-        """
+        '''
         if isinstance(x, np.ndarray):
             x = torch.tensor(x).float()
         logits = self(x)

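Taken together, the classes in `utils.py` mirror a small slice of `torch.nn` with just enough machinery for this chapter's experiments. A hedged usage sketch, assuming `Sequential` accepts a list of layers and that the instances are callable, as the hunks above suggest:

```python
import torch
from utils import Linear, Sigmoid, Sequential

# a two-layer perceptron assembled from the chapter's components
model = Sequential([Linear(2, 4), Sigmoid(), Linear(4, 2)])
x = torch.randn(8, 2)
logits = model(x)
params = model.parameters()  # flat list of weights and biases for a manual update loop
print(logits.shape, len(params))
```

Since the weights are created with plain `torch.randn`, they do not track gradients by default; training code would need to enable `requires_grad` on each parameter before the update loop.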
Some files were not shown because too many files changed in this diff