
update comment for ch08

Gen TANG 2 years ago
commit 83d73ee042

+ 12 - 0
ch08_mlp/README.md

@@ -0,0 +1,12 @@
+
+|Code|Description|
+|---|---|
+|[utils.py](utils.py)| Defines the building blocks of the multilayer perceptron, such as the linear model and the Sigmoid function |
+|[perceptron.ipynb](perceptron.ipynb)| Shows the computational graph of the perceptron model |
+|[logit_regression.ipynb](logit_regression.ipynb)| Rebuilds logistic regression as a neural network and trains the model |
+|[mlp.ipynb](mlp.ipynb)| Builds the multilayer perceptron and demonstrates its universality |
+|[saturated_activation_function.ipynb](saturated_activation_function.ipynb)| Uses the computational graph to illustrate dead neurons |
+|[activation_monitoring.ipynb](activation_monitoring.ipynb)| Monitors the model's training |
+|[activation_functions.ipynb](activation_functions.ipynb)| Commonly used activation functions |
+|[initialization.ipynb](initialization.ipynb)| Optimization schemes for parameter initialization |
+|[normalization.ipynb](normalization.ipynb)| Normalization layers |

+ 4 - 4
ch08_mlp/activation_functions.ipynb

@@ -45,7 +45,7 @@
     "x = torch.linspace(-10, 10, 1000)\n",
     "x.requires_grad = True\n",
     "y = torch.sigmoid(x)\n",
-    "draw_graph(x, y).savefig(\"sigmoid.png\", dpi=200)"
+    "draw_graph(x, y).savefig('sigmoid.png', dpi=200)"
    ]
   },
   {
@@ -71,7 +71,7 @@
     "x = torch.linspace(-10, 10, 1000)\n",
     "x.requires_grad = True\n",
     "y = torch.tanh(x)\n",
-    "draw_graph(x, y).savefig(\"tanh.png\", dpi=200)"
+    "draw_graph(x, y).savefig('tanh.png', dpi=200)"
    ]
   },
   {
@@ -97,7 +97,7 @@
     "x = torch.linspace(-10, 10, 1000)\n",
     "x.requires_grad = True\n",
     "y = torch.nn.functional.relu(x)\n",
-    "draw_graph(x, y).savefig(\"relu.png\", dpi=200)"
+    "draw_graph(x, y).savefig('relu.png', dpi=200)"
    ]
   },
   {
@@ -158,7 +158,7 @@
     "    legends.append(key)\n",
     "func_ax.legend(legends, shadow=True)\n",
     "gradient_ax.legend(legends, shadow=True)\n",
-    "fig.savefig(\"relu_family.png\", dpi=200)"
+    "fig.savefig('relu_family.png', dpi=200)"
    ]
   }
  ],

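Each cell in this file follows the same pattern: build a grid of inputs with gradient tracking enabled, apply the activation, and plot the function alongside its derivative. For readers skimming the diff, here is a minimal self-contained sketch of that pattern; `draw_activation` is an illustrative stand-in, not the repository's `draw_graph` helper:

```python
import torch
import matplotlib.pyplot as plt

def draw_activation(func, name):
    # evaluate the activation on a dense grid, tracking gradients
    x = torch.linspace(-10, 10, 1000, requires_grad=True)
    y = func(x)
    # for an elementwise function, backward on y.sum() leaves the
    # pointwise derivative dy/dx in x.grad
    y.sum().backward()
    fig, ax = plt.subplots()
    ax.plot(x.detach(), y.detach(), label=name)
    ax.plot(x.detach(), x.grad, label=f'd {name}/dx')
    ax.legend(shadow=True)
    return fig

draw_activation(torch.sigmoid, 'sigmoid').savefig('sigmoid_sketch.png', dpi=200)
```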
+ 10 - 8
ch08_mlp/activation_monitoring.ipynb

@@ -39,7 +39,7 @@
     "from sklearn.preprocessing import StandardScaler\n",
     "import numpy as np\n",
     "\n",
-    "\n",
+    "# Generate the training data and standardize it\n",
     "np.random.seed(12046)\n",
     "data = make_moons(n_samples=2000, noise=.05)\n",
     "scaler = StandardScaler()\n",
@@ -55,6 +55,7 @@
     "def train_model(max_steps):\n",
     "    batch_size = 2000\n",
     "    lossi = []\n",
+    "    # Record the update magnitude of each layer's parameters\n",
     "    udi = {}\n",
     "    x, y = torch.tensor(data[0]).float(), torch.tensor(data[1])\n",
     "    _prob = torch.ones(x.shape[0]) / x.shape[0]\n",
@@ -113,7 +114,7 @@
     }
    ],
    "source": [
-    "# Model loss and parameter update magnitudes at initialization\n",
+    "# Model loss and each layer's parameter update magnitudes at initialization\n",
     "train_model(1)"
    ]
   },
@@ -148,6 +149,7 @@
     "        t = layer.out\n",
     "        # The activation function 'overheats' (saturates) when its output is above 0.99 or below 0.01\n",
     "        saturation = ((t - 0.5).abs() > 0.49).float().mean()\n",
+    "        # Distribution of the activation outputs\n",
     "        hy, hx = torch.histogram(t, density=True)\n",
     "        plt.plot(hx[:-1].detach(), hy.detach())\n",
     "        layer_name = f'layer {i} ({layer.__class__.__name__})'\n",
@@ -155,7 +157,7 @@
     "        legends.append(f'{layer_name}: {stats}')\n",
     "plt.legend(legends, shadow=True)\n",
     "plt.title('Distribution of activation outputs', fontsize=18)\n",
-    "plt.savefig(\"activation_distribution.png\", dpi=200)\n",
+    "plt.savefig('activation_distribution.png', dpi=200)\n",
     "plt.show()"
    ]
   },
@@ -195,7 +197,7 @@
     "        legends.append(f'{layer_name}: {stats}')\n",
     "plt.legend(legends, shadow=True)\n",
     "plt.title('Gradient distribution of the linear outputs', fontsize=18)\n",
-    "plt.savefig(\"linear_grad_distribution.png\", dpi=200)\n",
+    "plt.savefig('linear_grad_distribution.png', dpi=200)\n",
     "plt.show()"
    ]
   },
@@ -238,7 +240,7 @@
     "        # Only inspect the weight parameter, i.e. w\n",
     "        p = layer.parameters()[0]\n",
     "        g = p.grad\n",
-    "        # Compute the ratio of the gradient's standard deviation to the parameter's\n",
+    "        # Ratio of the gradient's standard deviation to the parameter's\n",
     "        grad_ratio = g.std() / p.std()\n",
     "        hy, hx = torch.histogram(g, density=True)\n",
     "        ax.plot(hx[:-1].detach(), hy.detach())\n",
@@ -248,7 +250,7 @@
     "        print(f'{layer_name}: {stats}')\n",
     "ax.legend(legends, shadow=True)\n",
     "ax.set_title('Gradient distribution of the weight parameters', fontsize=18)\n",
-    "fig.savefig(\"weight_grad_distribution.png\", dpi=200)"
+    "fig.savefig('weight_grad_distribution.png', dpi=200)"
    ]
   },
   {
@@ -270,7 +272,7 @@
    ],
    "source": [
     "ax.set_xlim([-0.05, 0.05])\n",
-    "fig.savefig(\"weight_grad_distribution_zoom.png\", dpi=200)\n",
+    "fig.savefig('weight_grad_distribution_zoom.png', dpi=200)\n",
     "fig"
    ]
   },
@@ -312,7 +314,7 @@
     "# The ideal reference line\n",
     "plt.plot([0, len(ud[0])], [-3, -3], 'k--')\n",
     "plt.legend(legends, shadow=True)\n",
-    "plt.savefig(\"weights_grad_ratio.png\", dpi=200)\n",
+    "plt.savefig('weights_grad_ratio.png', dpi=200)\n",
     "plt.show()"
    ]
   }

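About the dashed reference line at -3 plotted in the last cell: a common rule of thumb is that the per-step update magnitude, lr * grad.std() / param.std(), should sit around 1e-3, i.e. near -3 on a log10 scale. A minimal sketch of how such ratios can be recorded during training; the names `lr` and `ud` are illustrative rather than the notebook's exact ones:

```python
import torch

lr = 0.1   # assumed learning rate
ud = []    # one entry per step: log10 update-to-data ratio of each parameter

def record_update_ratios(parameters):
    # call once per training step, right after the parameter update
    with torch.no_grad():
        ud.append([(lr * p.grad.std() / p.std()).log10().item()
                   for p in parameters if p.grad is not None])
```

Ratios far below -3 suggest the parameters barely move; ratios far above it suggest unstable updates.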
+ 7 - 5
ch08_mlp/initialization.ipynb

@@ -26,7 +26,7 @@
     "# Effect of the logits' scale on the cross entropy\n",
     "clz_num = 2\n",
     "num = 1000\n",
-    "## Standard normal distribution\n",
+    "## When the logits follow a standard normal distribution, the cross entropy is small\n",
     "logits = torch.randn(num, clz_num)\n",
     "y = torch.randint(clz_num, (num,))\n",
     "F.cross_entropy(logits, y)"
@@ -49,7 +49,7 @@
     }
    ],
    "source": [
-    "## Scale the logits up by a factor of 10\n",
+    "## Scaling the logits up by a factor of 10 makes the cross entropy large\n",
     "logits = torch.randn(num, clz_num) * 10\n",
     "y = torch.randint(clz_num, (num,))\n",
     "F.cross_entropy(logits, y)"
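The point of the two cells above: with C balanced classes and zero-centered, unit-scale logits, the initial cross entropy stays on the order of the uniform baseline ln(C) (about 0.69 for C = 2); scaling the logits by 10 makes the untrained model confidently wrong on roughly half the samples, which inflates the loss. A quick self-contained check:

```python
import math
import torch
import torch.nn.functional as F

clz_num, num = 2, 1000
y = torch.randint(clz_num, (num,))
# unit-scale logits: loss stays near the uniform baseline ln(2) ~= 0.693
print(F.cross_entropy(torch.randn(num, clz_num), y), math.log(clz_num))
# 10x larger logits: confident random guesses, much larger loss
print(F.cross_entropy(torch.randn(num, clz_num) * 10, y))
```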
@@ -61,7 +61,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Improving the parameter initialization\n",
     "from utils import Linear, Sigmoid, Sequential\n",
     "import matplotlib.pyplot as plt\n",
     "%matplotlib inline\n",
@@ -109,7 +108,7 @@
     "from sklearn.preprocessing import StandardScaler\n",
     "import numpy as np\n",
     "\n",
-    "\n",
+    "# Generate the training data and standardize it\n",
     "np.random.seed(12046)\n",
     "data = make_moons(n_samples=2000, noise=.05)\n",
     "scaler = StandardScaler()\n",
@@ -125,6 +124,7 @@
     "def train_model(max_steps):\n",
     "    batch_size = 2000\n",
     "    lossi = []\n",
+    "    # Record the update magnitude of each layer's parameters\n",
     "    udi = {}\n",
     "    x, y = torch.tensor(data[0]).float(), torch.tensor(data[1])\n",
     "    _prob = torch.ones(x.shape[0]) / x.shape[0]\n",
@@ -217,6 +217,7 @@
     "        t = layer.out\n",
     "        # The activation function 'overheats' (saturates) when its output is above 0.99 or below 0.01\n",
     "        saturation = ((t - 0.5).abs() > 0.49).float().mean()\n",
+    "        # Distribution of the activation outputs\n",
     "        hy, hx = torch.histogram(t, density=True)\n",
     "        plt.plot(hx[:-1].detach(), hy.detach())\n",
     "        layer_name = f'layer {i} ({layer.__class__.__name__})'\n",
@@ -372,6 +373,7 @@
     "def layer_stats(func, calculate_gain):\n",
     "    \"\"\"\n",
     "    Run only the forward pass and record the distribution of each layer's output\n",
+    "    Ideally, the variance of each layer's output should stay stable\n",
     "    Parameters\n",
     "    ----\n",
     "    func : the activation function\n",
@@ -386,7 +388,7 @@
     "        in_features, _ = l.weight.shape\n",
     "        # The preliminary correction\n",
     "        l.weight *= 1 / in_features ** 0.5\n",
-    "        # Refine further\n",
+    "        # Refine further using the activation function's gain\n",
     "        l.weight *= calculate_gain\n",
     "        x = func(l(x))\n",
     "        # Record the distribution of the output\n",

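The two scaling steps in the last hunk amount to Xavier-style initialization: dividing by sqrt(in_features) keeps the variance of the linear output close to that of the input, and the gain then compensates for the shrinkage the activation introduces. PyTorch exposes the gain factor as `torch.nn.init.calculate_gain`; a minimal sketch with illustrative layer sizes:

```python
import torch

torch.manual_seed(0)
in_features = 100
x = torch.randn(1000, in_features)
w = torch.randn(in_features, in_features)
w *= 1 / in_features ** 0.5                # preliminary correction: unit-variance pre-activations
w *= torch.nn.init.calculate_gain('tanh')  # gain ~= 5/3 offsets tanh's variance shrinkage
h = torch.tanh(x @ w)
print(x.std().item(), h.std().item())      # the two should be of the same order
```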
File diff suppressed because it is too large
+ 61 - 89
ch08_mlp/logit_regression.ipynb


File diff suppressed because it is too large
+ 12 - 12
ch08_mlp/mlp.ipynb


File diff suppressed because it is too large
+ 2 - 1
ch08_mlp/normalization.ipynb


+ 16 - 16
ch08_mlp/utils.py

@@ -1,7 +1,7 @@
 # -*- coding: UTF-8 -*-
-"""
-This script defines the components of the multilayer perceptron, such as the linear model and the Sigmoid function
-"""
+'''
+Defines the building blocks of the multilayer perceptron, such as the linear model and the Sigmoid function
+'''
 
 
 import torch
@@ -12,10 +12,10 @@ import numpy as np
 class Linear:
     
     def __init__(self, in_features, out_features, bias=True):
-        """
+        '''
         模型参数初始化
-        需要注意的是,此次做参数初始化的优化
-        """
+        Note that the parameter-initialization optimization is deliberately left out here
+        '''
         self.weight = torch.randn((in_features, out_features))
         self.bias = torch.randn(out_features) if bias else None
         
@@ -26,11 +26,11 @@ class Linear:
         return self.out
     
     def parameters(self):
-        """
+        '''
         Return the linear model's parameters, mainly for iterative updates
         Since tensors are PyTorch's basic unit of computation,
         we only need to gather the parameters into a list
-        """
+        '''
         if self.bias is not None:
             return [self.weight, self.bias]
         return [self.weight]
@@ -43,9 +43,9 @@ class Sigmoid:
         return self.out
     
     def parameters(self):
-        """
+        '''
         The Sigmoid function has no model parameters
-        """
+        '''
         return []
 
 
@@ -56,9 +56,9 @@ class Tanh:
         return self.out
     
     def parameters(self):
-        """
+        '''
         The Tanh function has no model parameters
-        """
+        '''
         return []
 
 
@@ -74,15 +74,15 @@ class Sequential:
         return self.out
     
     def parameters(self):
-        """
+        '''
         Simply gather the parameters of every layer into a list
-        """
+        '''
         return [p for layer in self.layers for p in layer.parameters()]
     
     def predict_proba(self, x):
-        """
+        '''
         Compute the model's output probabilities for data visualization
-        """
+        '''
         if isinstance(x, np.ndarray):
             x = torch.tensor(x).float()
         logits = self(x)

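Taken together, the classes in `utils.py` mirror a small slice of `torch.nn` with just enough machinery for this chapter's experiments. A hedged usage sketch, assuming `Sequential` accepts a list of layers and that the instances are callable, as the hunks above suggest:

```python
import torch
from utils import Linear, Sigmoid, Sequential

# a two-layer perceptron assembled from the chapter's components
model = Sequential([Linear(2, 4), Sigmoid(), Linear(4, 2)])
x = torch.randn(8, 2)
logits = model(x)
params = model.parameters()  # flat list of weights and biases for a manual update loop
print(logits.shape, len(params))
```

Since the weights are created with plain `torch.randn`, they do not track gradients by default; training code would need to enable `requires_grad` on each parameter before the update loop.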
Some files were not shown because too many files changed in this diff