|
|
@@ -39,7 +39,7 @@
|
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
|
"import numpy as np\n",
|
|
|
"\n",
|
|
|
- "\n",
|
|
|
+ "# 生成训练数据并对数据做归一化处理\n",
|
|
|
"np.random.seed(12046)\n",
|
|
|
"data = make_moons(n_samples=2000, noise=.05)\n",
|
|
|
"scaler = StandardScaler()\n",
|
|
|
@@ -55,6 +55,7 @@
|
|
|
"def train_model(max_steps):\n",
|
|
|
" batch_size = 2000\n",
|
|
|
" lossi = []\n",
|
|
|
+ " # 记录各层的参数更新幅度\n",
|
|
|
" udi = {}\n",
|
|
|
" x, y = torch.tensor(data[0]).float(), torch.tensor(data[1])\n",
|
|
|
" _prob = torch.ones(x.shape[0]) / x.shape[0]\n",
|
|
|
@@ -113,7 +114,7 @@
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
- "# 初始化时模型损失和参数更新幅度\n",
|
|
|
+ "# 初始化时模型损失和各层的参数更新幅度\n",
|
|
|
"train_model(1)"
|
|
|
]
|
|
|
},
|
|
|
@@ -148,6 +149,7 @@
|
|
|
" t = layer.out\n",
|
|
|
" # 激活函数的输出大于0.99或者小于0.01时,激活函数“过热”\n",
|
|
|
" saturation = ((t - 0.5).abs() > 0.49).float().mean()\n",
|
|
|
+ " # 激活函数输出的分布情况\n",
|
|
|
" hy, hx = torch.histogram(t, density=True)\n",
|
|
|
" plt.plot(hx[:-1].detach(), hy.detach())\n",
|
|
|
" layer_name = f'layer {i} ({layer.__class__.__name__})'\n",
|
|
|
@@ -155,7 +157,7 @@
|
|
|
" legends.append(f'{layer_name}: {stats}')\n",
|
|
|
"plt.legend(legends, shadow=True)\n",
|
|
|
"plt.title('激活函数输出分布情况', fontsize=18)\n",
|
|
|
- "plt.savefig(\"activation_distribution.png\", dpi=200)\n",
|
|
|
+ "plt.savefig('activation_distribution.png', dpi=200)\n",
|
|
|
"plt.show()"
|
|
|
]
|
|
|
},
|
|
|
@@ -195,7 +197,7 @@
|
|
|
" legends.append(f'{layer_name}: {stats}')\n",
|
|
|
"plt.legend(legends, shadow=True)\n",
|
|
|
"plt.title('线性输出的梯度分布情况', fontsize=18)\n",
|
|
|
- "plt.savefig(\"linear_grad_distribution.png\", dpi=200)\n",
|
|
|
+ "plt.savefig('linear_grad_distribution.png', dpi=200)\n",
|
|
|
"plt.show()"
|
|
|
]
|
|
|
},
|
|
|
@@ -238,7 +240,7 @@
|
|
|
" # 只观察权重参数,也就是w\n",
|
|
|
" p = layer.parameters()[0]\n",
|
|
|
" g = p.grad\n",
|
|
|
- " # 统计梯度标准差与参数标准差的比例\n",
|
|
|
+ " # 梯度标准差与参数标准差的比例\n",
|
|
|
" grad_ratio = g.std() / p.std()\n",
|
|
|
" hy, hx = torch.histogram(g, density=True)\n",
|
|
|
" ax.plot(hx[:-1].detach(), hy.detach())\n",
|
|
|
@@ -248,7 +250,7 @@
|
|
|
" print(f'{layer_name}: {stats}')\n",
|
|
|
"ax.legend(legends, shadow=True)\n",
|
|
|
"ax.set_title('权重参数的梯度分布情况', fontsize=18)\n",
|
|
|
- "fig.savefig(\"weight_grad_distribution.png\", dpi=200)"
|
|
|
+ "fig.savefig('weight_grad_distribution.png', dpi=200)"
|
|
|
]
|
|
|
},
|
|
|
{
|
|
|
@@ -270,7 +272,7 @@
|
|
|
],
|
|
|
"source": [
|
|
|
"ax.set_xlim([-0.05, 0.05])\n",
|
|
|
- "fig.savefig(\"weight_grad_distribution_zoom.png\", dpi=200)\n",
|
|
|
+ "fig.savefig('weight_grad_distribution_zoom.png', dpi=200)\n",
|
|
|
"fig"
|
|
|
]
|
|
|
},
|
|
|
@@ -312,7 +314,7 @@
|
|
|
"# 理想的标准线\n",
|
|
|
"plt.plot([0, len(ud[0])], [-3, -3], 'k--')\n",
|
|
|
"plt.legend(legends, shadow=True)\n",
|
|
|
- "plt.savefig(\"weights_grad_ratio.png\", dpi=200)\n",
|
|
|
+ "plt.savefig('weights_grad_ratio.png', dpi=200)\n",
|
|
|
"plt.show()"
|
|
|
]
|
|
|
}
|