{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Install the third-party dependency.\n", "# The %pip magic installs into the environment of the running kernel, whereas\n", "# !pip runs whichever pip executable happens to be first on the shell PATH.\n", "%pip install statsmodels" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import statsmodels.api as sm\n", "from statsmodels.sandbox.regression.predstd import wls_prediction_std\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xy
0107.70
1109.87
21111.18
31210.43
41312.36
51414.15
61515.73
71616.40
81718.86
91816.13
101918.21
112018.37
122122.61
132219.83
142322.67
152422.70
162525.16
172625.55
182728.21
192828.12
\n", "
" ], "text/plain": [ " x y\n", "0 10 7.70\n", "1 10 9.87\n", "2 11 11.18\n", "3 12 10.43\n", "4 13 12.36\n", "5 14 14.15\n", "6 15 15.73\n", "7 16 16.40\n", "8 17 18.86\n", "9 18 16.13\n", "10 19 18.21\n", "11 20 18.37\n", "12 21 22.61\n", "13 22 19.83\n", "14 23 22.67\n", "15 24 22.70\n", "16 25 25.16\n", "17 26 25.55\n", "18 27 28.21\n", "19 28 28.12" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the example data set from CSV and display it\n", "data = pd.read_csv('./data/simple_example.csv')\n", "data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "( const x\n", " 0 1.0 10\n", " 1 1.0 10\n", " 2 1.0 11\n", " 3 1.0 12\n", " 4 1.0 13\n", " 5 1.0 14\n", " 6 1.0 15\n", " 7 1.0 16\n", " 8 1.0 17\n", " 9 1.0 18\n", " 10 1.0 19\n", " 11 1.0 20\n", " 12 1.0 21\n", " 13 1.0 22\n", " 14 1.0 23\n", " 15 1.0 24\n", " 16 1.0 25\n", " 17 1.0 26\n", " 18 1.0 27\n", " 19 1.0 28,\n", " y\n", " 0 7.70\n", " 1 9.87\n", " 2 11.18\n", " 3 10.43\n", " 4 12.36\n", " 5 14.15\n", " 6 15.73\n", " 7 16.40\n", " 8 18.86\n", " 9 16.13\n", " 10 18.21\n", " 11 18.37\n", " 12 22.61\n", " 13 19.83\n", " 14 22.67\n", " 15 22.70\n", " 16 25.16\n", " 17 25.55\n", " 18 28.21\n", " 19 28.12)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Data preparation: names of the regressor column(s) and of the response column\n", "features = ['x']\n", "labels = ['y']\n", "# Design matrix: the regressor column(s) plus the constant column added by add_constant\n", "X = sm.add_constant(data[features])\n", "# Response, selected with a list of column labels\n", "Y = data[labels]\n", "(X, Y)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build the OLS model (response Y, design matrix X) and fit it\n", "model = sm.OLS(endog=Y, exog=X)\n", "re = model.fit()\n", "re" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "==============================================================================\n", "Dep. 
Variable: y R-squared: 0.962\n", "Model: OLS Adj. R-squared: 0.960\n", "Method: Least Squares F-statistic: 460.5\n", "Date: Thu, 02 Nov 2023 Prob (F-statistic): 2.85e-14\n", "Time: 19:22:24 Log-Likelihood: -31.374\n", "No. Observations: 20 AIC: 66.75\n", "Df Residuals: 18 BIC: 68.74\n", "Df Model: 1 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "const -0.9495 0.934 -1.017 0.323 -2.912 1.013\n", "x 1.0330 0.048 21.458 0.000 0.932 1.134\n", "==============================================================================\n", "Omnibus: 0.745 Durbin-Watson: 2.345\n", "Prob(Omnibus): 0.689 Jarque-Bera (JB): 0.673\n", "Skew: 0.074 Prob(JB): 0.714\n", "Kurtosis: 2.113 Cond. No. 66.3\n", "==============================================================================\n", "\n", "Notes:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "# Overall statistical summary of the fitted model\n", "print(re.summary())" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "检验假设x的系数等于0:\n", "\n" ] } ], "source": [ "# Use f_test to check whether the coefficient a of x is significant (hypothesis: a = 0)\n", "print('检验假设x的系数等于0:')\n", "print(re.f_test('x=0'))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "检测假设const的系数等于0:\n", "\n" ] } ], "source": [ "# Use f_test to check whether the constant b is significant (hypothesis: b = 0)\n", "print('检测假设const的系数等于0:')\n", "print(re.f_test('const=0'))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "检测假设x的系数等于1和const的系数等于0同时成立:\n", "\n" ] } ], "source": [ "# Use f_test on the joint hypothesis a = 1 and b = 0\n", "print('检测假设x的系数等于1和const的系数等于0同时成立:')\n", "print(re.f_test(['x=1', 'const=0']))" ] }, { "cell_type": "code", 
"execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " OLS Regression Results \n", "=======================================================================================\n", "Dep. Variable: y R-squared (uncentered): 0.996\n", "Model: OLS Adj. R-squared (uncentered): 0.996\n", "Method: Least Squares F-statistic: 4876.\n", "Date: Thu, 02 Nov 2023 Prob (F-statistic): 2.26e-24\n", "Time: 19:22:24 Log-Likelihood: -31.933\n", "No. Observations: 20 AIC: 65.87\n", "Df Residuals: 19 BIC: 66.86\n", "Df Model: 1 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [0.025 0.975]\n", "------------------------------------------------------------------------------\n", "x 0.9862 0.014 69.825 0.000 0.957 1.016\n", "==============================================================================\n", "Omnibus: 0.489 Durbin-Watson: 2.218\n", "Prob(Omnibus): 0.783 Jarque-Bera (JB): 0.561\n", "Skew: 0.033 Prob(JB): 0.755\n", "Kurtosis: 2.182 Cond. No. 1.00\n", "==============================================================================\n", "\n", "Notes:\n", "[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n", "[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n" ] } ], "source": [ "# const is not significant, so drop the constant column\n", "# Build the new model on the regressor column(s) only\n", "model_new = sm.OLS(Y, data[features])\n", "re_new = model_new.fit()\n", "# Print the summary of the new model\n", "print(re_new.summary())" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Standard error of the prediction plus the lower and upper interval bounds (alpha=0.05)\n", "# NOTE(review): wls_prediction_std appears to return the interval for a new observation\n", "# (a prediction interval), while the legend below calls it a confidence interval - confirm.\n", "pre_std, pre_low, pre_up = wls_prediction_std(re_new, alpha=0.05)\n", "# Select a font with CJK glyphs so that Matplotlib can render the Chinese labels\n", "plt.rcParams['font.sans-serif'] = ['SimHei']\n", "plt.rcParams['axes.unicode_minus'] = False\n", "plt.rcParams.update({'font.size': 13})\n", "# Create the figure\n", "fig = plt.figure(figsize=(6, 6), dpi=100)\n", "# Draw a single set of axes in the figure\n", "ax = fig.add_subplot(111)\n", "ax.set_title('线性回归统计分析示例')\n", "ax.set_xlabel('$x$')\n", "ax.set_ylabel('$y$')\n", "# Scatter plot: the original data as blue dots.\n", "# The label is a raw string because backslash-e is not a valid Python escape sequence;\n", "# in a normal literal it triggers an invalid-escape warning. The resulting text is unchanged.\n", "ax.scatter(data[features], data[labels], color='b', label=r'真实值: $y = x + \\epsilon$')\n", "# Line plots: red dashed lines for the 95% interval bounds, solid red line for the fitted values\n", "ax.plot(data[features], pre_up, 'r--', label='95%置信区间')\n", "ax.plot(data[features], re_new.predict(data[features]), color='r',\n", "        label=f'预测值: $y = {re_new.params[features].item():.3f}x$')\n", "ax.plot(data[features], pre_low, 'r--')\n", "# Legend styling\n", "legend = plt.legend(shadow=True)\n", "legend.get_frame().set_facecolor('#6F93AE')\n", "plt.savefig('linear_stat.png', dpi=200)\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }