|
|
@@ -0,0 +1,210 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 1,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import numpy as np\n",
|
|
|
+ "import statsmodels.api as sm\n",
|
|
|
+ "import matplotlib.pyplot as plt\n",
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "np.random.seed(4873)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "data = pd.read_csv('./data/simple_example.csv')\n",
|
|
|
+ "Y = data[['y']]\n",
|
|
|
+ "X = data[['x']]\n",
|
|
|
+ "# 加入新的随机变量,此变量的系数应为0\n",
|
|
|
+ "X['z'] = np.random.randint(2, size=20)\n",
|
|
|
+ "# 加入常量变量\n",
|
|
|
+ "X = sm.add_constant(X)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "model = sm.OLS(Y, X)\n",
|
|
|
+ "re = model.fit()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " OLS Regression Results \n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "Dep. Variable: y R-squared: 0.963\n",
|
|
|
+ "Model: OLS Adj. R-squared: 0.959\n",
|
|
|
+ "Method: Least Squares F-statistic: 222.8\n",
|
|
|
+ "Date: Thu, 02 Nov 2023 Prob (F-statistic): 6.38e-13\n",
|
|
|
+ "Time: 15:52:54 Log-Likelihood: -31.141\n",
|
|
|
+ "No. Observations: 20 AIC: 68.28\n",
|
|
|
+ "Df Residuals: 17 BIC: 71.27\n",
|
|
|
+ "Df Model: 2 \n",
|
|
|
+ "Covariance Type: nonrobust \n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ " coef std err t P>|t| [0.025 0.975]\n",
|
|
|
+ "------------------------------------------------------------------------------\n",
|
|
|
+ "const -0.8983 0.953 -0.942 0.359 -2.910 1.113\n",
|
|
|
+ "x 1.0400 0.050 20.722 0.000 0.934 1.146\n",
|
|
|
+ "z -0.3619 0.571 -0.634 0.535 -1.566 0.843\n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "Omnibus: 1.295 Durbin-Watson: 2.209\n",
|
|
|
+ "Prob(Omnibus): 0.523 Jarque-Bera (JB): 0.852\n",
|
|
|
+ "Skew: -0.061 Prob(JB): 0.653\n",
|
|
|
+ "Kurtosis: 1.996 Cond. No. 66.7\n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "\n",
|
|
|
+ "Notes:\n",
|
|
|
+ "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# 整体统计分析结果\n",
|
|
|
+ "print(re.summary())"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "np.random.seed(5320)\n",
|
|
|
+ "x = np.array(range(0, 20)) / 2\n",
|
|
|
+ "error = np.round(np.random.randn(20), 2)\n",
|
|
|
+ "y = 0.05 * x + error\n",
|
|
|
+ "# 新加入的无关变量z恒等于1\n",
|
|
|
+ "z = np.zeros(20) + 1\n",
|
|
|
+ "data = pd.DataFrame({\"x\": x, \"z\": z, \"y\": y})"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 6,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " OLS Regression Results \n",
|
|
|
+ "=======================================================================================\n",
|
|
|
+ "Dep. Variable: y R-squared (uncentered): 0.204\n",
|
|
|
+ "Model: OLS Adj. R-squared (uncentered): 0.162\n",
|
|
|
+ "Method: Least Squares F-statistic: 4.878\n",
|
|
|
+ "Date: Thu, 02 Nov 2023 Prob (F-statistic): 0.0397\n",
|
|
|
+ "Time: 15:52:54 Log-Likelihood: -29.583\n",
|
|
|
+ "No. Observations: 20 AIC: 61.17\n",
|
|
|
+ "Df Residuals: 19 BIC: 62.16\n",
|
|
|
+ "Df Model: 1 \n",
|
|
|
+ "Covariance Type: nonrobust \n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ " coef std err t P>|t| [0.025 0.975]\n",
|
|
|
+ "------------------------------------------------------------------------------\n",
|
|
|
+ "x 0.0969 0.044 2.209 0.040 0.005 0.189\n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "Omnibus: 0.871 Durbin-Watson: 2.037\n",
|
|
|
+ "Prob(Omnibus): 0.647 Jarque-Bera (JB): 0.815\n",
|
|
|
+ "Skew: 0.275 Prob(JB): 0.665\n",
|
|
|
+ "Kurtosis: 2.179 Cond. No. 1.00\n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "\n",
|
|
|
+ "Notes:\n",
|
|
|
+ "[1] R² is computed without centering (uncentered) since the model does not contain a constant.\n",
|
|
|
+ "[2] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# 没有多余变量时,x系数符号估计正确,为正\n",
|
|
|
+ "model = sm.OLS(data[['y']], data[['x']])\n",
|
|
|
+ "re = model.fit()\n",
|
|
|
+ "print(re.summary())"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ " OLS Regression Results \n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "Dep. Variable: y R-squared: 0.005\n",
|
|
|
+ "Model: OLS Adj. R-squared: -0.050\n",
|
|
|
+ "Method: Least Squares F-statistic: 0.09171\n",
|
|
|
+ "Date: Thu, 02 Nov 2023 Prob (F-statistic): 0.765\n",
|
|
|
+ "Time: 15:52:54 Log-Likelihood: -27.982\n",
|
|
|
+ "No. Observations: 20 AIC: 59.96\n",
|
|
|
+ "Df Residuals: 18 BIC: 61.96\n",
|
|
|
+ "Df Model: 1 \n",
|
|
|
+ "Covariance Type: nonrobust \n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ " coef std err t P>|t| [0.025 0.975]\n",
|
|
|
+ "------------------------------------------------------------------------------\n",
|
|
|
+ "x -0.0243 0.080 -0.303 0.765 -0.193 0.144\n",
|
|
|
+ "z 0.7873 0.445 1.768 0.094 -0.148 1.723\n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "Omnibus: 0.939 Durbin-Watson: 2.375\n",
|
|
|
+ "Prob(Omnibus): 0.625 Jarque-Bera (JB): 0.886\n",
|
|
|
+ "Skew: 0.338 Prob(JB): 0.642\n",
|
|
|
+ "Kurtosis: 2.221 Cond. No. 11.0\n",
|
|
|
+ "==============================================================================\n",
|
|
|
+ "\n",
|
|
|
+ "Notes:\n",
|
|
|
+ "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "model1 = sm.OLS(data[['y']], data[['x', 'z']])\n",
|
|
|
+ "re1 = model1.fit()\n",
|
|
|
+ "print(re1.summary())"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.8.5"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 4
|
|
|
+}
|