|
|
@@ -199,7 +199,7 @@
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
- "# 划分5份\n",
|
|
|
+ "# 将每星期工作时间平均划分成5份\n",
|
|
|
"category5 = range(0, 105, 20)\n",
|
|
|
"train_data = trans_feature(train_set, category5)\n",
|
|
|
"test_data = trans_feature(test_set, category5)\n",
|
|
|
@@ -229,6 +229,7 @@
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
+ "# 展示模型结果\n",
|
|
|
"re = evaluation(category5_res, base_res)\n",
|
|
|
"re.savefig('continous_var_cut_5.png', dpi=200)"
|
|
|
]
|
|
|
@@ -275,7 +276,7 @@
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
- "# 划分10份\n",
|
|
|
+ "# 将每星期工作时间平均划分成10份\n",
|
|
|
"category10 = range(0, 105, 10)\n",
|
|
|
"train_data = trans_feature(train_set, category10)\n",
|
|
|
"test_data = trans_feature(test_set, category10)\n",
|
|
|
@@ -305,6 +306,7 @@
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
+ "# 展示模型结果\n",
|
|
|
"re = evaluation(category10_res, base_res)\n",
|
|
|
"re.savefig('continous_var_cut_10.png', dpi=200)"
|
|
|
]
|
|
|
@@ -317,7 +319,7 @@
|
|
|
"source": [
|
|
|
"def get_category(data):\n",
|
|
|
" '''\n",
|
|
|
- " 基于卡方检验,得到每星期工作时间的“最优”分段\n",
|
|
|
+ " 基于卡方检验,得到每星期工作时间的最优分段\n",
|
|
|
" '''\n",
|
|
|
" interval = [data['hours_per_week'].min(), data['hours_per_week'].max()]\n",
|
|
|
" _category = do_divide(data, interval)\n",
|
|
|
@@ -330,7 +332,7 @@
|
|
|
"\n",
|
|
|
"def do_divide(data, interval):\n",
|
|
|
" '''\n",
|
|
|
- " 使用贪心算法,得到“最优”的分段\n",
|
|
|
+ " 使用贪心算法,得到最优的分段\n",
|
|
|
" '''\n",
|
|
|
" category = []\n",
|
|
|
" p_value, chi2, index = divide_data(data, interval[0], interval[1])\n",
|
|
|
@@ -427,6 +429,7 @@
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
+ "# 展示模型结果\n",
|
|
|
"re = evaluation(category_chi2_res, base_res)\n",
|
|
|
"re.savefig('continous_var_cut_chi2.png', dpi=200)"
|
|
|
]
|