{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Load the dataset with pandas\n", "import pandas as pd\n", "\n", "\n", "data_path = \"./data/adult.data\"\n", "raw_data = pd.read_csv(data_path)\n", "# Keep only the columns used in this analysis.\n", "# .copy() makes `data` an independent frame, so the later column assignment\n", "# (data.loc[:, \"label_code\"] = ...) does not trigger SettingWithCopyWarning\n", "# and never depends on whether pandas handed back a view of raw_data.\n", "cols = [\"age\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hours_per_week\", \"label\"]\n", "data = raw_data[cols].copy()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageeducation_numcapital_gaincapital_losshours_per_weeklabel
039132174040<=50K
150130013<=50K
23890040<=50K
35370040<=50K
428130040<=50K
\n", "
" ], "text/plain": [ " age education_num capital_gain capital_loss hours_per_week label\n", "0 39 13 2174 0 40 <=50K\n", "1 50 13 0 0 13 <=50K\n", "2 38 9 0 0 40 <=50K\n", "3 53 7 0 0 40 <=50K\n", "4 28 13 0 0 40 <=50K" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## 观察数据\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageeducation_numcapital_gaincapital_losshours_per_weeklabellabel_code
039132174040<=50K0
150130013<=50K0
23890040<=50K0
35370040<=50K0
428130040<=50K0
\n", "
" ], "text/plain": [ " age education_num capital_gain capital_loss hours_per_week label \\\n", "0 39 13 2174 0 40 <=50K \n", "1 50 13 0 0 13 <=50K \n", "2 38 9 0 0 40 <=50K \n", "3 53 7 0 0 40 <=50K \n", "4 28 13 0 0 40 <=50K \n", "\n", " label_code \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 将label转换为可以运算的变量\n", "data.loc[:, \"label_code\"] = pd.Categorical(data.label).codes\n", "data.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfAAAAHfCAYAAACvVooFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3X+cZXV95/nXO9BkEYg2obYRI3aYIfFhhBZTIh0hFgwwYuzoEjP48FcMMT3JMmayySTiQFYx0WRcx8Q1AdMZMMRfWdRo7I2ElpEeWmyUblAkGkKi3SqCtoK0mIzZtJ/945ymi+rqqntv3ap7T9Xr+XjUo879nFvnfk5VnfM53/Pj+01VIUmSuuX7Rp2AJEnqnwVckqQOsoBLktRBFnBJkjrIAi5JUgdZwCVJ6iALuCRJHWQBl6QxlGRXkrVL+HlvTXLRUn2eFs4CLkkrRJLHJfnV2eZV1X+sqv9nqXPS4CzgIskVSe5N8qUkL2tjv5bkviT/I8mHk/xOG//5JF9o5/3iaDOX1KfHAbMWcHWPBXyFS3IicBbwI8AZwP+V5Bjgt4EnA58A7qyqy5P8GPBrwCTwNOB1SdaMJnOp22YeDCf5viRXta/fDaxq3/eKJH867ee2Jplqp3+jPfD+cpKXTnvPbAfl7wFuA56Y5P4kfz0jnz9N8ooZsf3LvzvJBW3sdUl+P8lfJ/lmkrfOs56zvr+Nv27a+3YlWZukkvxZki8meXOSbyQ5p+9f8Apw+KgT0GhV1ZfaU2q/DpwNrAH+pf1aBRwBfLd9+znAScDn2tdHAj8KfG0pc5a6bsbB8CrgdmAv8HTgh4GfAl48zzLOA14GnErTsv5skg8Bx3LgoPyx7bLfWVUvbq+pb62qtT3keC7wc8A64PHAx5Ksa2dfDJwLfBX4QpIrquqBORZ30Pvn+fhNwFOAbwF/CpwJfGy+nFcaC/gKl+Qsmg3ktcC1wC7ge8CO9ms38O/2vx34s6r65fZnj+FAcZfUu9kOhv898P6q+p/AB5J86xA/m/b7c4B3V9W3aArdMW384VkOygdxAfCuqnoQeDDJJ2kODAA2V9VtAEnuB34AmKuAz/b+Q60XwHbg4fb7WXi2eFb+UvRM4FPAe4HnTot9B/jhqnp2Ve1vYX8MuCDJ8W3x/gzNUbKk/uw/GD6+qo4Hfgi4G5g+POT3DvGzT5h1gckLk5zQHpR/EPgC8IoF5lkzpve//odDvOdQ5nx/ksOZdqBRVfvayX0z36sDLOB6P/BUmlNbT6E56v0W8OPA/Un+IcmfJ/mBqrqL5tr4dpqWwx9V1adHlLfUZbMdDH8FuDDJ9yd5Ps2pcGhOrT8RIMlzgX/Vxm8AXpzksUlOAP6QpujPdlC
+3zeBH0zymPbryDlyvB54SXvn+pPb5X68ndfvONSzvf+R9QI2At/f5zJXPAv4CldVu6rqlKpaU1WvqqpjgCngqqpaQ3Mj22NoTsVRVVdX1Q9X1ROr6r+OLnOpu2Y7GAZ+D/g88CWaa8b3t2//a+D7k2ylKcgfb5exBXgP8Nk29n9U1f3MclCe5Efan/k28F9oWsRfpLm2fagcbwTeCdwJfBi4eNrZuGH4c2AyyRaa1vfuIS57RUhVvwdSWu7aG1Wuodm499HsHH6xqh4eaWKSpEdYwCVJC5LkiTSPqM20s6p+aqnzWSks4JIkdZDXwCVJ6iALuCRJHTTWHbkcd9xxtXbt2lGnIY3Uzp07v1FVE6POYym4zUu9b/NjXcDXrl3Ljh07Rp2GNFJJVszjNW7zUu/bvKfQJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOmisnwNfqTZv3rzgZWzYsGEImUhaaRa6/3Hfs3RsgUuS1EELKuBJrkyyoZ2+Osn2JJdPm99TTJIk9WfgU+hJzgKOr6rNSS4EDquq9UmuSXIycEovsaq6ZzirMh48/SRJWgoDtcCTrAL+BNiV5PnAFHBdO3sLcGYfMUmS1KdBT6G/HPgc8CbgdOAS4N523gPAGuCoHmOPkmRjkh1JduzZs2fA9CRJWt4GLeCnAZuq6n7gXcDNwJHtvKPb5T7cY+xRqmpTVU1W1eTExIoYQVEaK0nWJNnWTq9KsjnJLUkuXmhM0vAMWsD/HjipnZ4E1nLgdPg6YBews8eYpDGRZDVwLc3ZMoBXATur6lnAC5Mcs8CYpCEZ9Ca2q4FrkrwIWEVzbfvDSU4ALgDOAArY1kNsLHjzmQTAPuAi4C/b11PApe30zTQH7AuJ3bRIeUsrzkAt8Kr6dlX9bFX9ZFWtr6rdNBvrrcDZVfVQVe3tJTaMlZA0HFW1d8Z22eu9LAPd8wLe9yINamgduVTVg1V1XXtdvK+YpLHV670sA93zAt73Ig3KntgkzaXXe1m850VaYvaFLmku1wIfaTtuegrwSZrT4oPGJA2JLXBJB6mqqfb7buA84Bbg3Krat5DYCFZFWrZsgUuaU1V9lQM9KC44Jmk4bIFLktRBFnBJkjqo86fQ7YBFkrQS2QKXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB1nAJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOqjv0ciSHA58of0CeBXwQuC5wKeq6pL2fVf0EpMkLT+OFLn4BmmBnwq8t6qmqmoKOAI4Ezgd+HqSc5P8eC+xoayBJEkr0CDjgZ8BPC/J2cBngbuBD1RVJbkBuAB4qMfYjUNZC0mSVphBWuC3AedW1enAKuBI4N523gPAGuCoHmMHSbIxyY4kO/bs2TNAepIkLX+DFPA7q+q+dnoH8DBNEQc4ul1mr7GDVNWmqpqsqsmJiYkB0pMkafkbpIC/M8m6JIcBL6BpWZ/ZzlsH7AJ29hiTJEkDGOQa+OuB9wABPgz8DrAtyVuB57Rfu4Hf7SEmSZIG0HcBr6q7aO5Ef0R7R/lPAW+tqi/2E5MkSf0bpAV+kKr6J+D9g8QkSVL/7IlN0pySrE7ykfbpkD9uY1cn2Z7k8mnv6ykmaTgs4JLm8zLg3VU1CRyT5DeBw6pqPXBSkpOTXNhLbHSrIC0/QzmFLmlZ+ybw1CSPA55I0ynTde28LTRPl5zWY+yeJcpZWvZsgUuaz8eBJwG/AnyepvtkO2+SRswCLmk+rwV+qapeD/wt8GLsvEkaOQu4pPmsBk5pO296JvB72HmTNHJeA5c0n98F3kFzGn078Ps0nTedQDMo0RlA9RiTNCS2wCXNqao+VVU/VlVHV9V5VbUXmAJuBc6uqod6jY1mDaTlyRa4pL5V1YMcuMO8r5ik4bAFLklSB1nAJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEm
SOsgCLklSB1nAJUnqILtSlSQte5s3b17Qz2/YsGFImQzPwC3wJGuS3NFOX51ke5LLp83vKSZJkvq3kFPobwaOTHIhcFhVrQdOSnJyr7GFpy9J0so0UAFPcg7wHeB+muEC9482tAU4s4+YJEkaQN8FPMkRwG8Bl7aho4B72+kHgDV9xGZb/sYkO5Ls2LNnT7/pSZK0IgxyE9ulwJVV9a0kAA8DR7bzjqY5KOg1dpCq2gRsApicnKwB8lNrOd60IUlqDHIK/VzgkiRbgacBGzhwOnwdsAvY2WNMkiQNoO8WeFX95P7ptoj/NLAtyQnABcAZQPUYkyRJA1hQRy5VNVVVe2luULsVOLuqHuo1tpDPliRpJRtKRy5V9SAH7jDvKyZJkvpnV6qSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB1nAJfUkyZVJNrTTjkAojZgFXNK8kpwFHF9Vmx2BUBoPFnBJc0qyCvgTYFeS5+MIhNJYsIBLms/Lgc8BbwJOBy7BEQilkbOAS5rPacCmqrofeBdwM0MegbCqJqtqcmJiYnHWQFqGLOCS5vP3wEnt9CSwFkcglEZuKH2hS1rWrgauSfIiYBXNte0POwKhNFq2wCXNqaq+XVU/W1U/WVXrq2o3jkAojZwtcEl9cwRCafRsgUuS1EEWcEmSOsgCLklSB1nAJUnqIAu4JEkdZAGXJKmDBi7gSY5Ncl6S44aZkCRJmt9ABTzJauD/pRnY4KYkE44PLEnS0hm0BX4q8GtV9QbgBuAcHB9YkqQlM1BPbFX1PwCS/CRNK/xYDh7397QeY/dMX3aSjcBGgBNPPHGQ9CRJWvYWcg08wEXAgzSDFgxlfGCHFpQkaX4DF/BqXALcCfwEQxwfWJIkzW3Qm9heneTl7cvHAb+H4wNLkrRkBh2NbBNwXZJXAncBHwJudnxgSZKWxqA3sT0InDc9lmSqjb1p/7i/vcYkSVJ/hjYeuOMDS5K0dLyJTJKkDrKAS5LUQRZwSZI6yAIuSVIHWcAlSeqgod2FruVr8+bNC/r5DRs2DCkTSdJ+tsAlSeogC7gkSR1kAZckqYMs4JIkdZAFXFJPkqxJckc7fXWS7Ukunza/p5ik4bCAS+rVm4Ejk1wIHFZV64GTkpzca2yEuUvLjgVc0rySnAN8B7gfmOLAgERbgDP7iM227I1JdiTZsWfPnkXIXlqeLOCS5pTkCOC3gEvb0FHAve30A8CaPmIHqapNVTVZVZMTExPDXwFpmbKAS5rPpcCVVfWt9vXDwJHt9NE0+5FeY5KGxA1K0nzOBS5JshV4GrCBA6fD1wG7gJ09xiQNiV2pSppTVf3k/um2iP80sC3JCcAFwBlA9RiTNCS2wCX1rKqmqmovzQ1qtwJnV9VDvcZGk7W0PNkCl9S3qnqQA3eY9xWTNBy2wCVJ6qCBCniSxya5PsmWJB9McoQ9M0mStHQGbYG/BHhLVZ1P07HDi7BnJkmSlsxA18Cr6sppLyeAlwJ/0L7e3+PSaRzcC9NssXsGyUGSpJVsQdfAk6wHVgNfZkg9M9mtoiRJ8xu4gCc5FngbcDFD7JnJbhUlSZrfoDexHQG8D3hNVe2m916Y7JlJkqQhGPQ58F8Ang5cluQy4B3Ay+yZSZKkpTFQC7yqrqqq1W2vTFNVdS32zCRJ0pIZWk9s9swkSdLSsSc2SZI6yAIuSVIHWcAlSeogC7gkSR1kAZckqYMcD1xLZvPmzQv6+Q0bNgwpE0nqPlvgkiR1kAVckqQOsoBLktRBFnBJkjrIAi5JUgdZwCVJ6iALuCRJHWQBlySpgyzgkiR1kAVc0pySPDbJ9Um2JPlgkiOSXJ1ke5LLp72vp5ik4bCAS5rPS4C3VNX5wP3Ai4DDqmo9cFKSk5Nc2EtsZGsgLUP2hS5pTlV15bSXE8BLgT9oX28BzgROA67rIXbPYucrrRS2wCX1JMl6YDXwZeDeNvwAsAY4qsfYbMvdmGRHkh179uxZpOyl5ccCLml
eSY4F3gZcDDwMHNnOOppmP9Jr7CBVtamqJqtqcmJiYnFWQFqGBi7gSdYk2dZOr0qyOcktSS7uJyZpvCU5Angf8Jqq2g3spDkdDrAO2NVHTNKQDHQNPMlq4FqaU2QArwJ2VtXrknwkyfuAX+wlVlXfHsaKSFo0vwA8HbgsyWXAO4CXJTkBuAA4AyhgWw8xSUMyaAt8H3ARsLd9PcWBm1VuBib7iEkaY1V1VVWtrqqp9utamm35VuDsqnqoqvb2EhvNGkjL00At8HbDJMn+UK83sMx7U0uSjcBGgBNPPHGQ9CQtsqp6kAMH433FJA3HsB4j23+zykM0N6s83EfsUapqE7AJYHJysoaUnyQte5s3b17Qz2/YsGFImWgpDOsudG9qkSRpCQ2rBX4t8JEkZwFPAT5Jc6q8l5gkSerTglrgVTXVft8NnAfcApxbVft6jS3k8yVJWqmG1pVqVX2Vg29g6SkmSZL6Y09skiR1kAVckqQOsoBLktRBDicqSQPyuWuNki1wSZI6yBa4JGlseZbj0CzgkiT1aJwOKCzgkjRi41QU1B1eA5ckqYNsgatzbK1Iki1wSZI6yQIuSVIHWcAlSeogC7gkSR1kAZckqYMs4JIkdZAFXJKkDvI5cEkrjn0JaDmwgGvFcicuqcss4JI6w4Mu6YCRXANPcnWS7UkuH8XnS1o6bu/S4ljyFniSC4HDqmp9kmuSnFxV9yx1HtIw2CKcm9u7tHhG0QKfAq5rp7cAZ44gB0lLYwq3d2lRjOIa+FHAve30A8DTp89MshHY2L58OMndA37OccA3BvzZceE6jIdRr8OTRvjZCzXn9g5D3ebH3aj/j5bSSlpXGP769rTNj6KAPwwc2U4fzYyzAFW1Cdi00A9JsqOqJhe6nFFyHcbDcliHEZpze4fhbfPjbiX9H62kdYXRre8oTqHv5MBptHXArhHkIGlpuL1Li2QULfAPAduSnABcAJwxghwkLQ23d2mRLHkLvKr20tzYcitwdlU9tEgftRxOybkO42E5rMNILOH23gUr6f9oJa0rjGh9U1Wj+FxJkrQADmYiSVIHWcAlaRElOTzJl5Jsbb9OGXVOWrgka5Jsa6efkOQr0/7GE0uRw7Ip4Ekem+T6JFuSfDDJEV3swrH9p7ijne5c/gBJrkyyoZ3u1DokWZ3kI0l2JPnjNtapddDYORV4b1VNtV+fHXVCi2FGQVuVZHOSW5JcPOrchi3JauBamn4OAJ4JvGHa33jPUuSxbAo48BLgLVV1PnA/8CLaLhyBk5KcPNLsevdm4MjpXVDSofyTnAUcX1WbO7oOLwPe3T7TeUyS36R766DxcgbwvCSfag8Gl90gUrMUtFcBO6vqWcALkxwzsuQWxz7gImBv+/oM4JVJbk/yxqVKYtkU8Kq6sqo+2r6cAF5Kx7pwTHIO8B2aA5Apupf/KuBPgF1Jnk8H1wH4JvDUJI8Dngj8MN1bB42X24Bzq+p0YBXw3BHnsxhmFrQpDmw3NwPLqlOXqto744mK62nW+RnA+iSnLkUey6aA75dkPbAa+DKP7sJxzciS6kGSI4DfAi5tQzO7oBzr/FsvBz4HvAk4HbiE7q3Dx2m6MfwV4PPAEXRvHTRe7qyq+9rpHcCyO4szS0Hr4v5rIT5RVd+uqn3AHSzR33hZFfAkxwJvAy6mhy4cx8ylwJVV9a32ddfyBzgN2FRV9wPvojny7to6vBb4pap6PfC3wIvp3jpovLwzybokhwEvAD4z6oSWQBf3XwtxQ5LHJ3kMcD5w11J86LL5pbYt2PcBr6mq3XSvC8dzgUuSbAWeBmygW/kD/D1wUjs9Cayle+uwGjil3dk+E/g9urcOGi+vB94JfBrYXlU3jjifpdC1/e9CXQHcRNNh0durakkG5Fk2Hbkk+WXgjRw4un0H8GvAf6ftwrErvUC1RfyngW10KP/2RpVraE6XraK5kfDDdGsdTqf533kSsB34GTr2d5BGJcnWqppK8iTgI8CNwE/
QbDf7Rpvd8rNsCvhs2jsjzwNubk/rdkrX8wfXQVqp2v7vzwRu8KB3cSzrAi5J0nK1bK6BS5K0kljAJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB1nAJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB1nAJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB1nARyjJK5L86ajz6JIka5PsGnUekjRqFnBJGmNJXpfkdfO8Z6DGwFI1IpJsTTK12J+z0ljAJUnqIAv46K1K8u4k30zygTR+I8mXktyd5AI4+Cg8ya4ka6dNn5PkpiTXTnvPFUm+muS+JP/7XEm0y9+c5PPtZ2+YNu/nk3yhXc4vTotvTfLCJB9K8rF5ln9jkp9I8gdJrk7y1CSfauc9N8nfJvnajHWcNT5t/rOT3JVkYq7PlqTlyAI+ej8D/DnwJOBZwH8Cfg5YB/xvwDuSrOlhOW8Gfgv4DwBJjgVeA/wY8FTg3/SwjB8DfgLYAFyT5DFJfgz4NWASeBrwuhn5vBG4ps11LrcDPwo8DjgM+BHg9rb4vg04H/jXwM8mOe1Q8f0LS/IU4O3AT1fVnh7WTVoWkvxSe5B9b5JXT5v1hCTb24P2n5/2/jkPhPv43PPaA/yvJfn9afHz2+V/Ocl/nhb/P9uD/uuBHxh2PoLDR52A2FFVmwGS3A28CHhXVT0IPJjkk8BZs/xcZrx+U1V9fNrrh4C7gd8H/prmoGA+H5r2uV+jKbJnAScBn2vfcyRNIf5a+/qaqvpwD8u+neYA4HCggCe3sTOAJwCfat/3/TQHEj90iPjHgaOB9wH/CHyxh8+WloUk/wvwMprt5lvA7iR/1M4+AziFZvvameQG4P+jORA+G3gQuDXJX1bVHX1+7nHAnwHnAV8AbklyPrCzjZ8P7AZuTvJp4BvAxcBTaA7Ab22Xs//AfEH5qGELfPT+Ydp0zfi+f3r6a5IcDsxsld86/UVV7QOeAbwfeDZwR5Ij5sll+kHB9wHfa2N/VlXHV9XxNIV1+mc96nPncHubz3eA+2nOCNzeLv+mGcv/wBxxaI7mfxX4O+DFPX6+1HlV9T+Bl9MU8euAY4Hj2tk3VtWuqtpNc+A7yaMPkO8BTqA5EO7XeuDTVXVXVf1jVZ1WVVtozth9uqrurKqHgD8FntvG/6qqHqyq24DPtssZVj7CAj4OasbrVwMvSfK4JE8GnknT6twLPLF9z0aaFukhJfkR4L+3X68Gjgd+cJ5cXpBkdXuq+gdpNrCPARckOT7JMcBnaI6q+3UPzQ7lHprCexbNRn0rcFqSH20PMD5KczR/qDjAV6vqo8BrgSuSzPm7kJaLJP8KuBl4APh14MvTZk/fl3yPZv8+14HwQvJYn+QZs3zu/gZHZsmHxcpnpbKAj59/Ad4J3Al8GLi4qr5Gc518MskWmtb37rkWUlV/B2yjOcX8d8AfVtV983z2HTQHC3/Zfu4/VdVdwG8D22lOo/9RVX2635WqqgI+3ebyd8Dnquq7VfV14JXtuu4CtlfVXx4qPss63gxc0m8+UkedRrM9XENzKeuHps07J8mJSX6IpqV7O3MfCPdjO/C0JE9pD5j/S/vZn2jjT03yAzSX6q6naWFfkOSxSZ4OnNouZ1j5CEizX9VKt/9mkqp63WgzkTTd9G0zyeOAv6Ip3ltoLktdQnMq+heBY4AJ4Iqqenv7888D/ms7771V9evTlv0KYKqqXtFDHs8B/qBdznuq6jfa+PnA/w0cBVxVVW9s42+gOQi/m+bm1V+pqq1z5aP+WMBXmCT3zxL+OvAXsPACnuQ2Dpzqn+5JVfXdhSxbknSABVySBECSv6C5AW2mM6vq75c6H83NAi5JUgd5E5skSR1kAZckqYPGuie24447rtauXTvqNKSR2rlz5zeqakX09+42L/W+zY91AV+7di0
7duwYdRrSSCWZ85n/5cRtXup9m/cUuiRJHWQBlySpgyzgkh7Rdn15fZItST6Y5Ih26Mqt7dcp7fuuSHLbtJGweo5JGg4LuKTpXgK8parOpxk17lKa7i6n2q/PJvlx4EzgdODrSc7tNTaSNZKWKQu4pEdU1ZXtSG/Q9Kn9L8DzknwqydXtULbPBj7QDlBzA83Icr3GJA2JBVzSQZKsB1bTjBZ1blWdDqyiGev5KODe9q0P0IyO12tsts/amGRHkh179uxZhLWRlqeeCniSNUm2tdNXTLse9rdJXpPkCUm+Mi0+0b736iTbk1w+bVkHxSSNjyTHAm8DLgbunDYM7Q7gZOBh4Mg2djTNfqTX2EGqalNVTVbV5MTEinjcXRqKeZ8DT7IauJbmaJqqeu20ee8H/gx4JvCGqrpq2rwLgcOqan2Sa5KcDJwyM1ZV9wx3lbpv8+bNC17Ghg0bhpCJVpp2jOb3Aa+pqt1JrmuHhbwLeAHwRuCfgX9HM0b9OprxqXf2GFuQhW4bbhdaTnppge8DLgL2Tg8meQbwlaq6l2bw+FcmuT3JG9u3TAHXtdNbaG5mmS0maXz8AvB04LIkW4G/Ad4JfBrYXlU3Ah8HTkvyVtqb3PqISRqSeVvgVbUXIMnMWf8R2N8avx74beAfgRuTnMrB17+efojYoyTZCGwEOPHEE3tfE0kL1p5Fu2pG+IoZ7/lee0f5TwFvraovAvQakzQcA3WlmuRxwP9aVf/Qhj5RVd9t591Bf9fJHqWqNgGbACYnJx3rVBpDVfVPwPsHiUkajkHvQn8+8JFpr29I8vgkjwHOp7letpMDp8inXxObGZMkSX0adDCTfwu8edrrK4CbaG5ueXtV3Z3kPmBbkhOAC2iuk9csMUmS1KeeC3hVTU2bfvGMeTcBT54R25tkCjgPeFNVPQQwW0ySJPVnUYcTraoHOXDX+SFjkiSpP/bEJklSB1nAJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB1nAJUnqIAu4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB/VUwJOsSbKtnX5Ckq8k2dp+TbTxq5NsT3L5tJ/rKSZJkvozbwFPshq4FjiqDT0TeENVTbVfe5JcCBxWVeuBk5Kc3GtscVZLkqTlrZcW+D7gImBv+/oM4JVJbk/yxjY2BVzXTm8Bzuwj9ihJNibZkWTHnj17+lkXSZJWjHkLeFXtraqHpoWupynEzwDWJzmVpnV+bzv/AWBNH7GZn7epqiaranJiYqLvFZIkaSU4fICf+URVfRcgyR3AycDDwJHt/KNpDgx6jUmSpD4NUkBvSPL4JI8BzgfuAnZy4HT4OmBXHzFJktSnQVrgVwA3Af8MvL2q7k5yH7AtyQnABTTXyavHmKQxkeSxwJ8DhwHfobn/5SrgKcBfVdXvtO+7etCYpOHouQVeVVPt95uq6slVdWpV/WEb20tzXfxW4OyqeqjX2BDXRdLCvQR4S1WdD9wPvIgBnzDxqRNpcQ3SAp9VVT3IgTvM+4pJGg9VdeW0lxPAS4E/aF/vf3LkNA5+mqTX2D2Llbu00ngTmaSDJFkPrAa+zOBPmMz71En7WT46Kg3AAi7pUZIcC7wNuJiFPWHS01MnPjoqDcYCLukRSY4A3ge8pqp2s7AnTHzqRFpEQ7sGLmlZ+AXg6cBlSS4D3gHhlUATAAAOuElEQVS8bMAnTHzqRFpEtsAlPaKqrqqq1dPGOriWAZ8w8akTaXHZApc0p4U8YeJTJ9LisQUuSVIHWcAlSeogC7gkSR1kAZckqYMs4JIkdZAFXJKkDrKAS5LUQRZwSZI6yAIuSVIHWcAlSeqgngp4kjVJtrXTJybZmuRjSTal8YQkX2njW5NMtO+9Osn2JJdPW9ZBMUmS1J95C3iS1cC1wFFt6N8Dv1xV5wBPBE4Bngm8YdoACHuSXAgcVlXrgZOSnDxbbDFWSpK
k5a6XFvg+4CJgL0BVXVZVn2/n/SDwDZphAl+Z5PYkb2znTXFgEIMtNOMCzxaTJEl9mreAV9Xe2YYBTHIR8DdV9VXgepri/AxgfZJTaVrs97ZvfwBYc4jYzOVuTLIjyY49e/b0v0aSJK0AA93EluQk4D8Bv9qGPlFV366qfcAdwMnAw8CR7fyj28+aLfYoVbWpqiaranJiYmKQ9CRJWvb6LuDtNfH3AhdPa5nfkOTxSR4DnA/cBezkwCnydcCuQ8QkSVKfDh/gZy4FTgTelgTgtcAVwE3APwNvr6q7k9wHbEtyAnABzXXymiUmSZL61HMBr6qp9vurgVfP8pYnz3j/3iRTwHnAm/a31meLSZKk/gzSAu9ZVT3IgbvODxmTJEn9sSc2SZI6yAIuSVIHWcAlSeogC7gkSR1kAZckqYMs4JIkdZAFXJKkDrKAS5LUQRZwSZI6yAIu6VGSrEmyrZ1+QpKvJNnafk208auTbE9y+bSf6ykmaTgs4JIe0Y42eC1wVBt6JvCGqppqv/YkuRA4rKrWAyclObnX2CjWSVquLOCSptsHXATsbV+fAbwyye1J3tjGpjgwnsEWmiGCe41JGhILuKRHVNXeGaMEXk9TiJ8BrE9yKk3r/N52/gPAmj5iB0myMcmOJDv27NkzxLWRljcLuKS5fKKqvl1V+4A7gJOBh4Ej2/lH0+xHeo0dpKo2VdVkVU1OTEwszlpIy5AFXNJcbkjy+CSPAc4H7gJ2cuB0+DpgVx8xSUOyqOOBS+q8K4CbgH8G3l5Vdye5D9iW5ATgAprr5NVjTNKQ2AKXdJCqmmq/31RVT66qU6vqD9vYXprr4rcCZ1fVQ73GlnxFpGWspwI+47nQVUk2J7klycULjUnqnqp6sKquq6r7+41JGo55C/gsz4W+CthZVc8CXpjkmAXGJElSn3ppgc98LnSKA8923gxMLjD2KD5SIknS/OYt4LM8F7qQZ0DnfS7UR0okSZrfIDexLeQZ0J6eC5UkSXMbpIAu5BlQnwuVJGkIBnkO/FrgI0nOAp4CfJLmtPigMUmS1KeeW+DTngvdDZwH3AKcW1X7FhIb6tpIkrRCDNQTW1V9lQN3ky84JkmS+uNNZJIkdZAFXJKkDrKAS5LUQRZwSZI6yAIuSVIHWcAlSeogC7gkSR1kAZckqYMs4JIkdZAFXJKkDrKAS5LUQRZwSZI6yAIuSVIHWcAlSeogC7gkSR1kAZckqYMGKuBJfjnJ1vbr00muTvKlabFT2vddkeS2JH807WcPikmSpP4MVMCr6qqqmqqqKWAb8MfAe/fHquqzSX4cOBM4Hfh6knNniw1nNSRJWlkWdAo9yROANcAk8Lwkn2pb44cDzwY+UFUF3ACcdYiYJEnq00KvgV8CXAXcBpxbVacDq4DnAkcB97bve4Cm0M8We5QkG5PsSLJjz549C0xPUr+SrEmyrZ1elWRzkluSXLzQmKThGbiAJ/k+4GxgK3BnVd3XztoBnAw8DBzZxo5uP2u22KNU1aaqmqyqyYmJiUHTkzSAJKuBa2kOtgFeBeysqmcBL0xyzAJjkoZkIS3ws4BPtqfD35lkXZLDgBcAnwF20lzvBlgH7DpETNL42AdcBOxtX08B17XTN9NcLltITNKQHL6An/23NBslwOuB9wABPlxVN7Yt9N9N8lbgOe3X7lliksZEVe0FSLI/1OulsIEumbWftRHYCHDiiScOZ0WkFWDgFnhV/eeq+ot2+q6qOrWqTqmqy9rY94Bzae5Sv6CqvjhbbOGrIGkR9XopbKBLZuBlM2lQi9qRS1X9U1W9v6q+MFdM0tjq9VKYl8ykJbaQU+iSlr9rgY8kOQt4CvBJmtPig8YkDYldqUo6SNtJE1W1GzgPuIXmUdF9C4mNYFWkZcsWuKQ5VdVXOXA3+YJjkobDFrgkSR1kAZckqYMs4JIkdZAFXJKkDrKAS5LUQRZwSZI6yAIuSVIHWcAlSeogC7gkSR1kAZckqYPsSlWSpB5
t3rx5QT+/YcOGIWViC1ySpE6ygEuS1EF9F/Akhyf5UpKt7dcpSa5IcluSP5r2vp5ikiSpf4O0wE8F3ltVU+2YwUcAZwKnA19Pcm6SH+8lNpQ1kCRpBRrkJrYzgOclORv4LHA38IGqqiQ3ABcAD/UYu3EoayFJ0gozSAv8NuDcqjodWAUcCdzbznsAWAMc1WPsIEk2JtmRZMeePXsGSE+SpOVvkAJ+Z1Xd107vAB6mKeIAR7fL7DV2kKraVFWTVTU5MTExQHqSJC1/gxTwdyZZl+Qw4AU0Lesz23nrgF3Azh5jkiRpAINcA3898B4gwIeB3wG2JXkr8Jz2azfwuz3EJEnSAPou4FV1F82d6I9o7yj/KeCtVfXFfmKSJKl/Q+lKtar+CXj/IDFJktQ/e2KTJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOmgod6FreRunAewlSQ1b4JLm5BDC0niygEuaj0MIS2PIU+iS5rOoQwgn2QhsBDjxxBOXZIWk5cAWuKT5LOoQwo5AKA3GFrik+dxZVd9tp3dwoIjDEIYQljQYNyhJ83EIYWkM2QKXNB+HEJbGkAV8GfP5bQ2DQwhL48kCLqlvDiEsjZ7XwCVJ6iALuCRJHdR3AU/y2CTXJ9mS5INJjpjZzWL7PrtVlCRpkQzSAn8J8JaqOh+4H7iUad0sVtVn7VZRkqTF1XcBr6orq+qj7csJ4F9ouln8VJKrkxwOPJu2C0XgBuCsQ8QOkmRjkh1JduzZs2eAVZIkafkb+Bp4kvXAauCjPLqbxedit4qSJC2qgR4jS3Is8DbgZ4D7Z3SzeDIruFtFn72WJC2FQW5iOwJ4H/CaqtrNwd0sfga7VZQkaVEN0gL/BeDpwGVJLgNuAt5J281iVd2Y5PvoWLeKtpwlSV3SdwGvqquAq2aEr5jxnu/ZraIkSYtn0bpStVtFSZIWz7K8kUySpOXOwUy0ZLzPQJKGxxa4JEkdZAGXJKmDLOCSJHWQBVySpA6ygEuS1EEWcEmSOsgCLklSB1nAJUnqIAu4JEkd1Pme2OzdS5K0EtkClySpgyzgkiR1UOdPoWvl8bKJJNkClySpk0ZSwJNcnWR7kstH8fmSlo7bu7Q4lvwUepILgcOqan2Sa5KcXFX3LHUe0jBOxXs6f25u79LiGUULfAq4rp3eApw5ghwkLY0p3N6lRTGKm9iOAu5tpx8Anj59ZpKNwMb25cNJ7p5lGccB31i0DAczbjmZz/zGLadD5fOkpU5kiObc3qHnbR668/calXHKZ5xyge7l09M2P4oC/jBwZDt9NDPOAlTVJmDTXAtIsqOqJhcnvcGMW07mM79xy2nc8hmSObd36G2bh/H7/ZjPoY1TLrB88xnFKfSdHDiNtg7YNYIcJC0Nt3dpkYyiBf4hYFuSE4ALgDNGkIOkpeH2Li2SJW+BV9VemhtbbgXOrqqHBljMvKfbRmDccjKf+Y1bTuOWz4INaXvfb9x+P+ZzaOOUCyzTfFJVw1iOJElaQvbEJklSB1nAJUnqoM4V8HHoljHJY5Ncn2RLkg8mOWJM8lqT5I52euT5tHlcmWTDqHNKsjrJR5LsSPLHY5DPmiTb2ulVSTYnuSXJxYeKrRS9/F2W8m8332fNtj8YZT7T3vfI/mAMcnlkPzDKfGbbDyxyPo9s54eYv6DtvFMFfHq3jMBJSU4eUSovAd5SVecD9wMvGpO83gwcOS6/pyRnAcdX1eYxyOllwLvbZy+PSfKbo8onyWrgWppOTgBeBeysqmcBL0xyzCFiy14v/ydL+b/U42fN3B88Z8T57PdmDjyDP7Jcpu8HFiuXPvKZuR9YtGfDZ9nOZ7Og7bxTBZwx6Zaxqq6sqo+2LyeAl446ryTnAN+h2YFMjUE+q4A/AXYlef4Y5PRN4KlJHgc8EfjhEeazD7gI2Nu+npqWy83A5CFiK8EU8/9dennPkuUzy/7g66PMBw7aH4wsl1n2A4tp3nw4eD/w5UXMZ+Z2PpspFrC
dd62Az+yWcc0IcyHJemA1zT/ByPJqT9n9FnBpGxqH39PLgc8BbwJOBy4ZcU4fp+me8FeAzwNHjCqfqto743Gq2f5e4/A3HIVe1nspfzc9f9b+/UFV3TrKfGbZH4wsF2bsB5K8asT5zNwPPLBYycyync9mQf/LXSvg83bLuFSSHAu8Dbh4DPK6FLiyqr7Vvh51PgCnAZuq6n7gXTRHl6PM6bXAL1XV64G/BV484nymm+3vNQ5/w1HoZb2X8nfT02fN2B8spl7ymbk/GGUuM/cDZ484n5n7gZ9fxHx6saD/5a7tFMaiW8b2CPd9wGuqavcY5HUucEmSrcDTgA0jzgfg74GT2ulJYC2jzWk1cEqSw4BnAr834nymm+3/Z9T/U6PSy3ov5e9m3s+aZX+wmHpZ90ftD5L8txHmMnM/sJi/n17ymbkfGHVHKAv7X66qznwBPwB8BngLzemPx44oj18GHgS2tl8/Nw55tbltHYffE3AMzU7tZmA7zWmrkeVEcxr/b2iOeD86Jr+jre33J7W5vRW4DThsttio/qeW+Hcy8++yDvided6zaH+7HvOZuT+4aJT5zPY/NsLfzcz9wBNGnM/M/cDRi5XPzL8BcA7wH2bMW9B23rme2No7+84Dbq7mtMxYGLe8xi0fGL+cximfNH2FnwncUO11s9liK0Evf5el/NuN0//JuOUzTrmMYz69WMh23rkCLkmSuncNXJIkYQGXJKmTLOCSJHWQBVySpA6ygEuS1EH/P7iV12alhMDHAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# 画直方图,直观了解数据\n", "import matplotlib.pyplot as plt\n", "\n", "\n", "plt_data = data[[\"age\", \"hours_per_week\", \"education_num\", \"label_code\"]]\n", "plt_data.hist(rwidth=0.9, grid=False, figsize=(8, 8), alpha=0.6, color=\"grey\")\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageeducation_numcapital_gaincapital_losshours_per_weeklabellabel_code
count32561.00000032561.00000032561.00000032561.00000032561.0000003256132561.000000
uniqueNaNNaNNaNNaNNaN2NaN
topNaNNaNNaNNaNNaN<=50KNaN
freqNaNNaNNaNNaNNaN24720NaN
mean38.58164710.0806791077.64884487.30383040.437456NaN0.240810
std13.6404332.5727207385.292085402.96021912.347429NaN0.427581
min17.0000001.0000000.0000000.0000001.000000NaN0.000000
25%28.0000009.0000000.0000000.00000040.000000NaN0.000000
50%37.00000010.0000000.0000000.00000040.000000NaN0.000000
75%48.00000012.0000000.0000000.00000045.000000NaN0.000000
max90.00000016.00000099999.0000004356.00000099.000000NaN1.000000
\n", "
" ], "text/plain": [ " age education_num capital_gain capital_loss \\\n", "count 32561.000000 32561.000000 32561.000000 32561.000000 \n", "unique NaN NaN NaN NaN \n", "top NaN NaN NaN NaN \n", "freq NaN NaN NaN NaN \n", "mean 38.581647 10.080679 1077.648844 87.303830 \n", "std 13.640433 2.572720 7385.292085 402.960219 \n", "min 17.000000 1.000000 0.000000 0.000000 \n", "25% 28.000000 9.000000 0.000000 0.000000 \n", "50% 37.000000 10.000000 0.000000 0.000000 \n", "75% 48.000000 12.000000 0.000000 0.000000 \n", "max 90.000000 16.000000 99999.000000 4356.000000 \n", "\n", " hours_per_week label label_code \n", "count 32561.000000 32561 32561.000000 \n", "unique NaN 2 NaN \n", "top NaN <=50K NaN \n", "freq NaN 24720 NaN \n", "mean 40.437456 NaN 0.240810 \n", "std 12.347429 NaN 0.427581 \n", "min 1.000000 NaN 0.000000 \n", "25% 40.000000 NaN 0.000000 \n", "50% 40.000000 NaN 0.000000 \n", "75% 45.000000 NaN 0.000000 \n", "max 99.000000 NaN 1.000000 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## 通过统计方法,了解数据性质\n", "### 数据的基本统计信息\n", "data.describe(include=\"all\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
label<=50K>50K
education_num
(0.999, 9.0]128351919
(9.0, 10.0]59041387
(10.0, 12.0]1823626
(12.0, 16.0]41583909
\n", "
" ], "text/plain": [ "label <=50K >50K\n", "education_num \n", "(0.999, 9.0] 12835 1919\n", "(9.0, 10.0] 5904 1387\n", "(10.0, 12.0] 1823 626\n", "(12.0, 16.0] 4158 3909" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Cross-tabulate education_num (binned at its quartiles) against label\n", "education_bins = pd.qcut(data[\"education_num\"], q=[0, .25, .5, .75, 1])\n", "cross1 = pd.crosstab(index=education_bins, columns=data[\"label\"])\n", "cross1" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(
,\n", " OrderedDict([(('(0.999, 9.0]', ' >50K'),\n", " (0.0, 0.0, 0.4464224253288598, 0.12963430830402656)),\n", " (('(0.999, 9.0]', ' <=50K'),\n", " (0.0,\n", " 0.13295656744023918,\n", " 0.4464224253288598,\n", " 0.8670434325597608)),\n", " (('(9.0, 10.0]', ' >50K'),\n", " (0.451348533703244,\n", " 0.0,\n", " 0.22060904860191918,\n", " 0.18960252730463215)),\n", " (('(9.0, 10.0]', ' <=50K'),\n", " (0.451348533703244,\n", " 0.19292478644084476,\n", " 0.22060904860191918,\n", " 0.8070752135591551)),\n", " (('(10.0, 12.0]', ' >50K'),\n", " (0.6768836906795475,\n", " 0.0,\n", " 0.07410116033823905,\n", " 0.2547653188161416)),\n", " (('(10.0, 12.0]', ' <=50K'),\n", " (0.6768836906795475,\n", " 0.25808757795235426,\n", " 0.07410116033823905,\n", " 0.7419124220476456)),\n", " (('(12.0, 16.0]', ' >50K'),\n", " (0.7559109593921708,\n", " 0.0,\n", " 0.24408904060782916,\n", " 0.4829568971162197)),\n", " (('(12.0, 16.0]', ' <=50K'),\n", " (0.7559109593921708,\n", " 0.48627915625243234,\n", " 0.24408904060782916,\n", " 0.5137208437475675))]))" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAD8CAYAAACYebj1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3Xl8VOW9x/HPk8kGAWIgYQmLCVsUUBYhIiKiolbcUMHaWq/LxbUqdUO9tha1orZa12rV2ttqL25gQZEKUkAosomChEIqlbAkkIAUkgAJmZnn/jGT45CZJJMwyWT5vl+vvF6ZOc85z3NmkvnOOed3zjHWWkRERABioj0AERFpOhQKIiLiUCiIiIhDoSAiIg6FgoiIOBQKIiLiUCiIiIhDoSAiIg6FgoiIOGKjPYC6MsZYY5RlItFkDOhiCM2LtV6staa2ds0wFGJYvs0T7WGItGqn9YQVO6I9CqmLUb2MN5x2+sotIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiIi4lAoiIiIQ6EgIiIOhYKIiDgUCiLSKlhrWTB7RrSHEbZFc9/n+UfuYsXivzVqvwoFEWkV5vzfa/TqnQXAlk1f89AtkwBwV1TwPzdP5ParzuaPzz0act6/f/Qe91w7nt89PjXs/gL7qPSb/7mVvG82hWy/btUyplx9Hk9MvRGPx8PZF03iyhumsG9vYdh9RoJCQURahV078zjh5FM4dLCUBbNncOhgCQD527/l8mtu5aV3FvHlisVB81lrefePz/H0nz6mQ0on8rZsrrWvqn0ArF/9DxLbJpHR78SQ87zx7DSefP2vjBg9jjXLPq3nWh47hYKItHj79+0lrWt3ANomteO2B590ph3fJ4vho8/hu6LdtE9OCZr38KGDdOnWE2MM/QYOIT9vS639Ve3D6/Xy24dvJ++bf/Kbh27D7XYHzROfkECbtkn0GziEnWH00VAUCiLS4hljam3zwmN3c+M9jwU9HxcXj8fj+xD3uN0cLC2uc///3ryBbj0zeebP8+jcrQdf/GNhUJuYGNcx9REpCgURafGSUzqxZ3d+tdNnvPYMw047i95ZA4OmxcXHAwa3203uhrV07X58nftPbNOWnpn9fb8ntsXEBH/0tk9OobT4QL37iBSFgoi0Cr36ZPH1muVBz+/M28KbL03nn+tW8cTUybjdbl547J6j2ky87nZ+cduV7NtbyEnDR/G3mW/yzT/Xh913z8x+uN0V/Oqe68n5aiXDTz+HV558EHdFhdPm6lunMv2+G1i97FPOGj+x/it6jIy1Nmqd10dMjMsu3+aJ9jBEWrXTesKKHdEeRd0tXTCHMeddeszLWbV0AQMGZ9M++bgIjCq0RXPfZ8Paz8kecx6nnXXBMS9vVC/jtda6amunUBCROmuuodCahRsK2n0kIlIHHk/L/lKqUBARCVPB9q28+bsnQk6797qLeGLqZOcEt6onvN11zQ8AePmJB1g09/3GGXA9xEZ7ACIizUHels0s/vh9rr3jIT6bP5uNX650pl01+S66pPfkvumvAN+f8PbqB8v5yyu/dk54W/v5Yg4dLOHsiyaF7KMpUCiIiNTim3+uZ/XSBVw/5RcAnHn+BM48f4IzfdP6L/hm03qm3Xk1vbNOYuJ1twed8Ob1eHj0Z9fw+w+CK6CaEoWCiEgt4uLicbsr8Hq9xMTEsHjeLHK+XOFMv/rm+3jx7UUkJCZ
y3/UXc9Xku4JOePN6vVx1492898fnmfLwb6O1KrVSKIiI1CKj34kktGnL6888zA1THuas8Vdw1vgrnOlfrfyM5JRUMvsPoLTkgP8M6u9PeBt++jnExsXxoxvvZsqPz2Vv4S5Su3SL3grVQAeaRUTC0K3H8Uy67g7+8vtfB03r1TuLlx6/lwdvupzzJ1xNXHx80AlvlX5y6/3VHqxuCnSegojUmc5TaH50noKIiNSZQkFERBwKBRERcSgURETEoVAQERGHQkFERBwKBRERcSgURETEoVAQEYe1lgWzZ0R7GGFbNPd9nn/kLlYs/lu0h9JiKBRExDHn/16jV+8sALZs+pqHbvn+Es8vPX4f/3PzROa9/+eQ865btYwpV5/HE1NvDPtGNEW7dnLHVec4j5964Gbu/NE4nnrg5rD6OPuiSVx5wxT27S0MdxWlFgoFEXHs2pnHCSefwqGDpSyYPYNDB0sA+HduDiUH/sP0V2fy5YrFIed949lpPPn6Xxkxehxrln0aVn/vvvEc1noB2L9vL8NPP4cX3l5IYcF2jpSXR6QPqRuFgogAvg/ltK7dAWib1I7bHnzSmbZ7Rx5ZJ50CQKfO3SgtLQ2aPz4hgTZtk+g3cAg787aE1ecdP3+auPgEAI7rmMo5F19JedlhKo4cIT4hISJ9SN0oFEQEwH+559Di4hNwV1QA4PG4KS4uDmoTE+O71lrl/QPq6/e/foj/+umDIadFqg+pnkJBRABITunEnt35IadlnTSMLZvWA7Bty2ZSU1OD2rRPTqG0+AC5G9bStfvx9RrDoo9n0qZtEiPOGBdyeiT6kJopFETE0atPFl+vCb5dZHJKJ/qccDIP3HgZp59zEfHx8bzy5IPO1gPA1bdOZfp9N7B62aecNX4iq5YuYNXSBWH3fbC0hKfuv5E9u/N5YupkvivaXWsfEnm6n4JIhFhr+XTO25w34cfRHkpIb738FNu/zeWWqdPp1Llrte2WLpjDmPMurXFZ4dxP4V8b15HYpi29evevz3DDsmju+2xY+znZY87jtLMuaLB+WoJw76egUBCJkNl/eZUTTh6OiYnhjd/+krLDh7jxnkePuutWpZcev4+C7VsZPe5ixk+6Nqzlz33vf/nP3iKuue1+ysvKePj2q3C5XFx35y/oP3BIUPt3/vAsX638jJNOGcVPbp0KwMfv/4lhI8fSrWfGMa1rbaHg8XhwuWr9/JFGpJvsiDSyynLOv7zyFL949k0eeelt3nr5yaB24ZR3VpXz5Uo2fLGcyi9xc999g7PGT+SRF9/mgzdfDmpfcmA/qz6bz1N/mE3Rrh0cLC05tpWrg4LtW6u93eS9113EE1Mn87vHfSH194/e455rxzuP77rmBwC8/MQDLJr7fuMMWI4SG+0BiLQEgeWcAIcPlbK3aBcpqV2C2lYt7zx0sJS2Se1qXP6gYSM5Ul5GzpcrASjYsZWLf/jfxMXHhyzdLNq9k74nDgagd/9B7M7fRp+sQfVev3DlbdnM4o/f59o7HuKz+bPZ6B8vwFWT76JLek/um/4K4Nvd9u4fn+PVD5bzl1d+Td6WzQCs/Xwxhw6WcPZFk0L2IQ1LoSASAYHlnP991zRe+83P+XzxPJ59a35Q26rlnQdLimsNhZDLcPuWUbx/X9D0+IDpHo+bQyUNX775zT/Xs3rpAq6f8gsAzjx/AmeeP8GZvmn9F3yzaT3T7rya3lknMfG62+nSrSfGGPoNHEJ+3ha8Hg+P/uwafv9B8MFuaRwKBZEICCznzOh7ApPveRSPx0PWoKFBbbNOGsbCj94BfOWdx3UMLu+szYAh2WzesJYeGX0pLzscND29V28K87cBkJvzJWMCPpwbSlxcPG53BV6vl5iYGBbPm0XOlyuc6VfffB8vvr2IhMRE7rv+Yq6afBcejxv4/rwDr9fLVTfezXt/fJ4pD/+2wccswXRMQSRCAss533xpOpPvfgSA4v3/4U8vPu60q1reGRcfzwuP3VOnvka
dfSGb1q3m4Z9exdW3+PbHBy7D5XJx9oVX8uBNl9O5W8+jdm01lIx+J3LehKt5/ZmHqThyhLPGX8EdP3/a+dn2783kb/s31lpKSw74t64MbrfbOe8gNi6OH914N99u3sDewl0NPmYJpuqjKGvqZYxVvfzEAxQWbOeRF5vPlTQbUzjlnKEs/PBdxl3ywwYY0ffCLUkNR03VR/v2FDLn7de5/s6fH/X8d0W7efze64lPSGTkmT9gwk9u5ssVS3j/f1+gU+du3PPYS9z9Xxfw7FufsGbZQpZ9+iF3P/rCMY1TvqeS1Gaisowxs//AGksMPR4PT95/I/u/20PfAYO5+b5f8cJj95C/7d8c1zGNB556jRmvPs3XXywnNjaOh597i4TExBr7zs35ql6lk3/47TQm3z0tYq+B+DSnMs5wzlOQpkUlqc1EZRljbSWGq5cuoN+AIfzmfz/im43r+HqNrzzxqT/MJi4+nnWrlrJp/Rqe+sNsBgzJZsXiebX23RClk1I/KuOUpuKYDjQbY7oAA621iyI0nlYlsIyxthJDr8dD2eFDuN1uSov30yapHeWHD2GtpahgB117ZFDmf7w7fxsnnRL8rT+USJdOSt2pjFOaknqHgjGmK/AbYEo102OBb/0/AHdYazcYYx4BxgOrrbU/NcZMA7YAbwN/Bx631raKC6UHljHWVmJ46tgfsH7NP7h14hkMGjqSfgMG061nJnf+aBxJ7TvQrcfxjB1/BVNvuIQ9hfnc/eiLtfbfGKWTUjOVcUpTU69QMMakA0/i+6Dfb4x5FcgKaLIImAu8ba29P2C+U4DRQDbwsDEm8FKI04BlrSUQ4OgyxtpKDGNjY7ntwSd56JZJXOO/rPA1t91PYcF2Lr/mNgAuuvJ69u0ppP/AITVeBrlSY5ROSs1UxilNTZ2PKRhjugPTgduttfsBrLU3W2vHBvw8CowELjLGrDbGvOHfcjgTmGV9R7fnA2f4F3s2cC6+YGhVKssYaysxBPhq1VL6DRxCx9TOAOzM85X39c4aCPgubfDNxq8YOda3j/lvM9/km3+ur7H/upZOSmSpjFOamjpXHxljrgY6WWtrrBUzxowAdlprdxlj3gRmAoOBr621c4wx/YG7gd3ALcBBYIi1tsaLtLS06iOofxljbVYtXcCAwdm0Tz4uYstUSWrDaG5lnKo+an4atCTVGHMt0MFa+6L/cajdR09Za8v90+8E4gAvsMta+44xZhi+MCjAFwzdgSPW2sdC9HcTcJP/wSmfb/PWecwtWXMqZZSWQaHQ/DRoSaq19s/AXmPMVP/jULuP3jLGDDbGuIAJwHpgLb5jCuDbasjz/14KPA3cYIzpGKK/16y1w621ww217ytvTVTKKCKRVO/qI2vt28aYy40xV1lr3wnR5FFgBmCAD621C40xMcATxpjngR/4f671L++AMeZ14H7/j9RCpYwiEmnHdJ6CtfaDGqblACdXec7rrzi6EHjeWruVgIPL1trpxzKe1kSljCLSEBr9KqnW2sP4DjrLMVApo4g0BF06u5nK6HciCW3a8vozD3PDlIc5a/wVnDX+Cmf6Vys/Izkllcz+A0KWMg4//RynlHHKj89lb+EuUrt0i94KiUiToGsfNWPdehzPpOvu4C+//3XQtF69s3jp8Xt58KbLOX/C1cTFxzPxutv5xW1Xsm9v4VEXv/vJrfdXe7BaRFoXXSVVROpMJanNj66SKiIidaZQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEJEmLU6fUo1KF8QTkSZt7uvTKC0tjfYwWg1lsIg0aQqExqVQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKF
QEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgUREXEoFERExKFQEBERh0JBREQcCgURaRWstWzatCnawwhbbm4uixcvZuvWrY3ar0JBRFqFr7/+mo4dOwKwZ88ePvroIwA8Hg8ffvgh7733HitWrAg5b25uLh988AGfffZZ2P0F9lFp4cKFfPfddyHb79y5k5kzZ7JgwQK8Xi9ZWVkMGzaMgwcPht1nJCgURKRVKC4upkuXLhw5coRNmzZx5MgRAA4cOMDgwYO58sor2bFjR9B81lrWrl3LZZddRps2bar9UA9UtQ/wfejHxsbSqVOnkPOsWLGCSy+9lF69erFt27Z6ruWxUyiISIt36NAh2rVrB0B8fDxjxoxxpnXs2JHjjz+egwcPkpCQEDRvRUUF7du3xxhD586d2b9/f639Ve3DWsuiRYvYt28fCxcuxOv1Bs3jcrmIi4sLu4+GolAQkRbPGFNrmyVLlnD66acHPe9yubDWAuD1eo/69h+uPXv2kJyczOWXX0779u1DbglUjrG+fUSKQkFE6qX8UGmz6aNNmzaUlla/rC+++IKePXuSmpoaNM3lcgG+D+vCwkI6dOhQ5/7j4uJISUkBIDY2NmRIJSYmUl5eXu8+IiU2aj2LSLM29/Vp0R5CnaSkpJCfn0/37t2Pev4///kPq1atom/fvuzatYtzzz2XpUuXMnbsWKfN0KFD+eijj0hKSiI9PZ2NGzeSlpZG586dw+7b6/XyySef4Ha7GTp0KMuWLWPUqFFO6IwYMYL58+cTGxvLeeedF7H1ritTuVnUXMTEuOzybZ5oD0OkVTutJ9x7773RHkadbdmyhb59+x7zcvLy8ujatSuJiYkRGFVoubm5FBQUkJGRQWZm5jEv75lnnvFaa121tdOWgoi0GpEIBICMjIyILKcmWVlZZGVlNXg/VemYgohImEJVDbU0CgURkTBt37496KzoGTNmMH/+fFavXg3A2rVrmT17tvN41qxZAMybN4+CgoLGHXA9KBRERMKUkZFBYmIiOTk5gO/Et8zMTM4//3yys7MpKysjLy+PCRMmUFJS4pSW5uTkkJqaSnp6ejSHHxaFgohIHWRmZpKcnMy6desoLCwkPz+fDz/8kI0bN1JaWkpaWhoAqampFBcXc/jwYZYtW8bQoUOjPPLw6ECziEgdeTweYmNjyczMpE+fPrhcLmbOnEl6erpz3MHr9VJeXo7X62Xw4MFs2LCBYcOGRXnktdOWgohIHeTm5nLkyBEGDRrEt99+S3l5OR6PB7fbTXJyMsXFxQAUFRXRoUMHkpKSOPXUU9m8eTMVFRVRHn3ttKUgIhKmrVu3kpCQ4JSkdurUiXnz5uFyuRg+fDgxMTH079+fOXPmkJaWRvv27QHfWdGDBg1i/fr1DB8+PIprUDudvCYiddZcT15rzcI9eU27j0RExKFQEBERh0JBxC9O/w1h0evUsulAs4jf3Nen1Xh5ZfFp164dw6dNi/YwpIEo80X8FAjh0evUsikURETEoVAQERGHQkFERBwKBRFxWGuDLg3dlOXm5rJ48WK2bt0a7aG0GKo+EhHH119/TdeuXQHYs2cPK1eu5OKLLwbgs88+48CBA/Tp0yfkvDt37mTlypV06NCBcePGERNT+3fOkpISPvnkEyZNmgTAp59+yv79+znuuOM499xza+0jKyuLrl27smPHjvquslShLQURcRQXF9OlSxeOHDnCpk2bnPsB7N27l7KyMi655JJqP4BXrFjBpZdeSq9evdi2bVtY/a1du5bKS+0cOnSIXr16MWnSJIqLi3G73RHpQ+pGoSAigO9DuV27dgDEx8czZswYZ9qBAwfo0qULAElJSSHLUl0uF3FxcXTu3Jn9+/eH1efYsWNxuXyX42nbti1ZWVlUVFTg9XqJjQ3ekVGfPqRuFAoiAoAxptppLpf
rqPsEVF4eOtT8Xq/X2cKoj+XLl5OdnV3jGI+1D6meQkFEAGjTpk21J6Z16dKFPXv2ALBv3z5SU1OD2iQmJlJeXk5hYSEdOnSo1xj+9a9/ERcXx/HHHx9yeiT6kJopFETEkZKSQn5+ftDzbdq0IS0tjTlz5tC7d2/i4+NZtmwZHs/3l7EfMWIE8+fPZ9u2bfTr14+8vDzy8vLC7vvIkSMsWLCAkpIS5s+fz8GDB2vtQyJP91MQwXeRt3eePbb7A1hr2bx5MyeeeGKERhVZq1evZt++fZxxxhkkJSVV227Lli307du3xmU9/fTTtd5PoaioiNjYWDp27Fiv8YYjNzeXgoICMjIyyMzMbLB+WoJw76fQDEtSLbOe1809JLIqD7Aei8pyzsLCQj7//HPcbjejRo2ie/fuQW0DyzsHDhwY1vJzcnI4dOgQ2dnZuN1u5s6dS0xMDCNHjqRz585B7deuXcuOHTtIT08nOzub7OxscnJyQlb1BKotEGrj9XqJiYkJOaZIy8rKIisrq8H7aU2a3e6j5rZlI81DJC7yVlnOuWbNGi644AIuvPBCVq9eHdQunPLOqgoKCsjPz3f+/jds2ED//v258MILWbduXVD7srIy8vLymDBhAiUlJY16UHb79u1BJ8DNmDGD+fPnO6/H2rVrmT17tvN41qxZAMybN4+CgoJGG6sEa3ahINIUBZZzAlRUVFBcXBxyN03V8s5wPrDT09MZMGBA0DJcLpdT0hmotLSUtLQ0AFJTU0NWCzWUjIwMEhMTycnJccaamZnJ+eefT3Z2drWBlZOTQ2pqKunp6Y02VgmmUBCJgMByztNOO43ly5fzwQcfMHjw4KC2Vcs76/MtPjY21llGWVlZrX2Ul5fXuY9jkZmZSXJyMuvWraOwsJD8/Hw+/PBDNm7cGDKwDh8+zLJlyxg6dGijjlOCKRREIiCwnLNTp06MGjWKjIwMZ4sgUNXyzjZt2tS5v8pjFxUVFSGPESQnJztbB0VFRVEp3/R4PMTGxpKZmclll13GJZdcwqZNm0IGltfrZfDgwWzYsKHRxylHUyiIREhgOeeqVasYNWoU4Psmv3LlSqdd1fJOl8vFkiVL6tRX79692b17N3PnzmXEiBEARy0jJiaG/v37M2fOHNq3b0/79u2PbeXqKDc3lyNHjjBo0CC+/fZbysvL8Xg8uN3ukIGVlJTEqaeeyubNm6moqGjUscrRmmH1UcvS1MsYq1q6dCklJSVceOGF0R5KkzNo0CC2bNkCcNTF3BITExk5cuRRbYcNG8awYcOcx5UXoatJz5496dmzJ+D70B83btxR08eOHXvU4xNOOIETTjjBeVxZktrQpZtbt24lISGBjIwMwLflNG/ePFwuF8OHDz8qsNLS0pzAcrlcDBo0iPXr1zN8+PAGHaNUT6EQZZVljLWVGHq9XhYsWMDhw4dJS0tj9OjRLFmyhP3799O2bVvOPfdcvvjiC/Lz83G5XFxwwQUhrx0TqD6lk2PGjOHzzz+P6GvQktS3nDPww7uhVHfpiEirGjqpqalMnDjxqOeqBtYVV1wBwMknn9zwA5QaafdRlFWWMdZWYrht2zY6d+7MZZddxp49e5zdFBMmTCAmJoadO3eye/duJkyYQNeuXcO6vnxDlE5K/VTuYxeJtoiEgjFmvDEmORLLak0CyxhrKzH0er3O1SPLysqIi4ujoqICay0lJSUkJyfjdrux1lJcXEzbtm3DGkOkSyelflTbL01FpLYU1gMvGWOOq3zCGBNrjNlujFni/znJ//wjxpg1xpjf+R9PM8b8xBjj8rcLvrNGCxVYxlhbiWFmZibl5eW88847dO3alc6dO5OcnMzMmTOJiYmhQ4cO9OvXj9mzZ1NQUBBWrXdjlE5KeFTbL01FRELBWpsP3Ae8YIxJ8T99MvC2tXas/2eDMeYUYDSQDRQZYwKPlE0DlllrP43EmJqDwDLG2koMY2JiGDNmDO3bt+fUU08FfPuIU1JSOP3
00wHfgc709HRGjx5d42WQKzVG6aSET7X90hRE7JiCtXY3cC/wnDGmIzASuMgYs9oY84YxJhY4E5hlfefqzwfO8M9+NnAuvmBoVSrLGGsrMQTfrQjT0tKcXUOVNxmpvIxxWVkZRUVFzoG+jRs3UlRUVGP/dS2dlIal2n6JtohWH1lri4wx3+ILhDXAOGvtLmPMm8B4IAn4t7/5PqALsNs/7SDQFiiJ5JiausoyxnBKDHv06EGPHj2cx8cdd9xR8yQmJjr30wXfcYDaTlqqa+lkZUmqRF5ubi7GGAYNGkRubi49e/YkISEhZG1/3759ndr+d999l5NOOom4uLgor4G0BBENBWPMz4H11tp5xpgEa23lufVfAP2AUqByH0Q7vt9SmQZ0B34GPBZiuTcBN0VyrE3JsVyVsvKKlKFU1olHUuAtGiVyVNsvTUXEQsEYMx1Ybq392P/UW8aYx4EcYAIwHTgCXAm8AwwG8oB4fGHxNLDOGPM7a+2+wGVba18DXvP3o8ukBti+fTuHDx8+6uS3GTNm0KlTJ1JSUsjOzg66hPKsWbO44oormDdvHkOGDNFByiZAtf3SVESqJPUWYHFAIAA8CrwFrANWWGsXAv8AhhpjngceAN6ubGytPQC8DtwfiTG1FqpaEZFIisiWgrX29yGey8FXgRT4nNdfcXQh8Ly1disBB5ettdMjMZ7WJjMzkx07drBu3Tratm3rVK306dOHLl26VFu1Mnny5CiPXESamka/zIW19jAws7H7bekCq1b69OmDy+Vi5syZpKen11i1EngQWUREl7loAXRFShGJFF0Qr5lT1YqIRJJCoZlT1YqIRJJ2H4mIiEOhICIiDoWCiIg4FAoiIuJQKIiIiEOhICIiDuO7tUHz4b8gnm5oGx4DNK83WKpqSe9hS1qXqprDusVYa2u9+1azCwUJnzHmC2utzkxrxlrSe9iS1qWqlrRu2n0kIiIOhYKIiDgUCi3ba9EegByzlvQetqR1qarFrJuOKYiIiENbClFgjMmI9hgixRhzqzFmiTFmYbTHEi3N9f00xjzif+/+Gu2xRIox5lX/Oj3tf5wR3RFFTtV1aygKhUZmjLkfaDF3trHWvmKtHQvsjPZyBSo5AAAEWElEQVRYoqE5v5/W2l8CfwOSoz2WSLHW3uz/e0xtzu9NKIHr1pD9KBQakf9bS09r7Qf+x28YY1YYY35ewzyZxpiPjTHLjDHP1OW5Oowr7HnDGXNrEfh+NtZraIzpYoxZFvA4zhjzkTFmuTHmhhrmC2pXOX5ge6SXXcd1OtEYMyfg8VD/spYaY35Zw3zH+dssN8ZcUGVyO47+X6u6br3837oXGWNeM8aErN+P9LoFLPMfxphqb5Bey7o1KIVC47oG+B2AMeZywGWtPQ3obYzpV808TwGPWWvPAHoYY8bW4blwhTVvHcbcWjjvJ43wGhpjUoA/A0kBT98BrLXWng5MNMa0r2b2UO0C/x4jvexw16kP8BuO3lq5BbjMWjsGuNIYU92WzKPAH4ExwNQqH+y9a1m3m4FbrbVn4wvGkxpp3QCmAjOttQU1zF7TujUohULj6mOt3eT/fSzwnv/3BcDoaubpD3zp/70I3x9YuM+FK9x5wx1zaxH4fjbGa+gBfggUV7O8pUB1J1CFahc4/kgvO1wlwBWBT/h3kxQZY+Lw3QjsUDXzjsH34eoBcoGMgGkdalo3a+1DAdM7AXur6WMsEVw3/5eAu4B4Y0xNy6pp3RqUQiF6koB8/+/7gC7VtJsJ/NIYczHwA+DvdXguXOHOG+6YW6MGfw2ttcXW2gP1XF6N7Rpy2TWx1hZZa8urmXw3MMNaW92NxN3W2tLa+q1m3QAwxvwQ2FjDt/ZIr9vD+LYe/g94xBgztJrZw1q3hqBQaFyHjTHt/L+XAm38v7ejmvfCWvsrfAcDJwN/ttaWhvtcuIOqw7xhjbkVcd7PKL6G4S4vVLvAv8dIL/uYGGNOBcYD02to5gn4vWq/7lr
WDWNMb+Be4Gc1NIv0ug0FXrDW5gNzgTOqaVfTujWo1v5P3djm8f3m5Fq+33UwGMirYb51QC/gt/V4LlzhzFuXMbcGge8nROc1DHd5odpVHX8kl11v/gPgLwM/rmErAWBjwC6YwcC2gGk7qWHd/McZ3gZuqG4rwi/S79cWoPLG6sM5esyBalq3hmWt1U8j/eC7kuJ7QGegA7Ae3wfIJnz7oAcAvwox3yPANXV9DugOPBfm2KrOGzSWUGMOmPanaL++0Xw/j+U1rMv75F/GkoDfjwc2As8DawAXcDZwe5V5QrWrHP87DbDsU4AH6rlO7wJbgSX+nyzgx8DEKvOM8L+WrwFzq0z7U+B7E6KPp4BdAX2c2UjrNgBYBKwC3sF3zKTO69agf9fR/sdqbT9AD3zfgABSgCuBrg3UlwuYEuFlHjVm4Fb/P9XCaL+20X4/j+E1PKb3CUj3Ly+5Lu3whdgK4B8NsOyOwHWN8Pr3BSYCCf7Hr/r/Hp+uz3vTXNatIfvVZS5aMGOMC4iz1pZFeyxSvZb4PhljEgCPtdYd7bFEWkteN9C1j0REJIAONIuIiEOhICIiDoWCiIg4FAoiIuJQKIiIiEOhICIijv8HGJ2vLwWmrzYAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "### Visualize the cross-tabulation as a mosaic plot\n", "from statsmodels.graphics.mosaicplot import mosaic\n", "\n", "\n", "def props(key):\n", "    \"\"\"Return the tile style for one mosaic cell: dark grey when ' >50K' is in the key, light blue otherwise.\"\"\"\n", "    tile_color = \"0.45\" if ' >50K' in key else \"#C6E2FF\"\n", "    return {\"color\": tile_color}\n", "\n", "\n", "mosaic(cross1[[\" >50K\", \" <=50K\"]].stack(), properties=props)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
label<=50K>50K
hours_per_week
(0.902, 20.6]0.9334020.066598
(20.6, 40.2]0.8109910.189009
(40.2, 59.8]0.5992000.400800
(59.8, 79.4]0.5846700.415330
(79.4, 99.0]0.6480940.351906
\n", "
" ], "text/plain": [ "label <=50K >50K\n", "hours_per_week \n", "(0.902, 20.6] 0.933402 0.066598\n", "(20.6, 40.2] 0.810991 0.189009\n", "(40.2, 59.8] 0.599200 0.400800\n", "(59.8, 79.4] 0.584670 0.415330\n", "(79.4, 99.0] 0.648094 0.351906" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "### 计算hours_per_week, label交叉报表\n", "cross2 = pd.crosstab(pd.cut(data[\"hours_per_week\"], 5), data[\"label\"])\n", "### 将交叉报表归一化,利于分析数据\n", "cross2_norm = cross2.div(cross2.sum(1).astype(float), axis=0)\n", "cross2_norm" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAEJCAYAAACAKgxxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAFtlJREFUeJzt3X+U3XV95/HnW4gnOUQjP8IPiXFIoa7UEIgBzR4wgxLFH10UaZvIoqUVWEXR7mZPUVBSmu0KZpHTKAKC7rYoHmwrriSskMWJYBPZpBj8QVnrNmqioTShuEE4C/G9f9xvZm6GO5nvvXOTe/PJ83HOnPnOnc/n+3nnk++85jufufczkZlIksrxgl4XIEnqLoNdkgpjsEtSYQx2SSqMwS5JhTHYJakwBrskFcZgl6TCGOySVJiDezHoEUcckQMDA70YWpL2Wxs2bPjnzJw+XrueBPvAwADr16/vxdCStN+KiJ/UaedSjCQVxmCXpMIY7JJUmJ6ssUvSWJ599lk2b97MM8880+tSemby5MnMmDGDSZMmddTfYJfUVzZv3syLXvQiBgYGiIhel7PPZSbbtm1j8+bNHHfccR2dw6UYSX3lmWee4fDDDz8gQx0gIjj88MMn9BOLwS6p7xyoob7LRP/9BrskFcZgl3TAGBwcHPNzS5cuZWhoqCvn6rX99pena3828XPMf9nEzyFJ/cY7dkkHnB07dnD22WdzxhlncOGFFw4/ft1117FgwQIWLVrEzp07yUwuuugiFixYwHnnncfOnTt7WHV9BrukA84vfvELPvjBD7J69Wo2bdrEY489BsC8efNYs2YN06ZN4+tf/zpf+9rXePbZZ1mzZg0zZ85k5cqVPa68nv12KUaSOjVp0iRuueUWvvCFL7B9+3aefvppAF7zmtcAMHfuXH784x/z3HPPsXbtWgYHB9mxYwevfOUre1l2bd6xSzrg3HrrrZx33nncfvvtHHLIIcOPb9iwAYCHH36YgYEBXvGKV7Bo0SKGhoa4/vrrOfHEE3tVclu8Y5d0wFm4cCHvf//7ufHGGwHYsmULAPfffz8LFixg+vTpnHPOObzgBS9g5cqVLFiwgIjgtttu62XZtUVm7vNB582blxPdj91nxUhleuSRR/abJY+9qdU8RMSGzJw3Xl+XYiSpMAa7JBXGYJekwhjsktSGxx57jPvuu6/XZeyRwS5JNW3dupUlS5Zw8sknt/z8c889x8yZMxkcHGRwcJDvfe97AFx11VWceuqpXHrppUBjX5rbbruNnTt3Mjg4yL333tvVOn26o6S+1o1nwDXr9NlwP//5
z7n88stZsWIFL3nJS7jkkkt49NFHhz//+te/nre97W0sXryYa665ZvjxDRs28MADD/Dggw9y9dVXs3r16uHPLV26lDPOOIOFCxd2/O9pxWCXpHFs2bKFK664gk9/+tO8+MUvBuCmm256XrsbbriBu+66i29+85vMnj2bm266iTVr1vDOd76TiOBNb3oTd999NxHBfffdxw9/+EO+/e1vd71el2IkaRxDQ0PMnTt3ONTHcuqpp7J69WoefPBBnn32WVatWsVTTz3FscceC8Bhhx02vC/NqlWrePzxx/nVr37V9XoNdkkax/nnn8+0adNYsWLF8GOXXHLJ8Fr64OAgV199NSeddBLHHHMM0NhQ7Ec/+hFTp04d3otmx44d/PrXvwYayzDvete7uP7667ter8EuSTW85z3v4YgjjuDaa68FGksxQ0NDw28f//jHueCCC9i4cSM7d+7kzjvvZM6cObz61a/mgQceAGDjxo0MDAwAMHXqVJYsWcLnP/95tm/f3tVaDXZJqmnx4sUcf/zxfPnLX275+V3hfvLJJzN//nzOOussTj/9dB566CE+9KEP8YlPfILFixcPt582bRoXXXTRbr9s7Qb3ipHUV0bvkdIvz4qZiKeffpqVK1cyd+5cZs2aVavPRPaK8VkxkvpaCTdgU6ZM4bzzzttn47kUI0mFMdglqTAGuyQVxmCXpMIY7JI0jlWrVvHkk0/2uozafFaMpL62ZMmSrp5v+fLlbfeZM2cOH/jAB4Y3AIPGTo6zZs0afvriihUrmD17NldddRWrVq3itNNO4zOf+QxLly7l+OOPZ/HixbzhDW/giiuu6PqmX6N5xy5J4zj22GP55Cc/yWWXXcYTTzwBwMMPP8zixYuHX3k6e/bs3XZyPPLII/fJTo6t1Lpjj4hbgROBlZm5rMXnDwW+CBwJbMjMS7papfbIF2tJe9/RRx/N8uXL+fCHP8ynPvUp1q1b1xc7ObYy7h17RJwLHJSZ84FZEXFCi2YXAF+sXhH1oogY95VRkrS/OfLII5k1axbr1q3rm50cW6mzFDMI3FEd3wOc3qLNNuBVEfES4GXA8+4hI+LiiFgfEesff/zxDsuVpN5ZtmwZc+bM4S1veUvf7OTYSp1gPwTYUh1vB45q0eYB4OXAZcAjVbvdZObNmTkvM+dNnz69w3IlqTc++tGPcsopp/D2t78doG92cmylTrDvAKZUx1PH6HMV8O8y82rg74ELu1OeJPXejTfeyJlnnslb3/rW4cf6ZSfHVsbd3TEi3g0cmZnLI+JPgEcz80uj2nwVWA6sA74ErM7Mz411Tnd37C7nQiVptavh/qSTnRxb2du7O94J3B8RLwXeDCyKiGWZeWVTm/8MfIHGcsxa4Pa6xUtSSfb1To6tjBvsmfnLiBgEFgLXZuZWYOOoNg8Cv7VXKpQktaXW89gz8wlGnhkjSXtVZhIRvS6jZyb6B5B85amkvjJ58mS2bds24XDbX2Um27ZtY/LkyR2fw71iJPWVGTNmsHnzZg7k17tMnjyZGTNmdNzfYJfUVyZNmsRxxx3X6zL2ay7FSFJhDHZJKozBLkmFMdglqTAGuyQVxmCXpMIY7JJUGINdkgpjsEtSYQx2SSqMwS5JhTHYJakwBrskFcZgl6TCGOySVBiDXZIKY7BLUmEMdkkqjMEuSYUx2CWpMAa7JBXGYJekwhjsklQYg12SCnNwrwuQpL1t7c8mfo75L5v4OfYV79glqTAGuyQVxmCXpMIY7JJUGINdkgpjsEtSYQx2SSpMrWCPiFsjYm1EXDlOuxsi4re7U5okqRPjBntEnAsclJnzgVkRccIY7c4Ajs7Mr3e5RklSG+rcsQ8Cd1TH9wCnj24QEZOAzwGbIuKcrlUnSWpbnS0FDgG2VMfbgbkt2rwb+CFwLfDBiJiZmSuaG0TExcDFADNnzuy4YGlPDrSXjkut1Llj3wFMqY6njtHnFODmzNwK3AacObpBZt6cmfMyc9706dM7
rVeSNI46wb6BkeWXOcCmFm3+AZhVHc8DfjLhyiRJHamzFHMncH9EvBR4M7AoIpZlZvMzZG4FPh8Ri4BJwHndL1VSO1yWOnCNG+yZ+cuIGAQWAtdWyy0bR7X5v8Dv7JUKJUltqbUfe2Y+wcgzYyRJfcxXnkpSYQx2SSqMwS5JhTHYJakwBrskFcZgl6TCGOySVBiDXZIKY7BLUmEMdkkqjMEuSYUx2CWpMAa7JBXGYJekwhjsklQYg12SCmOwS1JhDHZJKozBLkmFMdglqTAGuyQVxmCXpMIY7JJUGINdkgpjsEtSYQx2SSqMwS5JhTHYJakwBrskFcZgl6TCGOySVBiDXZIKY7BLUmEMdkkqjMEuSYU5uE6jiLgVOBFYmZnL9tDuKOB/ZOYpXapPUg8tWbJkQv2XL1/epUrUjnHv2CPiXOCgzJwPzIqIE/bQfDkwpVvFSZLaV2cpZhC4ozq+Bzi9VaOIeD3wFLC1K5VJkjpSZynmEGBLdbwdmDu6QUS8EPgY8A7gzlYniYiLgYsBZs6c2Umt0j7h8oP2d3Xu2HcwsrwydYw+lwM3ZOa/jHWSzLw5M+dl5rzp06e3X6kkqZY6wb6BkeWXOcCmFm3OAi6NiCHg5Ii4pSvVSZLaVmcp5k7g/oh4KfBmYFFELMvMK3c1yMzX7TqOiKHMfG/3S5Uk1TFusGfmLyNiEFgIXJuZW4GNe2g/2LXqJEltq/U89sx8gpFnxkiS+pivPJWkwhjsklQYg12SCmOwS1JhDHZJKkytZ8VI0oFuf9pqwjt2SSqMwS5JhTHYJakwBrskFcZgl6TCGOySVBiDXZIKY7BLUmEMdkkqjMEuSYUx2CWpMAa7JBXGYJekwhjsklQYg12SCmOwS1JhDHZJKozBLkmFMdglqTAGuyQVxmCXpMIY7JJUGINdkgpjsEtSYQx2SSqMwS5JhTHYJakwBrskFcZgl6TCGOySVJhawR4Rt0bE2oi4cozPT4uIuyPinoj4akS8sLtlSpLqGjfYI+Jc4KDMnA/MiogTWjQ7H7guM98IbAXO7m6ZkqS6Dq7RZhC4ozq+Bzgd+FFzg8y8oenD6cA/daM4SVL76izFHAJsqY63A0eN1TAi5gOHZua6Fp+7OCLWR8T6xx9/vKNiJUnjqxPsO4Ap1fHUsfpExGHACuAPWn0+M2/OzHmZOW/69Omd1CpJqqHOUswGGssv64A5wKOjG1S/LP0K8JHM/ElXK9Q+sWTJkgn1X758eZcqkTRRde7Y7wQuiIjrgN8FfhARy0a1+UNgLnBFRAxFxO91uU5JUk3j3rFn5i8jYhBYCFybmVuBjaPafBb47F6pUJLUljpLMWTmE4w8M0aS1Md85akkFcZgl6TCGOySVBiDXZIKY7BLUmEMdkkqjMEuSYUx2CWpMAa7JBXGYJekwhjsklQYg12SCmOwS1JhDHZJKozBLkmFMdglqTAGuyQVxmCXpMIY7JJUGINdkgpjsEtSYQx2SSqMwS5JhTm41wX00pIlSybUf/ny5V2qRJK6xzt2SSqMwS5JhTHYJakwBrskFcZgl6TCGOySVBiDXZIKY7BLUmEMdkkqjMEuSYUx2CWpMLWCPSJujYi1EXHlRNpIkva+cYM9Is4FDsrM+cCsiDihkzaSpH2jzh37IHBHdXwPcHqHbSRJ+0Bk5p4bRNwK/HlmboyINwJzM/MTHbS5GLi4+vAVwKPd+kdMwBHAP/e6iD7hXIxwLkY4FyP6YS5enpnTx2tUZz/2HcCU6ngqre/yx22TmTcDN9cYb5+JiPWZOa/XdfQD52KEczHCuRixP81FnaWYDYwsrcwBNnXYRpK0D9S5Y78TuD8iXgq8GVgUEcsy88o9tHlt90uVJNUx7h17Zv6Sxi9H1wFnZubGUaHeqs2T3S91r+irpaEecy5GOBcjnIsR+81cjPvLU0nS/qVvXnkaEQO9rmFvioj3RcRQRKxus9/A3qlo3+t0Dpr6D3S3ot6JiHdUc/H9DvoO
dL+i3pjIPDSdY6B7FfXWRL9GdumLYI+IPwbm9rqOvSkzP5uZg8Dmun1Km5dO5mCXAufiq9VcrG+nn/OwuwLno+OvkWY9D/bqu+3LMvNvqo/rbF9wXESsjIj7I+K/ND2+W9+ImBYRd0fEPRHx1Yh4YRt1tezbztYJEXFDRPx23TFH9R2gmpc+qOWoiHio6eM6/0cdz32Lcw3QdI30oqaIODgiflrdTQ1FxOyIODwivlJ9/BcRMWmMvi+MiDsiYl01bst2NWoYYOSa6Ek9TXfYQ1X/b9Qdt+kcUyLi/7T1j299ngFG5uN9TXV9NyJuGisn9nC+3a6pmjU8b4x2xm3na7gdPQ924ALgM9DW1gTXAH+amWcAMyJicIy+5wPXZeYbga3A2W3U9by+bdRHRJwBHJ2ZX29jzGbD89IHtSynep1CG+NOZO5Ha56LXtV0EnB7Zg5Wb98D/hj46+oO6xfV+Vs5G9iYma8FHgHOaWPcZs3z0JN6dt1hV2P8NXBTG+PuciVwTN0x92B4Pnbd6VY13A98jhY5Mc75hq+pNrQao9a47XwNt6sfgv03MvOR6niQelsT/Cbwd9XxPwHTWvXNzBsy897qselV21rG6FurvuqO5XPApojo9It4eF56WUtEvB54ikYQUnfcicx9C83XSK9qei3wtoh4sLrLOpjW12Er24ATI2IqcCLwozbGbdY8Dz2tJyKmAG+sfoqqOy4R8a9ofFP6TrtjtrDbdVGd/1jgqMxc32Zdo6+pulqNUXfcQfbSViz9EOzNDgG2VMfbgaPGaPdXwFXRWFo4G/ife+obEfOBQzNzXbsFjepbt753Az8ErgVOi4gPtjtuP9RSLVV8DLi86eG647aqecJ6WNP/As7KzNOAScBbaNyxXh4RC4E/BL42Rt8fVO8voxEeE16G6IN6LgC+VB3XHRcad8WXdTBeXZcCn62OW+XE84xxTdXVaoxa49LmdduOfgj2p6s7B6i3fQGZuQy4G3gv8N8yc8dYfSPiMGAF8AftFtaib636gFOAmzNzK3AbcGa7Y7P7vPSqlsuBGzLzX5oeqzvuhOZ+lOa56FVND2fmL6rj9cAJmfmXwKeB3wP+NjM3jdH3MuCazPwzGiH4kTbH3qV5Hnpdz2LgKwB1x42IdwNrMvMfOxivldFfIy+gcX0PVXW1yolWWl1TtbQao41xa1+37eqHYF8FvLM6bmdrgu8CM4HrxupbfSf+CvCRzPxJO0WN0bduff8AzKqO5wFtjV0Znpce1nIWcGlEDAEnR8QtdcedyNy30HyN9Kqmv4yIORFxEPB2YGP1+HeBVwEf30PfQ4HZ1fG/Bjp98UjzPPSsnuqXlk+OCqw6454N/Jum/7u72hm3heb5ADgD+E7u/uKc0TnRSqtrqh2txqgz7t7biiUze/oGBI11piOBF9O4QK+j8UudaTTWAJe16PcnwAVNH7fq+z7gCRrfwYdo3FEcC1xfo65WfWvVB7yIRoB8C1gLHNv0uf/awbx0tZa6czDqPEN7mOdW4z6v5nbnoNVcdLumNq6HVwEPA98D/lPT4xcCH2v6+DDgllF9fwN4kMayx3c6uR5aXBNdrQc4CPhyzTouBv79qMfGHbfV/12n10Sr6wL4M+DcUW1G50TLPGlxTdX+Ghk9Rt1xW123E5mP3c49kc7degNmAO+qjg8FfpfGszjaPc+4fasL+EMTqLWj+qpQGQJWdzIvXa5lv5mDfp+LCczhO6q5+H4352GCNf2H/WUe9vZ87KvrYvR1O5Gvkea3A25LgepH10mZ+Uyva+kV52CEczEiIqbm2OvBB5T9/bo44IJdkkrXD788lSR1kcEuSYUx2CWpMAa7JBXGYFdfiIilNTZpKla1kd3SXtehMhjsklQYg139ZGFEfKvaT/vlEXF7RKyJiC9GYw/x4bv6iPj9iPj96ngoIj4ZEd+oPp4SEXdV5/pqtfPh81Tnu7sa46+iscd5RMTnmh47qGmMP4qIh8cqPiLu
jYjDImJbRBwREetbnW+sMapzHBONfbyP7d606kBjsKufHJ+ZrwP+BngPjVcjLqCxreyeNu16LbA2M99UfXwi8OvqXF+gscHSWO6vxniMxr7k59B4YcoC4KfAW6t2xwCZmSft4VwPVe3/tnr/92Ocb6wxpgL/HbgkM7cgdchgVz/5i+r9T2nsuLdrz+51wCtHtW3+gwjfz6a/rkRjL+zvR8Q9wJuAX+1hzA3V+4eBAeAVwPxqQ6jXMbKV6pPAn49T/9/ReHn4SuB3qnO3Ot9YY7wX+Eca+8BIHTPY1U+eajr+jzTuxKne/wD4fzT+QAbs/tePRr8Mfg7w7Wz8paRDaez6N5bTqven0NgJ81Eam2ENAh+msZc9wK8y89fj1P9QVdc3gDfQCPpW5xtrjBtp/GTyR9Hhn8+TAFquPUp94DngtyLiW8DPaOzc95vADRHxBhp/BWgsm4BrIuIK4Bn2/IeST63unLcCd9HYwvatEbGmOv63bdT8v6u6NtH4qeMhGt90Rp9vbYvHjgeeycwdEfEVGptBjfcTgtSSe8XogFU9vXAoM4d6XIrUVd6x64BQ3ZU3ezIzO/17tETEl4GjRz385sx8utNzSt3iHbskFcZfnkpSYQx2SSqMwS5JhTHYJakwBrskFeb/AwXyYnlplV9QAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "### Plot the row-normalized cross-tabulation as a bar chart\n", "cross2_norm.plot(kind=\"bar\", color=[\"#C6E2FF\", \"0.45\"], rot=0)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Split the data into a training set and a test set\n", "from sklearn.model_selection import train_test_split\n", "\n", "\n", "train_set, test_set = train_test_split(data, test_size=0.2, random_state=2310)\n", "## Detach both parts from `data`: pandas still flags them as slices of the parent frame, so adding\n", "## columns to them later (e.g. test_set[\"prob\"]) raises SettingWithCopyWarning unless they are copies\n", "train_set = train_set.copy()\n", "test_set = test_set.copy()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Optimization terminated successfully.\n", " Current function value: 0.406094\n", " Iterations 8\n" ] } ], "source": [ "# Build the logistic regression model and fit it on the training set\n", "import statsmodels.api as sm\n", "\n", "\n", "## The formula interface makes it easy to define new terms, e.g. age * education_num\n", "formula = \"label_code ~ age + education_num + capital_gain + capital_loss + hours_per_week\"\n", "model = sm.Logit.from_formula(formula, data=train_set)\n", "## NOTE: the name `re` shadows the standard-library `re` module; it is kept because the following cells use it\n", "re = model.fit()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "检验假设education_num的系数等于0:\n", "\n", "检验假设education_num的系数等于0.32和hours_per_week的系数等于0.04同时成立:\n", "\n" ] } ], "source": [ "## Inspect the statistical properties of the fitted model\n", "### f_test of the hypothesis that the education_num coefficient is zero (i.e. whether it is significant)\n", "print(\"检验假设education_num的系数等于0:\")\n", "print(re.f_test(\"education_num=0\"))\n", "### f_test of two hypotheses holding jointly\n", "print(\"检验假设education_num的系数等于0.32和hours_per_week的系数等于0.04同时成立:\")\n", "print(re.f_test(\"education_num=0.32, hours_per_week=0.04\"))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Logit Regression Results
Dep. Variable: label_code No. Observations: 26048
Model: Logit Df Residuals: 26042
Method: MLE Df Model: 5
Date: Sun, 24 Feb 2019 Pseudo R-squ.: 0.2639
Time: 22:14:24 Log-Likelihood: -10578.
converged: True LL-Null: -14370.
LLR p-value: 0.000
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err z P>|z| [0.025 0.975]
Intercept -8.2970 0.128 -64.623 0.000 -8.549 -8.045
age 0.0435 0.001 31.726 0.000 0.041 0.046
education_num 0.3215 0.008 42.231 0.000 0.307 0.336
capital_gain 0.0003 1.07e-05 29.650 0.000 0.000 0.000
capital_loss 0.0007 3.64e-05 20.055 0.000 0.001 0.001
hours_per_week 0.0399 0.001 26.995 0.000 0.037 0.043
" ], "text/plain": [ "\n", "\"\"\"\n", " Logit Regression Results \n", "==============================================================================\n", "Dep. Variable: label_code No. Observations: 26048\n", "Model: Logit Df Residuals: 26042\n", "Method: MLE Df Model: 5\n", "Date: Sun, 24 Feb 2019 Pseudo R-squ.: 0.2639\n", "Time: 22:14:24 Log-Likelihood: -10578.\n", "converged: True LL-Null: -14370.\n", " LLR p-value: 0.000\n", "==================================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", "----------------------------------------------------------------------------------\n", "Intercept -8.2970 0.128 -64.623 0.000 -8.549 -8.045\n", "age 0.0435 0.001 31.726 0.000 0.041 0.046\n", "education_num 0.3215 0.008 42.231 0.000 0.307 0.336\n", "capital_gain 0.0003 1.07e-05 29.650 0.000 0.000 0.000\n", "capital_loss 0.0007 3.64e-05 20.055 0.000 0.001 0.001\n", "hours_per_week 0.0399 0.001 26.995 0.000 0.037 0.043\n", "==================================================================================\n", "\"\"\"" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "### Overall summary of the fitted model\n", "re.summary()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "事件发生概率(预测概率)大于0.6的数据个数:\n", "576\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ] } ], "source": [ "# Use the fitted model to predict on the test data\n", "## Predicted probability of the event; assign() builds a new frame instead of writing into a slice,\n", "## which avoids the SettingWithCopyWarning raised by test_set[\"prob\"] = ...\n", "test_set = test_set.assign(prob=re.predict(test_set))\n", 
"print(\"事件发生概率(预测概率)大于0.6的数据个数:\")\n", "print(test_set.loc[test_set[\"prob\"] > 0.6].shape[0]) # the stored output shows 576" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageeducation_numcapital_gaincapital_losshours_per_weeklabellabel_codeprobpred
194635590040<=50K00.1952400
2443038100040<=50K00.1378150
1962136110060<=50K00.3099760
386241110184848>50K10.5713431
2754920100040<=50K00.0681340
\n", "
" ], "text/plain": [ " age education_num capital_gain capital_loss hours_per_week label \\\n", "19463 55 9 0 0 40 <=50K \n", "24430 38 10 0 0 40 <=50K \n", "19621 36 11 0 0 60 <=50K \n", "3862 41 11 0 1848 48 >50K \n", "27549 20 10 0 0 40 <=50K \n", "\n", " label_code prob pred \n", "19463 0 0.195240 0 \n", "24430 0 0.137815 0 \n", "19621 0 0.309976 0 \n", "3862 1 0.571343 1 \n", "27549 0 0.068134 0 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Turn the predicted probabilities into final 0/1 predictions using the cutoff alpha\n", "alpha = 0.5\n", "## Vectorized comparison instead of a row-wise apply(); astype(\"int64\") keeps integer 0/1 labels\n", "test_set.loc[:, \"pred\"] = (test_set[\"prob\"] > alpha).astype(\"int64\")\n", "test_set.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01OR
Intercept-8.548604-8.045324-8.296964
age0.0407690.0461380.043453
education_num0.3065470.3363860.321467
capital_gain0.0002980.0003400.000319
capital_loss0.0006590.0008020.000730
hours_per_week0.0370380.0428380.039938
\n", "
" ], "text/plain": [ " 0 1 OR\n", "Intercept -8.548604 -8.045324 -8.296964\n", "age 0.040769 0.046138 0.043453\n", "education_num 0.306547 0.336386 0.321467\n", "capital_gain 0.000298 0.000340 0.000319\n", "capital_loss 0.000659 0.000802 0.000730\n", "hours_per_week 0.037038 0.042838 0.039938" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Interpret the model results\n", "## Confidence bounds from conf_int() (columns 0 and 1) plus an \"OR\" column holding the fitted\n", "## coefficients (re.params), which are not exponentiated at this point\n", "conf = re.conf_int().assign(OR=re.params)\n", "conf" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "各个变量对事件发生比的影响:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2.5%97.5%OR
Intercept0.0001940.0003210.000249
age1.0416111.0472181.044411
education_num1.3587251.3998791.379149
capital_gain1.0002981.0003401.000319
capital_loss1.0006591.0008021.000731
hours_per_week1.0377331.0437691.040746
\n", "
" ], "text/plain": [ " 2.5% 97.5% OR\n", "Intercept 0.000194 0.000321 0.000249\n", "age 1.041611 1.047218 1.044411\n", "education_num 1.358725 1.399879 1.379149\n", "capital_gain 1.000298 1.000340 1.000319\n", "capital_loss 1.000659 1.000802 1.000731\n", "hours_per_week 1.037733 1.043769 1.040746" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Effect of each variable on the odds of the event\n", "## The three columns of conf are the lower bound, the upper bound and the estimate itself\n", "import numpy as np\n", "\n", "\n", "column_labels = [\"2.5%\", \"97.5%\", \"OR\"]\n", "conf.columns = column_labels\n", "print(\"各个变量对事件发生比的影响:\")\n", "## Exponentiating the coefficients and their bounds turns log-odds into odds ratios\n", "np.exp(conf)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "各个变量的边际效应:\n", " Logit Marginal Effects \n", "=====================================\n", "Dep. Variable: label_code\n", "Method: dydx\n", "At: overall\n", "==================================================================================\n", " dy/dx std err z P>|z| [0.025 0.975]\n", "----------------------------------------------------------------------------------\n", "age 0.0056 0.000 33.563 0.000 0.005 0.006\n", "education_num 0.0413 0.001 47.313 0.000 0.040 0.043\n", "capital_gain 4.09e-05 1.3e-06 31.500 0.000 3.84e-05 4.34e-05\n", "capital_loss 9.372e-05 4.54e-06 20.648 0.000 8.48e-05 0.000\n", "hours_per_week 0.0051 0.000 28.167 0.000 0.005 0.005\n", "==================================================================================\n" ] } ], "source": [ "## Marginal effect of each variable\n", "print(\"各个变量的边际效应:\")\n", "marginal_effects = re.get_margeff(at=\"overall\")\n", "print(marginal_effects.summary())" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, 
"nbformat_minor": 2 }