{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Read the data with pandas.\n", "import pandas as pd\n", "\n", "\n", "# Turn off pandas' chained-assignment warning for the rest of the session.\n", "pd.options.mode.chained_assignment = None\n", "data_path = \"./data/adult.data\"\n", "raw_data = pd.read_csv(data_path)\n", "# Select the columns this notebook works with.\n", "cols = [\"age\", \"education_num\", \"capital_gain\", \"capital_loss\", \"hours_per_week\", \"label\"]\n", "# Take an explicit copy: `data` then owns its values, so adding columns to it\n", "# later is a plain assignment on an independent frame, not on a slice of raw_data.\n", "data = raw_data[cols].copy()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | age | \n", "education_num | \n", "capital_gain | \n", "capital_loss | \n", "hours_per_week | \n", "label | \n", "label_code | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "39 | \n", "13 | \n", "2174 | \n", "0 | \n", "40 | \n", "<=50K | \n", "0 | \n", "
| 1 | \n", "50 | \n", "13 | \n", "0 | \n", "0 | \n", "13 | \n", "<=50K | \n", "0 | \n", "
| 2 | \n", "38 | \n", "9 | \n", "0 | \n", "0 | \n", "40 | \n", "<=50K | \n", "0 | \n", "
| 3 | \n", "53 | \n", "7 | \n", "0 | \n", "0 | \n", "40 | \n", "<=50K | \n", "0 | \n", "
| 4 | \n", "28 | \n", "13 | \n", "0 | \n", "0 | \n", "40 | \n", "<=50K | \n", "0 | \n", "