- 添加 .idea 目录和相关配置文件,设置项目忽略文件、编码、模块管理等 - 创建商务大数据分析目录和子目录,准备数据和任务笔记本 - 添加示例数据文件:中国城市人口数据.csv - 创建任务笔记本文件,进行数据处理和分析示例
479 lines
14 KiB
Plaintext
479 lines
14 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-02T07:57:56.382179Z",
|
||
"start_time": "2025-04-02T07:57:55.984261Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "import pandas as pd",
|
||
"id": "3244cf38b10be81b",
|
||
"outputs": [],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-02T08:00:15.267189Z",
|
||
"start_time": "2025-04-02T08:00:15.229542Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"data = pd.read_csv('data/某地区房屋销售数据 (1).csv', encoding='gbk')\n",
|
||
"data['new_postcode'] = data['地区邮编'].apply(lambda x: str(x)[:2])\n",
|
||
"data.head(3)"
|
||
],
|
||
"id": "d973cf9fe6ac90a6",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode\n",
|
||
"0 2010/1/4 0:00 2615 435000 house 3 26\n",
|
||
"1 2010/1/5 0:00 2904 712000 house 4 29\n",
|
||
"2 2010/1/6 0:00 2617 435000 house 4 26"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>房屋出售时间</th>\n",
|
||
" <th>地区邮编</th>\n",
|
||
" <th>房屋价格</th>\n",
|
||
" <th>房屋类型</th>\n",
|
||
" <th>配套房间数</th>\n",
|
||
" <th>new_postcode</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>2010/1/4 0:00</td>\n",
|
||
" <td>2615</td>\n",
|
||
" <td>435000</td>\n",
|
||
" <td>house</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>26</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2010/1/5 0:00</td>\n",
|
||
" <td>2904</td>\n",
|
||
" <td>712000</td>\n",
|
||
" <td>house</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>29</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>2010/1/6 0:00</td>\n",
|
||
" <td>2617</td>\n",
|
||
" <td>435000</td>\n",
|
||
" <td>house</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>26</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 4
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-02T08:00:25.320359Z",
|
||
"start_time": "2025-04-02T08:00:25.301349Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
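"# 1. Mean house price (房屋价格) for each region (new_postcode, the first two characters of 地区邮编) and each number of rooms (配套房间数), using pivot_table\n",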
|
||
"data.pivot_table(values='房屋价格', index='new_postcode', columns='配套房间数', aggfunc='mean')"
|
||
],
|
||
"id": "c9d4b29b2fbd4334",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"配套房间数 0 1 2 3 \\\n",
|
||
"new_postcode \n",
|
||
"26 564125.0 343189.962401 457595.588277 624204.46900 \n",
|
||
"29 528000.0 292934.514286 381675.627240 475210.25609 \n",
|
||
"\n",
|
||
"配套房间数 4 5 \n",
|
||
"new_postcode \n",
|
||
"26 810389.319007 1.037034e+06 \n",
|
||
"29 651102.874716 7.995584e+05 "
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th>配套房间数</th>\n",
|
||
" <th>0</th>\n",
|
||
" <th>1</th>\n",
|
||
" <th>2</th>\n",
|
||
" <th>3</th>\n",
|
||
" <th>4</th>\n",
|
||
" <th>5</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>new_postcode</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>564125.0</td>\n",
|
||
" <td>343189.962401</td>\n",
|
||
" <td>457595.588277</td>\n",
|
||
" <td>624204.46900</td>\n",
|
||
" <td>810389.319007</td>\n",
|
||
" <td>1.037034e+06</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>528000.0</td>\n",
|
||
" <td>292934.514286</td>\n",
|
||
" <td>381675.627240</td>\n",
|
||
" <td>475210.25609</td>\n",
|
||
" <td>651102.874716</td>\n",
|
||
" <td>7.995584e+05</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 5
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-02T08:04:02.430064Z",
|
||
"start_time": "2025-04-02T08:04:02.415284Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
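"# 2. Highest house price (房屋价格) for each region (new_postcode) and property type (房屋类型), to see which type is most expensive in each region, using pivot_table\n",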
|
||
"data.pivot_table(values='房屋价格', index='new_postcode', columns='房屋类型', aggfunc='max')"
|
||
],
|
||
"id": "a5e4f3321d168313",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"房屋类型 house unit\n",
|
||
"new_postcode \n",
|
||
"26 8000000 2500000\n",
|
||
"29 5425000 769500"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th>房屋类型</th>\n",
|
||
" <th>house</th>\n",
|
||
" <th>unit</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>new_postcode</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>8000000</td>\n",
|
||
" <td>2500000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>5425000</td>\n",
|
||
" <td>769500</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 7
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-02T08:04:41.035870Z",
|
||
"start_time": "2025-04-02T08:04:41.012959Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# 3. Mean house price (房屋价格) for each property type (房屋类型) and each number of rooms (配套房间数), for comparison, using pivot_table\n",
|
||
"data.pivot_table(values='房屋价格', index='房屋类型', columns='配套房间数', aggfunc='mean')"
|
||
],
|
||
"id": "4ed9b36daea1c503",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"配套房间数 0 1 2 3 \\\n",
|
||
"房屋类型 \n",
|
||
"house 677394.736842 353634.269663 489555.889339 560117.683516 \n",
|
||
"unit 330850.000000 336570.325391 432502.153116 594535.982287 \n",
|
||
"\n",
|
||
"配套房间数 4 5 \n",
|
||
"房屋类型 \n",
|
||
"house 730667.024375 9.290297e+05 \n",
|
||
"unit 641736.842105 1.146333e+06 "
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th>配套房间数</th>\n",
|
||
" <th>0</th>\n",
|
||
" <th>1</th>\n",
|
||
" <th>2</th>\n",
|
||
" <th>3</th>\n",
|
||
" <th>4</th>\n",
|
||
" <th>5</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>房屋类型</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>house</th>\n",
|
||
" <td>677394.736842</td>\n",
|
||
" <td>353634.269663</td>\n",
|
||
" <td>489555.889339</td>\n",
|
||
" <td>560117.683516</td>\n",
|
||
" <td>730667.024375</td>\n",
|
||
" <td>9.290297e+05</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>unit</th>\n",
|
||
" <td>330850.000000</td>\n",
|
||
" <td>336570.325391</td>\n",
|
||
" <td>432502.153116</td>\n",
|
||
" <td>594535.982287</td>\n",
|
||
" <td>641736.842105</td>\n",
|
||
" <td>1.146333e+06</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 8
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-02T08:05:23.703349Z",
|
||
"start_time": "2025-04-02T08:05:23.691916Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# 4. Cross-tabulation of the number of sales records per region (new_postcode) and number of rooms (配套房间数), using crosstab (see Example 3-61)\n",
|
||
"pd.crosstab(data['new_postcode'], data['配套房间数'])"
|
||
],
|
||
"id": "799d99489d93b2b5",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"配套房间数 0 1 2 3 4 5\n",
|
||
"new_postcode \n",
|
||
"26 24 1383 2815 6371 4793 1007\n",
|
||
"29 5 175 558 4557 4845 835"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th>配套房间数</th>\n",
|
||
" <th>0</th>\n",
|
||
" <th>1</th>\n",
|
||
" <th>2</th>\n",
|
||
" <th>3</th>\n",
|
||
" <th>4</th>\n",
|
||
" <th>5</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>new_postcode</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>26</th>\n",
|
||
" <td>24</td>\n",
|
||
" <td>1383</td>\n",
|
||
" <td>2815</td>\n",
|
||
" <td>6371</td>\n",
|
||
" <td>4793</td>\n",
|
||
" <td>1007</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>29</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>175</td>\n",
|
||
" <td>558</td>\n",
|
||
" <td>4557</td>\n",
|
||
" <td>4845</td>\n",
|
||
" <td>835</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 10
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|