dev_xulongjin 655911b748 chore(project): 初始化项目结构和配置
- 添加 .idea 目录和相关配置文件,设置项目忽略文件、编码、模块管理等
- 创建商务大数据分析目录和子目录,准备数据和任务笔记本
- 添加示例数据文件:中国城市人口数据.csv
- 创建任务笔记本文件,进行数据处理和分析示例
2025-04-14 16:06:13 +08:00

479 lines
14 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:57:56.382179Z",
"start_time": "2025-04-02T07:57:55.984261Z"
}
},
"cell_type": "code",
"source": "import pandas as pd",
"id": "3244cf38b10be81b",
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T08:00:15.267189Z",
"start_time": "2025-04-02T08:00:15.229542Z"
}
},
"cell_type": "code",
"source": [
"data = pd.read_csv('data/某地区房屋销售数据 (1).csv', encoding='gbk')\n",
"data['new_postcode'] = data['地区邮编'].apply(lambda x: str(x)[:2])\n",
"data.head(3)"
],
"id": "d973cf9fe6ac90a6",
"outputs": [
{
"data": {
"text/plain": [
" 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode\n",
"0 2010/1/4 0:00 2615 435000 house 3 26\n",
"1 2010/1/5 0:00 2904 712000 house 4 29\n",
"2 2010/1/6 0:00 2617 435000 house 4 26"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>房屋出售时间</th>\n",
" <th>地区邮编</th>\n",
" <th>房屋价格</th>\n",
" <th>房屋类型</th>\n",
" <th>配套房间数</th>\n",
" <th>new_postcode</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2010/1/4 0:00</td>\n",
" <td>2615</td>\n",
" <td>435000</td>\n",
" <td>house</td>\n",
" <td>3</td>\n",
" <td>26</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2010/1/5 0:00</td>\n",
" <td>2904</td>\n",
" <td>712000</td>\n",
" <td>house</td>\n",
" <td>4</td>\n",
" <td>29</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2010/1/6 0:00</td>\n",
" <td>2617</td>\n",
" <td>435000</td>\n",
" <td>house</td>\n",
" <td>4</td>\n",
" <td>26</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T08:00:25.320359Z",
"start_time": "2025-04-02T08:00:25.301349Z"
}
},
"cell_type": "code",
"source": [
"# 1、求出不同地区、不同房间数的平均房价(使用 pivot_table 函数)\n",
"data.pivot_table(values='房屋价格', index='new_postcode', columns='配套房间数', aggfunc='mean')"
],
"id": "c9d4b29b2fbd4334",
"outputs": [
{
"data": {
"text/plain": [
"配套房间数 0 1 2 3 \\\n",
"new_postcode \n",
"26 564125.0 343189.962401 457595.588277 624204.46900 \n",
"29 528000.0 292934.514286 381675.627240 475210.25609 \n",
"\n",
"配套房间数 4 5 \n",
"new_postcode \n",
"26 810389.319007 1.037034e+06 \n",
"29 651102.874716 7.995584e+05 "
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>配套房间数</th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" <tr>\n",
" <th>new_postcode</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>564125.0</td>\n",
" <td>343189.962401</td>\n",
" <td>457595.588277</td>\n",
" <td>624204.46900</td>\n",
" <td>810389.319007</td>\n",
" <td>1.037034e+06</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>528000.0</td>\n",
" <td>292934.514286</td>\n",
" <td>381675.627240</td>\n",
" <td>475210.25609</td>\n",
" <td>651102.874716</td>\n",
" <td>7.995584e+05</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 5
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T08:04:02.430064Z",
"start_time": "2025-04-02T08:04:02.415284Z"
}
},
"cell_type": "code",
"source": [
"# 2、比较不同地区各类型房产的最高房价,找出哪种类型最贵(使用 pivot_table 函数)\n",
"data.pivot_table(values='房屋价格', index='new_postcode', columns='房屋类型', aggfunc='max')"
],
"id": "a5e4f3321d168313",
"outputs": [
{
"data": {
"text/plain": [
"房屋类型 house unit\n",
"new_postcode \n",
"26 8000000 2500000\n",
"29 5425000 769500"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>房屋类型</th>\n",
" <th>house</th>\n",
" <th>unit</th>\n",
" </tr>\n",
" <tr>\n",
" <th>new_postcode</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>8000000</td>\n",
" <td>2500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>5425000</td>\n",
" <td>769500</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 7
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T08:04:41.035870Z",
"start_time": "2025-04-02T08:04:41.012959Z"
}
},
"cell_type": "code",
"source": [
"# 3、比较不同类型房产、不同房间数的平均房价(使用 pivot_table 函数)\n",
"data.pivot_table(values='房屋价格', index='房屋类型', columns='配套房间数', aggfunc='mean')"
],
"id": "4ed9b36daea1c503",
"outputs": [
{
"data": {
"text/plain": [
"配套房间数 0 1 2 3 \\\n",
"房屋类型 \n",
"house 677394.736842 353634.269663 489555.889339 560117.683516 \n",
"unit 330850.000000 336570.325391 432502.153116 594535.982287 \n",
"\n",
"配套房间数 4 5 \n",
"房屋类型 \n",
"house 730667.024375 9.290297e+05 \n",
"unit 641736.842105 1.146333e+06 "
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>配套房间数</th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" <tr>\n",
" <th>房屋类型</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>house</th>\n",
" <td>677394.736842</td>\n",
" <td>353634.269663</td>\n",
" <td>489555.889339</td>\n",
" <td>560117.683516</td>\n",
" <td>730667.024375</td>\n",
" <td>9.290297e+05</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unit</th>\n",
" <td>330850.000000</td>\n",
" <td>336570.325391</td>\n",
" <td>432502.153116</td>\n",
" <td>594535.982287</td>\n",
" <td>641736.842105</td>\n",
" <td>1.146333e+06</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 8
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T08:05:23.703349Z",
"start_time": "2025-04-02T08:05:23.691916Z"
}
},
"cell_type": "code",
"source": [
"# 4、不同地区、不同房间数的房屋销售数量交叉表(使用 crosstab 函数,参考例 3-61)\n",
"pd.crosstab(data['new_postcode'], data['配套房间数'])"
],
"id": "799d99489d93b2b5",
"outputs": [
{
"data": {
"text/plain": [
"配套房间数 0 1 2 3 4 5\n",
"new_postcode \n",
"26 24 1383 2815 6371 4793 1007\n",
"29 5 175 558 4557 4845 835"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>配套房间数</th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" </tr>\n",
" <tr>\n",
" <th>new_postcode</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>24</td>\n",
" <td>1383</td>\n",
" <td>2815</td>\n",
" <td>6371</td>\n",
" <td>4793</td>\n",
" <td>1007</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>5</td>\n",
" <td>175</td>\n",
" <td>558</td>\n",
" <td>4557</td>\n",
" <td>4845</td>\n",
" <td>835</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 10
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}