{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-04-02T07:51:13.983021Z",
"start_time": "2025-04-02T07:51:13.980852Z"
}
},
"source": "import pandas as pd",
"outputs": [],
"execution_count": 113
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:51:14.035104Z",
"start_time": "2025-04-02T07:51:14.008139Z"
}
},
"cell_type": "code",
"source": [
"data = pd.read_csv('data/某地区房屋销售数据 (1).csv', encoding='gbk')\n",
"data.head(5)"
],
"id": "6f3a167b4381943a",
"outputs": [
{
"data": {
"text/plain": [
" 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数\n",
"0 2010/1/4 0:00 2615 435000 house 3\n",
"1 2010/1/5 0:00 2904 712000 house 4\n",
"2 2010/1/6 0:00 2617 435000 house 4\n",
"3 2010/1/6 0:00 2606 1350000 house 5\n",
"4 2010/1/7 0:00 2905 612500 house 4"
],
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 房屋出售时间 | \n",
" 地区邮编 | \n",
" 房屋价格 | \n",
" 房屋类型 | \n",
" 配套房间数 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2010/1/4 0:00 | \n",
" 2615 | \n",
" 435000 | \n",
" house | \n",
" 3 | \n",
"
\n",
" \n",
" 1 | \n",
" 2010/1/5 0:00 | \n",
" 2904 | \n",
" 712000 | \n",
" house | \n",
" 4 | \n",
"
\n",
" \n",
" 2 | \n",
" 2010/1/6 0:00 | \n",
" 2617 | \n",
" 435000 | \n",
" house | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 2010/1/6 0:00 | \n",
" 2606 | \n",
" 1350000 | \n",
" house | \n",
" 5 | \n",
"
\n",
" \n",
" 4 | \n",
" 2010/1/7 0:00 | \n",
" 2905 | \n",
" 612500 | \n",
" house | \n",
" 4 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 114
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:51:14.079308Z",
"start_time": "2025-04-02T07:51:14.069694Z"
}
},
"cell_type": "code",
"source": [
"data['new_postcode'] = data['地区邮编'].apply(lambda x: str(x)[:2])\n",
"data.head(5)"
],
"id": "817b591e756eaf93",
"outputs": [
{
"data": {
"text/plain": [
" 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode\n",
"0 2010/1/4 0:00 2615 435000 house 3 26\n",
"1 2010/1/5 0:00 2904 712000 house 4 29\n",
"2 2010/1/6 0:00 2617 435000 house 4 26\n",
"3 2010/1/6 0:00 2606 1350000 house 5 26\n",
"4 2010/1/7 0:00 2905 612500 house 4 29"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 房屋出售时间 | \n",
" 地区邮编 | \n",
" 房屋价格 | \n",
" 房屋类型 | \n",
" 配套房间数 | \n",
" new_postcode | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2010/1/4 0:00 | \n",
" 2615 | \n",
" 435000 | \n",
" house | \n",
" 3 | \n",
" 26 | \n",
"
\n",
" \n",
" 1 | \n",
" 2010/1/5 0:00 | \n",
" 2904 | \n",
" 712000 | \n",
" house | \n",
" 4 | \n",
" 29 | \n",
"
\n",
" \n",
" 2 | \n",
" 2010/1/6 0:00 | \n",
" 2617 | \n",
" 435000 | \n",
" house | \n",
" 4 | \n",
" 26 | \n",
"
\n",
" \n",
" 3 | \n",
" 2010/1/6 0:00 | \n",
" 2606 | \n",
" 1350000 | \n",
" house | \n",
" 5 | \n",
" 26 | \n",
"
\n",
" \n",
" 4 | \n",
" 2010/1/7 0:00 | \n",
" 2905 | \n",
" 612500 | \n",
" house | \n",
" 4 | \n",
" 29 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 115
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:51:14.136665Z",
"start_time": "2025-04-02T07:51:14.129644Z"
}
},
"cell_type": "code",
"source": "data.groupby('new_postcode').agg({'房屋出售时间':'count'})",
"id": "4f648cd98de38213",
"outputs": [
{
"data": {
"text/plain": [
" 房屋出售时间\n",
"new_postcode \n",
"26 16393\n",
"29 10975"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 房屋出售时间 | \n",
"
\n",
" \n",
" new_postcode | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 26 | \n",
" 16393 | \n",
"
\n",
" \n",
" 29 | \n",
" 10975 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 116
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:51:14.229857Z",
"start_time": "2025-04-02T07:51:14.216154Z"
}
},
"cell_type": "code",
"source": [
"housesale1 = data.groupby(['房屋类型', 'new_postcode']).apply(lambda x:x).reset_index()\n",
"housesale1"
],
"id": "31e96124eb1769ea",
"outputs": [
{
"data": {
"text/plain": [
" index 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode\n",
"0 0 2010/1/4 0:00 2615 435000 house 3 26\n",
"1 1 2010/1/5 0:00 2904 712000 house 4 29\n",
"2 2 2010/1/6 0:00 2617 435000 house 4 26\n",
"3 3 2010/1/6 0:00 2606 1350000 house 5 26\n",
"4 4 2010/1/7 0:00 2905 612500 house 4 29\n",
"... ... ... ... ... ... ... ...\n",
"27363 27363 2019/7/25 0:00 2900 500000 unit 3 29\n",
"27364 27364 2019/7/25 0:00 2612 560000 unit 2 26\n",
"27365 27365 2019/7/26 0:00 2912 464950 unit 2 29\n",
"27366 27366 2019/7/26 0:00 2601 589000 unit 2 26\n",
"27367 27367 2019/7/26 0:00 2612 775000 unit 2 26\n",
"\n",
"[27368 rows x 7 columns]"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" index | \n",
" 房屋出售时间 | \n",
" 地区邮编 | \n",
" 房屋价格 | \n",
" 房屋类型 | \n",
" 配套房间数 | \n",
" new_postcode | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 2010/1/4 0:00 | \n",
" 2615 | \n",
" 435000 | \n",
" house | \n",
" 3 | \n",
" 26 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2010/1/5 0:00 | \n",
" 2904 | \n",
" 712000 | \n",
" house | \n",
" 4 | \n",
" 29 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 2010/1/6 0:00 | \n",
" 2617 | \n",
" 435000 | \n",
" house | \n",
" 4 | \n",
" 26 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 2010/1/6 0:00 | \n",
" 2606 | \n",
" 1350000 | \n",
" house | \n",
" 5 | \n",
" 26 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 2010/1/7 0:00 | \n",
" 2905 | \n",
" 612500 | \n",
" house | \n",
" 4 | \n",
" 29 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 27363 | \n",
" 27363 | \n",
" 2019/7/25 0:00 | \n",
" 2900 | \n",
" 500000 | \n",
" unit | \n",
" 3 | \n",
" 29 | \n",
"
\n",
" \n",
" 27364 | \n",
" 27364 | \n",
" 2019/7/25 0:00 | \n",
" 2612 | \n",
" 560000 | \n",
" unit | \n",
" 2 | \n",
" 26 | \n",
"
\n",
" \n",
" 27365 | \n",
" 27365 | \n",
" 2019/7/26 0:00 | \n",
" 2912 | \n",
" 464950 | \n",
" unit | \n",
" 2 | \n",
" 29 | \n",
"
\n",
" \n",
" 27366 | \n",
" 27366 | \n",
" 2019/7/26 0:00 | \n",
" 2601 | \n",
" 589000 | \n",
" unit | \n",
" 2 | \n",
" 26 | \n",
"
\n",
" \n",
" 27367 | \n",
" 27367 | \n",
" 2019/7/26 0:00 | \n",
" 2612 | \n",
" 775000 | \n",
" unit | \n",
" 2 | \n",
" 26 | \n",
"
\n",
" \n",
"
\n",
"
27368 rows × 7 columns
\n",
"
"
]
},
"execution_count": 117,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 117
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:51:14.304214Z",
"start_time": "2025-04-02T07:51:14.298702Z"
}
},
"cell_type": "code",
"source": "data['平均价格'] = data.groupby(['房屋类型', 'new_postcode'])['房屋价格'].transform('mean')",
"id": "5249fcce9b76b48f",
"outputs": [],
"execution_count": 118
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:52:09.492950Z",
"start_time": "2025-04-02T07:52:09.480225Z"
}
},
"cell_type": "code",
"source": [
"data2 = data.drop_duplicates(['房屋类型','new_postcode'],inplace=False)\n",
"data2"
],
"id": "93afa495c804a0f6",
"outputs": [
{
"data": {
"text/plain": [
" 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode 平均价格\n",
"0 2010/1/4 0:00 2615 435000 house 3 26 725040.113978\n",
"1 2010/1/5 0:00 2904 712000 house 4 29 582085.199671\n",
"22595 2010/1/11 0:00 2602 270000 unit 1 26 434573.470446\n",
"22607 2010/2/9 0:00 2900 436000 unit 2 29 369109.530255"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 房屋出售时间 | \n",
" 地区邮编 | \n",
" 房屋价格 | \n",
" 房屋类型 | \n",
" 配套房间数 | \n",
" new_postcode | \n",
" 平均价格 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2010/1/4 0:00 | \n",
" 2615 | \n",
" 435000 | \n",
" house | \n",
" 3 | \n",
" 26 | \n",
" 725040.113978 | \n",
"
\n",
" \n",
" 1 | \n",
" 2010/1/5 0:00 | \n",
" 2904 | \n",
" 712000 | \n",
" house | \n",
" 4 | \n",
" 29 | \n",
" 582085.199671 | \n",
"
\n",
" \n",
" 22595 | \n",
" 2010/1/11 0:00 | \n",
" 2602 | \n",
" 270000 | \n",
" unit | \n",
" 1 | \n",
" 26 | \n",
" 434573.470446 | \n",
"
\n",
" \n",
" 22607 | \n",
" 2010/2/9 0:00 | \n",
" 2900 | \n",
" 436000 | \n",
" unit | \n",
" 2 | \n",
" 29 | \n",
" 369109.530255 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 125,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 125
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-02T07:52:17.149242Z",
"start_time": "2025-04-02T07:52:17.142432Z"
}
},
"cell_type": "code",
"source": "data2[['房屋类型','new_postcode','平均价格']]",
"id": "9dd96081baad6b3d",
"outputs": [
{
"data": {
"text/plain": [
" 房屋类型 new_postcode 平均价格\n",
"0 house 26 725040.113978\n",
"1 house 29 582085.199671\n",
"22595 unit 26 434573.470446\n",
"22607 unit 29 369109.530255"
],
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" 房屋类型 | \n",
" new_postcode | \n",
" 平均价格 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" house | \n",
" 26 | \n",
" 725040.113978 | \n",
"
\n",
" \n",
" 1 | \n",
" house | \n",
" 29 | \n",
" 582085.199671 | \n",
"
\n",
" \n",
" 22595 | \n",
" unit | \n",
" 26 | \n",
" 434573.470446 | \n",
"
\n",
" \n",
" 22607 | \n",
" unit | \n",
" 29 | \n",
" 369109.530255 | \n",
"
\n",
" \n",
"
\n",
"
"
]
},
"execution_count": 126,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 126
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}