{ "cells": [ { "cell_type": "code", "id": "initial_id", "metadata": { "collapsed": true, "ExecuteTime": { "end_time": "2025-04-02T07:51:13.983021Z", "start_time": "2025-04-02T07:51:13.980852Z" } }, "source": "import pandas as pd", "outputs": [], "execution_count": 113 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-02T07:51:14.035104Z", "start_time": "2025-04-02T07:51:14.008139Z" } }, "cell_type": "code", "source": [ "# Load the regional house-sales CSV; it is decoded with the GBK codec passed below.\n", "data = pd.read_csv('data/某地区房屋销售数据 (1).csv', encoding='gbk')\n", "# Preview the first five rows (head() defaults to n=5).\n", "data.head()" ], "id": "6f3a167b4381943a", "outputs": [ { "data": { "text/plain": [ " 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数\n", "0 2010/1/4 0:00 2615 435000 house 3\n", "1 2010/1/5 0:00 2904 712000 house 4\n", "2 2010/1/6 0:00 2617 435000 house 4\n", "3 2010/1/6 0:00 2606 1350000 house 5\n", "4 2010/1/7 0:00 2905 612500 house 4" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
房屋出售时间地区邮编房屋价格房屋类型配套房间数
02010/1/4 0:002615435000house3
12010/1/5 0:002904712000house4
22010/1/6 0:002617435000house4
32010/1/6 0:0026061350000house5
42010/1/7 0:002905612500house4
\n", "
" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 114 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-02T07:51:14.079308Z", "start_time": "2025-04-02T07:51:14.069694Z" } }, "cell_type": "code", "source": [ "# Bucket each sale by the first two characters of its postcode (地区邮编), e.g. 2615 -> '26'.\n", "# The vectorised .str accessor replaces a per-row Python lambda and yields the same strings.\n", "data['new_postcode'] = data['地区邮编'].astype(str).str[:2]\n", "data.head(5)" ], "id": "817b591e756eaf93", "outputs": [ { "data": { "text/plain": [ " 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode\n", "0 2010/1/4 0:00 2615 435000 house 3 26\n", "1 2010/1/5 0:00 2904 712000 house 4 29\n", "2 2010/1/6 0:00 2617 435000 house 4 26\n", "3 2010/1/6 0:00 2606 1350000 house 5 26\n", "4 2010/1/7 0:00 2905 612500 house 4 29" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
房屋出售时间地区邮编房屋价格房屋类型配套房间数new_postcode
02010/1/4 0:002615435000house326
12010/1/5 0:002904712000house429
22010/1/6 0:002617435000house426
32010/1/6 0:0026061350000house526
42010/1/7 0:002905612500house429
\n", "
" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 115 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-02T07:51:14.136665Z", "start_time": "2025-04-02T07:51:14.129644Z" } }, "cell_type": "code", "source": [ "# Number of sales per two-character postcode prefix: count the non-null\n", "# sale timestamps (房屋出售时间) inside each new_postcode group.\n", "data.groupby('new_postcode')[['房屋出售时间']].count()" ], "id": "4f648cd98de38213", "outputs": [ { "data": { "text/plain": [ " 房屋出售时间\n", "new_postcode \n", "26 16393\n", "29 10975" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
房屋出售时间
new_postcode
2616393
2910975
\n", "
" ] }, "execution_count": 116, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 116 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-02T07:51:14.229857Z", "start_time": "2025-04-02T07:51:14.216154Z" } }, "cell_type": "code", "source": [ "# groupby(...).apply(lambda x: x) is an identity pass whose result index depends on the pandas\n", "# version (from pandas 2.0 the group keys are added to the index, so a chained reset_index()\n", "# clashes with the existing columns). reset_index() on its own yields the frame shown below:\n", "# every row in its original order, with the old row labels kept in an 'index' column.\n", "housesale1 = data.reset_index()\n", "housesale1" ], "id": "31e96124eb1769ea", "outputs": [ { "data": { "text/plain": [ " index 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode\n", "0 0 2010/1/4 0:00 2615 435000 house 3 26\n", "1 1 2010/1/5 0:00 2904 712000 house 4 29\n", "2 2 2010/1/6 0:00 2617 435000 house 4 26\n", "3 3 2010/1/6 0:00 2606 1350000 house 5 26\n", "4 4 2010/1/7 0:00 2905 612500 house 4 29\n", "... ... ... ... ... ... ... ...\n", "27363 27363 2019/7/25 0:00 2900 500000 unit 3 29\n", "27364 27364 2019/7/25 0:00 2612 560000 unit 2 26\n", "27365 27365 2019/7/26 0:00 2912 464950 unit 2 29\n", "27366 27366 2019/7/26 0:00 2601 589000 unit 2 26\n", "27367 27367 2019/7/26 0:00 2612 775000 unit 2 26\n", "\n", "[27368 rows x 7 columns]" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index房屋出售时间地区邮编房屋价格房屋类型配套房间数new_postcode
002010/1/4 0:002615435000house326
112010/1/5 0:002904712000house429
222010/1/6 0:002617435000house426
332010/1/6 0:0026061350000house526
442010/1/7 0:002905612500house429
........................
27363273632019/7/25 0:002900500000unit329
27364273642019/7/25 0:002612560000unit226
27365273652019/7/26 0:002912464950unit229
27366273662019/7/26 0:002601589000unit226
27367273672019/7/26 0:002612775000unit226
\n", "

27368 rows × 7 columns

\n", "
" ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 117 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-02T07:51:14.304214Z", "start_time": "2025-04-02T07:51:14.298702Z" } }, "cell_type": "code", "source": [ "# Attach to every row the mean sale price (房屋价格) of its (房屋类型, new_postcode) group.\n", "# transform returns one value per input row, so it can be assigned straight back as a column.\n", "data['平均价格'] = data.groupby(['房屋类型', 'new_postcode'])['房屋价格'].transform('mean')" ], "id": "5249fcce9b76b48f", "outputs": [], "execution_count": 118 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-02T07:52:09.492950Z", "start_time": "2025-04-02T07:52:09.480225Z" } }, "cell_type": "code", "source": [ "# Keep the first row of each (房屋类型, new_postcode) pair. 平均价格 is constant within a group,\n", "# so the surviving rows carry exactly one mean price per group; a new frame is returned.\n", "data2 = data.drop_duplicates(subset=['房屋类型', 'new_postcode'])\n", "data2" ], "id": "93afa495c804a0f6", "outputs": [ { "data": { "text/plain": [ " 房屋出售时间 地区邮编 房屋价格 房屋类型 配套房间数 new_postcode 平均价格\n", "0 2010/1/4 0:00 2615 435000 house 3 26 725040.113978\n", "1 2010/1/5 0:00 2904 712000 house 4 29 582085.199671\n", "22595 2010/1/11 0:00 2602 270000 unit 1 26 434573.470446\n", "22607 2010/2/9 0:00 2900 436000 unit 2 29 369109.530255" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
房屋出售时间地区邮编房屋价格房屋类型配套房间数new_postcode平均价格
02010/1/4 0:002615435000house326725040.113978
12010/1/5 0:002904712000house429582085.199671
225952010/1/11 0:002602270000unit126434573.470446
226072010/2/9 0:002900436000unit229369109.530255
\n", "
" ] }, "execution_count": 125, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 125 }, { "metadata": { "ExecuteTime": { "end_time": "2025-04-02T07:52:17.149242Z", "start_time": "2025-04-02T07:52:17.142432Z" } }, "cell_type": "code", "source": [ "# Show only the two grouping keys and the per-group mean price.\n", "data2.loc[:, ['房屋类型', 'new_postcode', '平均价格']]" ], "id": "9dd96081baad6b3d", "outputs": [ { "data": { "text/plain": [ " 房屋类型 new_postcode 平均价格\n", "0 house 26 725040.113978\n", "1 house 29 582085.199671\n", "22595 unit 26 434573.470446\n", "22607 unit 29 369109.530255" ], "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
房屋类型new_postcode平均价格
0house26725040.113978
1house29582085.199671
22595unit26434573.470446
22607unit29369109.530255
\n", "
" ] }, "execution_count": 126, "metadata": {}, "output_type": "execute_result" } ], "execution_count": 126 } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }