- 添加 .idea 目录和相关配置文件,设置项目忽略文件、编码、模块管理等 - 创建商务大数据分析目录和子目录,准备数据和任务笔记本 - 添加示例数据文件:中国城市人口数据.csv - 创建任务笔记本文件,进行数据处理和分析示例
465 lines
12 KiB
Plaintext
465 lines
12 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T10:57:29.263616Z",
|
||
"start_time": "2025-04-08T10:57:28.865194Z"
|
||
}
|
||
},
|
||
"source": "import pandas as pd",
|
||
"outputs": [],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T10:59:11.991479Z",
|
||
"start_time": "2025-04-08T10:59:11.985778Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"data = pd.read_csv('data/中国城市人口数据.csv',encoding=\"GBK\")\n",
|
||
"data.head(5)"
|
||
],
|
||
"id": "c3fd933261d1f7fb",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 省份 2020年人口(万人) 2019年人口(万人)\n",
|
||
"0 河北省 7461 7447\n",
|
||
"1 山西省 3492 3497\n",
|
||
"2 辽宁省 4259 4277\n",
|
||
"3 吉林省 2407 2448\n",
|
||
"4 江苏省 8475 8469"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>省份</th>\n",
|
||
" <th>2020年人口(万人)</th>\n",
|
||
" <th>2019年人口(万人)</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>河北省</td>\n",
|
||
" <td>7461</td>\n",
|
||
" <td>7447</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>山西省</td>\n",
|
||
" <td>3492</td>\n",
|
||
" <td>3497</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>辽宁省</td>\n",
|
||
" <td>4259</td>\n",
|
||
" <td>4277</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>吉林省</td>\n",
|
||
" <td>2407</td>\n",
|
||
" <td>2448</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>江苏省</td>\n",
|
||
" <td>8475</td>\n",
|
||
" <td>8469</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 4
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T11:10:42.958649Z",
|
||
"start_time": "2025-04-08T11:10:42.956278Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "data.shape",
|
||
"id": "a0d05b5dea7e5cfc",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(22, 3)"
|
||
]
|
||
},
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 15
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T11:10:37.015499Z",
|
||
"start_time": "2025-04-08T11:10:37.005663Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "data.info()",
|
||
"id": "b602f50b182485dd",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 22 entries, 0 to 21\n",
|
||
"Data columns (total 3 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 省份 22 non-null object\n",
|
||
" 1 2020年人口(万人) 22 non-null int64 \n",
|
||
" 2 2019年人口(万人) 22 non-null int64 \n",
|
||
"dtypes: int64(2), object(1)\n",
|
||
"memory usage: 656.0+ bytes\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 13
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T11:01:22.100819Z",
|
||
"start_time": "2025-04-08T11:01:22.080321Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "data.describe()",
|
||
"id": "1218c2b44c21d012",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 2020年人口(万人) 2019年人口(万人)\n",
|
||
"count 22.000000 22.000000\n",
|
||
"mean 5482.772727 5478.500000\n",
|
||
"std 3067.216187 3043.789239\n",
|
||
"min 592.000000 590.000000\n",
|
||
"25% 3583.000000 3584.750000\n",
|
||
"50% 4620.000000 4615.000000\n",
|
||
"75% 7256.750000 7245.250000\n",
|
||
"max 12601.000000 12489.000000"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>2020年人口(万人)</th>\n",
|
||
" <th>2019年人口(万人)</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>22.000000</td>\n",
|
||
" <td>22.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>5482.772727</td>\n",
|
||
" <td>5478.500000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>3067.216187</td>\n",
|
||
" <td>3043.789239</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>592.000000</td>\n",
|
||
" <td>590.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>3583.000000</td>\n",
|
||
" <td>3584.750000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>4620.000000</td>\n",
|
||
" <td>4615.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>7256.750000</td>\n",
|
||
" <td>7245.250000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>12601.000000</td>\n",
|
||
" <td>12489.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 7
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T11:10:29.629195Z",
|
||
"start_time": "2025-04-08T11:10:29.625545Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# 2020年总人口\n",
|
||
"data['2020年人口(万人)'].sum()"
|
||
],
|
||
"id": "93faae0d69a5d4e2",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"120621"
|
||
]
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 12
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T11:11:33.375487Z",
|
||
"start_time": "2025-04-08T11:11:33.361781Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# 2019年总人口\n",
|
||
"data['2019年人口(万人)'].sum()"
|
||
],
|
||
"id": "81d587e3605ba734",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"120527"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 16
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T11:13:18.823381Z",
|
||
"start_time": "2025-04-08T11:13:18.809605Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# 总人口对比\n",
|
||
"data['2020年人口(万人)'].sum() - data['2019年人口(万人)'].sum()"
|
||
],
|
||
"id": "2e5f8e1821c05cdf",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"94"
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 20
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-04-08T11:13:20.739094Z",
|
||
"start_time": "2025-04-08T11:13:20.731449Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# 各省人口对比\n",
|
||
"data2 = data.copy()\n",
|
||
"data2['compare'] = data2['2020年人口(万人)'] - data2['2019年人口(万人)']\n",
|
||
"data2.head(5)"
|
||
],
|
||
"id": "e7bda6c2c79305bb",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
" 省份 2020年人口(万人) 2019年人口(万人) compare\n",
|
||
"0 河北省 7461 7447 14\n",
|
||
"1 山西省 3492 3497 -5\n",
|
||
"2 辽宁省 4259 4277 -18\n",
|
||
"3 吉林省 2407 2448 -41\n",
|
||
"4 江苏省 8475 8469 6"
|
||
],
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>省份</th>\n",
|
||
" <th>2020年人口(万人)</th>\n",
|
||
" <th>2019年人口(万人)</th>\n",
|
||
" <th>compare</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>河北省</td>\n",
|
||
" <td>7461</td>\n",
|
||
" <td>7447</td>\n",
|
||
" <td>14</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>山西省</td>\n",
|
||
" <td>3492</td>\n",
|
||
" <td>3497</td>\n",
|
||
" <td>-5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>辽宁省</td>\n",
|
||
" <td>4259</td>\n",
|
||
" <td>4277</td>\n",
|
||
" <td>-18</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>吉林省</td>\n",
|
||
" <td>2407</td>\n",
|
||
" <td>2448</td>\n",
|
||
" <td>-41</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>江苏省</td>\n",
|
||
" <td>8475</td>\n",
|
||
" <td>8469</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
]
|
||
},
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 21
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|