dev_xulongjin 655911b748 chore(project): 初始化项目结构和配置
- 添加 .idea 目录和相关配置文件,设置项目忽略文件、编码、模块管理等
- 创建商务大数据分析目录和子目录,准备数据和任务笔记本
- 添加示例数据文件:中国城市人口数据.csv
- 创建任务笔记本文件,进行数据处理和分析示例
2025-04-14 16:06:13 +08:00

465 lines
12 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-04-08T10:57:29.263616Z",
"start_time": "2025-04-08T10:57:28.865194Z"
}
},
"source": "import pandas as pd",
"outputs": [],
"execution_count": 1
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T10:59:11.991479Z",
"start_time": "2025-04-08T10:59:11.985778Z"
}
},
"cell_type": "code",
"source": [
"data = pd.read_csv('data/中国城市人口数据.csv',encoding=\"GBK\")\n",
"data.head(5)"
],
"id": "c3fd933261d1f7fb",
"outputs": [
{
"data": {
"text/plain": [
" 省份 2020年人口万人 2019年人口万人\n",
"0 河北省 7461 7447\n",
"1 山西省 3492 3497\n",
"2 辽宁省 4259 4277\n",
"3 吉林省 2407 2448\n",
"4 江苏省 8475 8469"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>省份</th>\n",
" <th>2020年人口万人</th>\n",
" <th>2019年人口万人</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>河北省</td>\n",
" <td>7461</td>\n",
" <td>7447</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>山西省</td>\n",
" <td>3492</td>\n",
" <td>3497</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>辽宁省</td>\n",
" <td>4259</td>\n",
" <td>4277</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>吉林省</td>\n",
" <td>2407</td>\n",
" <td>2448</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>江苏省</td>\n",
" <td>8475</td>\n",
" <td>8469</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 4
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T11:10:42.958649Z",
"start_time": "2025-04-08T11:10:42.956278Z"
}
},
"cell_type": "code",
"source": "data.shape",
"id": "a0d05b5dea7e5cfc",
"outputs": [
{
"data": {
"text/plain": [
"(22, 3)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 15
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T11:10:37.015499Z",
"start_time": "2025-04-08T11:10:37.005663Z"
}
},
"cell_type": "code",
"source": "data.info()",
"id": "b602f50b182485dd",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 22 entries, 0 to 21\n",
"Data columns (total 3 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 省份 22 non-null object\n",
" 1 2020年人口万人 22 non-null int64 \n",
" 2 2019年人口万人 22 non-null int64 \n",
"dtypes: int64(2), object(1)\n",
"memory usage: 656.0+ bytes\n"
]
}
],
"execution_count": 13
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T11:01:22.100819Z",
"start_time": "2025-04-08T11:01:22.080321Z"
}
},
"cell_type": "code",
"source": "data.describe()",
"id": "1218c2b44c21d012",
"outputs": [
{
"data": {
"text/plain": [
" 2020年人口万人 2019年人口万人\n",
"count 22.000000 22.000000\n",
"mean 5482.772727 5478.500000\n",
"std 3067.216187 3043.789239\n",
"min 592.000000 590.000000\n",
"25% 3583.000000 3584.750000\n",
"50% 4620.000000 4615.000000\n",
"75% 7256.750000 7245.250000\n",
"max 12601.000000 12489.000000"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>2020年人口万人</th>\n",
" <th>2019年人口万人</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>22.000000</td>\n",
" <td>22.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>5482.772727</td>\n",
" <td>5478.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>3067.216187</td>\n",
" <td>3043.789239</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>592.000000</td>\n",
" <td>590.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>3583.000000</td>\n",
" <td>3584.750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>4620.000000</td>\n",
" <td>4615.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>7256.750000</td>\n",
" <td>7245.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>12601.000000</td>\n",
" <td>12489.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 7
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T11:10:29.629195Z",
"start_time": "2025-04-08T11:10:29.625545Z"
}
},
"cell_type": "code",
"source": [
"# 2020年总人口\n",
"data['2020年人口万人'].sum()"
],
"id": "93faae0d69a5d4e2",
"outputs": [
{
"data": {
"text/plain": [
"120621"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 12
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T11:11:33.375487Z",
"start_time": "2025-04-08T11:11:33.361781Z"
}
},
"cell_type": "code",
"source": [
"# 2019年总人口\n",
"data['2019年人口万人'].sum()"
],
"id": "81d587e3605ba734",
"outputs": [
{
"data": {
"text/plain": [
"120527"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 16
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T11:13:18.823381Z",
"start_time": "2025-04-08T11:13:18.809605Z"
}
},
"cell_type": "code",
"source": [
"# 总人口对比\n",
"data['2020年人口万人'].sum() - data['2019年人口万人'].sum()"
],
"id": "2e5f8e1821c05cdf",
"outputs": [
{
"data": {
"text/plain": [
"94"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 20
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-04-08T11:13:20.739094Z",
"start_time": "2025-04-08T11:13:20.731449Z"
}
},
"cell_type": "code",
"source": [
"# 各省人口对比\n",
"data2 = data\n",
"data2['compare'] = data2['2020年人口万人'] - data2['2019年人口万人']\n",
"data2.head(5)"
],
"id": "e7bda6c2c79305bb",
"outputs": [
{
"data": {
"text/plain": [
" 省份 2020年人口万人 2019年人口万人 compare\n",
"0 河北省 7461 7447 14\n",
"1 山西省 3492 3497 -5\n",
"2 辽宁省 4259 4277 -18\n",
"3 吉林省 2407 2448 -41\n",
"4 江苏省 8475 8469 6"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>省份</th>\n",
" <th>2020年人口万人</th>\n",
" <th>2019年人口万人</th>\n",
" <th>compare</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>河北省</td>\n",
" <td>7461</td>\n",
" <td>7447</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>山西省</td>\n",
" <td>3492</td>\n",
" <td>3497</td>\n",
" <td>-5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>辽宁省</td>\n",
" <td>4259</td>\n",
" <td>4277</td>\n",
" <td>-18</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>吉林省</td>\n",
" <td>2407</td>\n",
" <td>2448</td>\n",
" <td>-41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>江苏省</td>\n",
" <td>8475</td>\n",
" <td>8469</td>\n",
" <td>6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 21
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}