358 lines
11 KiB
Plaintext
358 lines
11 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.502127Z",
|
||
"start_time": "2025-05-07T06:54:44.496736Z"
|
||
}
|
||
},
|
||
"source": "import pandas as pd",
|
||
"outputs": [],
|
||
"execution_count": 19
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.557639Z",
|
||
"start_time": "2025-05-07T06:54:44.523063Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Load the product sales data from the GBK-encoded CSV file.\n",
"file_path = 'data/商品销售数据.csv'\n",
"try:\n",
"    sales_data = pd.read_csv(file_path, encoding='gbk')\n",
"except FileNotFoundError:\n",
"    print(f\"未找到文件 {file_path},请检查文件路径是否正确。\")\n",
"    # Re-raise so execution stops here: otherwise sales_data stays undefined\n",
"    # and every later cell fails with an unrelated NameError.\n",
"    raise\n",
"else:\n",
"    # Only report on the frame once it has actually been loaded.\n",
"    print(\"数据读取成功,数据基本信息:\")\n",
"    sales_data.info()\n",
"    print(\"数据前几行信息:\")\n",
"    # Tab-separated dump of the first rows; missing values are written as 'nan'.\n",
"    print(sales_data.head().to_csv(sep='\\t', na_rep='nan'))"
|
||
],
|
||
"id": "e97cde3fb9ef1375",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"数据读取成功,数据基本信息:\n",
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 8978 entries, 0 to 8977\n",
|
||
"Data columns (total 11 columns):\n",
|
||
" # Column Non-Null Count Dtype \n",
|
||
"--- ------ -------------- ----- \n",
|
||
" 0 订单号 8978 non-null object \n",
|
||
" 1 设备ID 8978 non-null object \n",
|
||
" 2 应付金额 8978 non-null float64\n",
|
||
" 3 实际金额 8978 non-null float64\n",
|
||
" 4 商品 8978 non-null object \n",
|
||
" 5 支付时间 8978 non-null object \n",
|
||
" 6 地点 8978 non-null object \n",
|
||
" 7 状态 8978 non-null object \n",
|
||
" 8 提现 8978 non-null object \n",
|
||
" 9 大类 8978 non-null object \n",
|
||
" 10 二级类 8978 non-null object \n",
|
||
"dtypes: float64(2), object(9)\n",
|
||
"memory usage: 771.7+ KB\n",
|
||
"数据前几行信息:\n",
|
||
"\t订单号\t设备ID\t应付金额\t实际金额\t商品\t支付时间\t地点\t状态\t提现\t大类\t二级类\n",
|
||
"0\tDD201708167493190200943961687\tE43A6E078A04228\t4.5\t4.5\t250ml燕塘原味酸奶\t2017/6/1 0:01\tC\t已出货未退款\t已提现\t饮料\t乳制品\n",
|
||
"1\tDD201708167493190206930007675\tE43A6E078A04134\t2.0\t2.0\t145ml旺仔牛奶盒装\t2017/6/1 0:02\tB\t已出货未退款\t已提现\t饮料\t乳制品\n",
|
||
"2\tDD201708167493190368633848103\tE43A6E078A04172\t1.5\t1.5\t劲仔小鱼(卤香味)\t2017/6/1 0:07\tA\t已出货未退款\t已提现\t非饮料\t肉干/豆制品/蛋\n",
|
||
"3\tDD201708167493466235023422173\tE43A6E078A04172\t4.5\t4.5\t80g香飘飘椰果奶茶麦香味\t2017/6/1 0:08\tA\t已出货未退款\t已提现\t饮料\t茶饮料\n",
|
||
"4\tDD20170521150353225D2CC0CD748\tE43A6E078A04172\t3.0\t3.0\t伊利纯牛奶\t2017/6/1 0:08\tA\t已出货未退款\t已提现\t饮料\t乳制品\n",
|
||
"\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 20
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.578832Z",
|
||
"start_time": "2025-05-07T06:54:44.573803Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Sum the actual amount paid within each second-level category.\n",
"# as_index=False keeps the category as a regular column beside the total.\n",
"category_sales = sales_data.groupby('二级类', as_index=False)['实际金额'].sum()\n",
"print(\"各二级类别的销售额:\", category_sales, sep=\"\\n\")"
|
||
],
|
||
"id": "eb320166a60c9ea2",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"各二级类别的销售额:\n",
|
||
" 二级类 实际金额\n",
|
||
"0 乳制品 5308.5\n",
|
||
"1 其他 3.2\n",
|
||
"2 功能饮料 3581.7\n",
|
||
"3 咖啡 515.0\n",
|
||
"4 坚果炒货 146.8\n",
|
||
"5 方便速食 3004.5\n",
|
||
"6 果冻/龟苓膏 17.0\n",
|
||
"7 果蔬饮料 891.0\n",
|
||
"8 植物蛋白 1497.0\n",
|
||
"9 水 1795.1\n",
|
||
"10 海味零食 371.8\n",
|
||
"11 碳酸饮料 2088.4\n",
|
||
"12 糖果/巧克力 240.7\n",
|
||
"13 纸巾 84.6\n",
|
||
"14 肉干/豆制品/蛋 4378.7\n",
|
||
"15 膨化食品 2799.7\n",
|
||
"16 茶饮料 4905.0\n",
|
||
"17 蜜饯/果干 987.9\n",
|
||
"18 饼干糕点 3837.5\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 21
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.601992Z",
|
||
"start_time": "2025-05-07T06:54:44.598723Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Reorder the category totals from the highest amount to the lowest.\n",
"# The original row labels are kept, so they still point at the unsorted positions.\n",
"category_sales = category_sales.sort_values('实际金额', ascending=False)\n",
"print(\"按销售额降序排序后的二级类别销售额:\", category_sales, sep=\"\\n\")"
|
||
],
|
||
"id": "50235c92d3bd17c",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"按销售额降序排序后的二级类别销售额:\n",
|
||
" 二级类 实际金额\n",
|
||
"0 乳制品 5308.5\n",
|
||
"16 茶饮料 4905.0\n",
|
||
"14 肉干/豆制品/蛋 4378.7\n",
|
||
"18 饼干糕点 3837.5\n",
|
||
"2 功能饮料 3581.7\n",
|
||
"5 方便速食 3004.5\n",
|
||
"15 膨化食品 2799.7\n",
|
||
"11 碳酸饮料 2088.4\n",
|
||
"9 水 1795.1\n",
|
||
"8 植物蛋白 1497.0\n",
|
||
"17 蜜饯/果干 987.9\n",
|
||
"7 果蔬饮料 891.0\n",
|
||
"3 咖啡 515.0\n",
|
||
"10 海味零食 371.8\n",
|
||
"12 糖果/巧克力 240.7\n",
|
||
"4 坚果炒货 146.8\n",
|
||
"13 纸巾 84.6\n",
|
||
"6 果冻/龟苓膏 17.0\n",
|
||
"1 其他 3.2\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 22
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.616005Z",
|
||
"start_time": "2025-05-07T06:54:44.612951Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Select the five categories with the largest totals.\n",
"# nlargest ranks by value, so the result no longer depends on category_sales\n",
"# having been sorted by an earlier cell (head(5) silently relied on that order).\n",
"top_5_category_sales = category_sales.nlargest(5, '实际金额')\n",
"print(\"排名前 5 的商品类别销售额:\")\n",
"print(top_5_category_sales)"
|
||
],
|
||
"id": "be36c041c8eccfe1",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"排名前 5 的商品类别销售额:\n",
|
||
" 二级类 实际金额\n",
|
||
"0 乳制品 5308.5\n",
|
||
"16 茶饮料 4905.0\n",
|
||
"14 肉干/豆制品/蛋 4378.7\n",
|
||
"18 饼干糕点 3837.5\n",
|
||
"2 功能饮料 3581.7\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 23
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.641057Z",
|
||
"start_time": "2025-05-07T06:54:44.636193Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"# Count the number of sales records for each product.\n",
"# size() is the row count per group; the result column is named 销售数量.\n",
"product_sales_quantity = sales_data.groupby('商品').size().reset_index(name='销售数量')\n",
"print(\"各商品的销售数量:\", product_sales_quantity, sep=\"\\n\")"
|
||
],
|
||
"id": "fc85a9c33d529237",
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"各商品的销售数量:\n",
|
||
" 商品 销售数量\n",
|
||
"0 100g*5瓶益力多 26\n",
|
||
"1 100g卫龙点心面黑椒牛排味 3\n",
|
||
"2 100g果王咸柑桔罐装 13\n",
|
||
"3 103g康师傅红烧牛肉面 8\n",
|
||
"4 107g出前一丁桶面酱香牛肉王 6\n",
|
||
".. ... ...\n",
|
||
"249 顺宝九制梅 6\n",
|
||
"250 香脆肠 22\n",
|
||
"251 香豆干 63\n",
|
||
"252 鸡爪 7\n",
|
||
"253 鸭翅 112\n",
|
||
"\n",
|
||
"[254 rows x 2 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 24
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.656687Z",
|
||
"start_time": "2025-05-07T06:54:44.651375Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from pyecharts import options as opts\n",
"from pyecharts.charts import Funnel\n",
"\n",
"# Category names and their totals for the five top-ranked categories.\n",
"categories = top_5_category_sales['二级类'].tolist()\n",
"sales = top_5_category_sales['实际金额'].tolist()\n",
"\n",
"# Build the funnel in separate steps: the data series first, then the chart-wide options.\n",
"funnel = Funnel()\n",
"funnel.add(\n",
"    \"商品类别销售额\",\n",
"    [[name, amount] for name, amount in zip(categories, sales)],\n",
"    label_opts=opts.LabelOpts(position=\"inside\"),\n",
")\n",
"funnel.set_global_opts(\n",
"    title_opts=opts.TitleOpts(title=\"排名前 5 的商品类别销售额漏斗图\"),\n",
"    toolbox_opts=opts.ToolboxOpts(is_show=True),\n",
")\n",
"\n",
"# Write the chart to an HTML file; the returned path becomes the cell output.\n",
"funnel.render(\"./top_5_category_sales_funnel.html\")"
|
||
],
|
||
"id": "a4aa162f159b79ac",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'/Volumes/Data/04CodeData/gcc-project-py-25-2/商务大数据分析/20250507/top_5_category_sales_funnel.html'"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 25
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.685357Z",
|
||
"start_time": "2025-05-07T06:54:44.675073Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"from pyecharts import options as opts\n",
"from pyecharts.charts import WordCloud\n",
"\n",
"# Pair every product name with its sales count: [[name, count], ...].\n",
"products = product_sales_quantity['商品'].tolist()\n",
"quantities = product_sales_quantity['销售数量'].tolist()\n",
"data = [[product, quantity] for product, quantity in zip(products, quantities)]\n",
"\n",
"# Build the word cloud in separate steps: the data series first, then the chart-wide options.\n",
"wordcloud = WordCloud()\n",
"wordcloud.add(\"\", data, word_size_range=[20, 100])\n",
"wordcloud.set_global_opts(\n",
"    title_opts=opts.TitleOpts(title=\"商品销售数量词云图\"),\n",
"    toolbox_opts=opts.ToolboxOpts(is_show=True),\n",
")\n",
"\n",
"# Write the chart to an HTML file; the returned path becomes the cell output.\n",
"wordcloud.render(\"./product_sales_wordcloud.html\")"
|
||
],
|
||
"id": "95347caf8be5b12a",
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'/Volumes/Data/04CodeData/gcc-project-py-25-2/商务大数据分析/20250507/product_sales_wordcloud.html'"
|
||
]
|
||
},
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"execution_count": 26
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-05-07T06:54:44.697529Z",
|
||
"start_time": "2025-05-07T06:54:44.695983Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": "",
|
||
"id": "5b4334b07f625e34",
|
||
"outputs": [],
|
||
"execution_count": null
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|