2025-05-11 16:54:32 +08:00

358 lines
11 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.502127Z",
"start_time": "2025-05-07T06:54:44.496736Z"
}
},
"source": "import pandas as pd",
"outputs": [],
"execution_count": 19
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.557639Z",
"start_time": "2025-05-07T06:54:44.523063Z"
}
},
"cell_type": "code",
"source": [
"# Load the product sales records from a GBK-encoded CSV file.\n",
"file_path = 'data/商品销售数据.csv'\n",
"try:\n",
"    # Only the read itself is guarded, so unrelated errors are not masked.\n",
"    sales_data = pd.read_csv(file_path, encoding='gbk')\n",
"except FileNotFoundError:\n",
"    print(\n",
"        f\"未找到文件 {file_path},请检查文件路径是否正确。\")\n",
"    # Re-raise so execution stops here: every later cell needs sales_data,\n",
"    # and continuing would only surface as a confusing NameError downstream.\n",
"    raise\n",
"else:\n",
"    print(\"数据读取成功,数据基本信息:\")\n",
"    # info() prints the column names, non-null counts and dtypes itself.\n",
"    sales_data.info()\n",
"    print(\"数据前几行信息:\")\n",
"    # Dump the first rows as tab-separated text, writing missing values as 'nan'.\n",
"    print(sales_data.head().to_csv(sep='\\t', na_rep='nan'))"
],
"id": "e97cde3fb9ef1375",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"数据读取成功,数据基本信息:\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 8978 entries, 0 to 8977\n",
"Data columns (total 11 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 订单号 8978 non-null object \n",
" 1 设备ID 8978 non-null object \n",
" 2 应付金额 8978 non-null float64\n",
" 3 实际金额 8978 non-null float64\n",
" 4 商品 8978 non-null object \n",
" 5 支付时间 8978 non-null object \n",
" 6 地点 8978 non-null object \n",
" 7 状态 8978 non-null object \n",
" 8 提现 8978 non-null object \n",
" 9 大类 8978 non-null object \n",
" 10 二级类 8978 non-null object \n",
"dtypes: float64(2), object(9)\n",
"memory usage: 771.7+ KB\n",
"数据前几行信息:\n",
"\t订单号\t设备ID\t应付金额\t实际金额\t商品\t支付时间\t地点\t状态\t提现\t大类\t二级类\n",
"0\tDD201708167493190200943961687\tE43A6E078A04228\t4.5\t4.5\t250ml燕塘原味酸奶\t2017/6/1 0:01\tC\t已出货未退款\t已提现\t饮料\t乳制品\n",
"1\tDD201708167493190206930007675\tE43A6E078A04134\t2.0\t2.0\t145ml旺仔牛奶盒装\t2017/6/1 0:02\tB\t已出货未退款\t已提现\t饮料\t乳制品\n",
"2\tDD201708167493190368633848103\tE43A6E078A04172\t1.5\t1.5\t劲仔小鱼卤香味\t2017/6/1 0:07\tA\t已出货未退款\t已提现\t非饮料\t肉干/豆制品/蛋\n",
"3\tDD201708167493466235023422173\tE43A6E078A04172\t4.5\t4.5\t80g香飘飘椰果奶茶麦香味\t2017/6/1 0:08\tA\t已出货未退款\t已提现\t饮料\t茶饮料\n",
"4\tDD20170521150353225D2CC0CD748\tE43A6E078A04172\t3.0\t3.0\t伊利纯牛奶\t2017/6/1 0:08\tA\t已出货未退款\t已提现\t饮料\t乳制品\n",
"\n"
]
}
],
"execution_count": 20
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.578832Z",
"start_time": "2025-05-07T06:54:44.573803Z"
}
},
"cell_type": "code",
"source": [
"# Total the actual amount paid ('实际金额') within each second-level category ('二级类').\n",
"# as_index=False keeps the category as a regular column next to the summed amount.\n",
"category_sales = sales_data.groupby('二级类', as_index=False)['实际金额'].sum()\n",
"print(\"各二级类别的销售额:\")\n",
"print(category_sales)"
],
"id": "eb320166a60c9ea2",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"各二级类别的销售额:\n",
" 二级类 实际金额\n",
"0 乳制品 5308.5\n",
"1 其他 3.2\n",
"2 功能饮料 3581.7\n",
"3 咖啡 515.0\n",
"4 坚果炒货 146.8\n",
"5 方便速食 3004.5\n",
"6 果冻/龟苓膏 17.0\n",
"7 果蔬饮料 891.0\n",
"8 植物蛋白 1497.0\n",
"9 水 1795.1\n",
"10 海味零食 371.8\n",
"11 碳酸饮料 2088.4\n",
"12 糖果/巧克力 240.7\n",
"13 纸巾 84.6\n",
"14 肉干/豆制品/蛋 4378.7\n",
"15 膨化食品 2799.7\n",
"16 茶饮料 4905.0\n",
"17 蜜饯/果干 987.9\n",
"18 饼干糕点 3837.5\n"
]
}
],
"execution_count": 21
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.601992Z",
"start_time": "2025-05-07T06:54:44.598723Z"
}
},
"cell_type": "code",
"source": [
"# Rank the categories from highest to lowest total amount.\n",
"category_sales = category_sales.sort_values('实际金额', ascending=False)\n",
"print(\"按销售额降序排序后的二级类别销售额:\")\n",
"print(category_sales)"
],
"id": "50235c92d3bd17c",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"按销售额降序排序后的二级类别销售额:\n",
" 二级类 实际金额\n",
"0 乳制品 5308.5\n",
"16 茶饮料 4905.0\n",
"14 肉干/豆制品/蛋 4378.7\n",
"18 饼干糕点 3837.5\n",
"2 功能饮料 3581.7\n",
"5 方便速食 3004.5\n",
"15 膨化食品 2799.7\n",
"11 碳酸饮料 2088.4\n",
"9 水 1795.1\n",
"8 植物蛋白 1497.0\n",
"17 蜜饯/果干 987.9\n",
"7 果蔬饮料 891.0\n",
"3 咖啡 515.0\n",
"10 海味零食 371.8\n",
"12 糖果/巧克力 240.7\n",
"4 坚果炒货 146.8\n",
"13 纸巾 84.6\n",
"6 果冻/龟苓膏 17.0\n",
"1 其他 3.2\n"
]
}
],
"execution_count": 22
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.616005Z",
"start_time": "2025-05-07T06:54:44.612951Z"
}
},
"cell_type": "code",
"source": [
"# Keep the first five rows of the ranked table, i.e. the five top-selling categories.\n",
"top_5_category_sales = category_sales.iloc[:5]\n",
"print(\"排名前 5 的商品类别销售额:\")\n",
"print(top_5_category_sales)"
],
"id": "be36c041c8eccfe1",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"排名前 5 的商品类别销售额:\n",
" 二级类 实际金额\n",
"0 乳制品 5308.5\n",
"16 茶饮料 4905.0\n",
"14 肉干/豆制品/蛋 4378.7\n",
"18 饼干糕点 3837.5\n",
"2 功能饮料 3581.7\n"
]
}
],
"execution_count": 23
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.641057Z",
"start_time": "2025-05-07T06:54:44.636193Z"
}
},
"cell_type": "code",
"source": [
"# Count how many sales rows each product ('商品') appears in.\n",
"# size() gives the row count per group; reset_index names that count column.\n",
"product_sales_quantity = sales_data.groupby('商品').size().reset_index(name='销售数量')\n",
"print(\"各商品的销售数量:\")\n",
"print(product_sales_quantity)"
],
"id": "fc85a9c33d529237",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"各商品的销售数量:\n",
" 商品 销售数量\n",
"0 100g*5瓶益力多 26\n",
"1 100g卫龙点心面黑椒牛排味 3\n",
"2 100g果王咸柑桔罐装 13\n",
"3 103g康师傅红烧牛肉面 8\n",
"4 107g出前一丁桶面酱香牛肉王 6\n",
".. ... ...\n",
"249 顺宝九制梅 6\n",
"250 香脆肠 22\n",
"251 香豆干 63\n",
"252 鸡爪 7\n",
"253 鸭翅 112\n",
"\n",
"[254 rows x 2 columns]\n"
]
}
],
"execution_count": 24
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.656687Z",
"start_time": "2025-05-07T06:54:44.651375Z"
}
},
"cell_type": "code",
"source": [
"from pyecharts import options as opts\n",
"from pyecharts.charts import Funnel\n",
"\n",
"# Pair each top-5 category name with its total amount for the funnel series.\n",
"categories = top_5_category_sales['二级类'].tolist()\n",
"sales = top_5_category_sales['实际金额'].tolist()\n",
"funnel_pairs = [[name, amount] for name, amount in zip(categories, sales)]\n",
"\n",
"# Build the funnel chart: one series with labels drawn inside each segment.\n",
"funnel = Funnel()\n",
"funnel.add(\n",
"    \"商品类别销售额\",\n",
"    funnel_pairs,\n",
"    label_opts=opts.LabelOpts(position=\"inside\"),\n",
")\n",
"# Chart title plus the interactive toolbox.\n",
"funnel.set_global_opts(\n",
"    title_opts=opts.TitleOpts(title=\"排名前 5 的商品类别销售额漏斗图\"),\n",
"    toolbox_opts=opts.ToolboxOpts(is_show=True),\n",
")\n",
"# Write the chart to an HTML file; the returned path is shown as the cell output.\n",
"funnel.render(\"./top_5_category_sales_funnel.html\")"
],
"id": "a4aa162f159b79ac",
"outputs": [
{
"data": {
"text/plain": [
"'/Volumes/Data/04CodeData/gcc-project-py-25-2/商务大数据分析/20250507/top_5_category_sales_funnel.html'"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 25
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.685357Z",
"start_time": "2025-05-07T06:54:44.675073Z"
}
},
"cell_type": "code",
"source": [
"from pyecharts import options as opts\n",
"from pyecharts.charts import WordCloud\n",
"\n",
"# Build [product name, sales count] pairs as the word cloud input.\n",
"products = product_sales_quantity['商品'].tolist()\n",
"quantities = product_sales_quantity['销售数量'].tolist()\n",
"data = [[product, quantity] for product, quantity in zip(products, quantities)]\n",
"\n",
"# Build the word cloud: unnamed series, word_size_range set to [20, 100].\n",
"wordcloud = WordCloud()\n",
"wordcloud.add(\n",
"    \"\",\n",
"    data,\n",
"    word_size_range=[20, 100],\n",
")\n",
"# Chart title plus the interactive toolbox.\n",
"wordcloud.set_global_opts(\n",
"    title_opts=opts.TitleOpts(title=\"商品销售数量词云图\"),\n",
"    toolbox_opts=opts.ToolboxOpts(is_show=True),\n",
")\n",
"# Write the chart to an HTML file; the returned path is shown as the cell output.\n",
"wordcloud.render(\"./product_sales_wordcloud.html\")"
],
"id": "95347caf8be5b12a",
"outputs": [
{
"data": {
"text/plain": [
"'/Volumes/Data/04CodeData/gcc-project-py-25-2/商务大数据分析/20250507/product_sales_wordcloud.html'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"execution_count": 26
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-05-07T06:54:44.697529Z",
"start_time": "2025-05-07T06:54:44.695983Z"
}
},
"cell_type": "code",
"source": "",
"id": "5b4334b07f625e34",
"outputs": [],
"execution_count": null
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}