build(bigdata-system-dev): 初始化项目依赖和数据库表结构

- 新增 MySQL 数据库表结构文件,创建多个数据表和视图
- 在 pom.xml 中添加 Scala、Flink、Spark、MySQL、Fastjson 和 Redis 相关依赖
- 更新项目属性,设置 Scala 版本为 2.12
- 移除原有的 Java编译源和目标版本属性
This commit is contained in:
dev_xulongjin 2025-05-23 09:31:27 +08:00
parent 2ba45b64b3
commit f14148c72a
2 changed files with 230 additions and 3 deletions

View File

@@ -8,10 +8,127 @@
<artifactId>bigdata-system-dev</artifactId> <artifactId>bigdata-system-dev</artifactId>
<version>1.0-SNAPSHOT</version> <version>1.0-SNAPSHOT</version>
<properties> <properties>
<maven.compiler.source>8</maven.compiler.source> <scala.version>2.12</scala.version>
<maven.compiler.target>8</maven.compiler.target> <flink.version>1.14.0</flink.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <kafka.version>2.4.1</kafka.version>
<flink-cdc.vesion>2.4.2</flink-cdc.vesion>
<fastjson.version>1.2.83</fastjson.version>
<redis.version>3.3.0</redis.version>
<flink-connector-redis.verion>1.1.5</flink-connector-redis.verion>
<mysql-connector.verion>8.0.28</mysql-connector.verion>
</properties> </properties>
<dependencies>
<!-- Flink -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-runtime-web_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-clients_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-kafka_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-scala-bridge_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-jdbc_${scala.version}</artifactId>
<version>${flink.version}</version>
</dependency>
<!-- Spark各组件库================================================================== -->
<!--Spark核心库-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>3.2.1</version>
</dependency>
<!--Spark SQL所需库-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>3.2.1</version>
</dependency>
<!--Spark Streaming所需库-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_2.12</artifactId>
<version>3.2.1</version>
</dependency>
<!--Spark Streaming针对Kafka的依赖库-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_2.12</artifactId>
<version>3.2.1</version>
</dependency>
<!--Graphx的依赖库-->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-graphx_2.12</artifactId>
<version>3.2.1</version>
</dependency>
<!-- mysql -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>${mysql-connector.verion}</version>
</dependency>
<!-- fastjson -->
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- Redis -->
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>${redis.version}</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-connector-redis_2.11</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.flink</groupId>
<artifactId>flink-shaded-hadoop2</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</exclusion>
</exclusions>
<version>${flink-connector-redis.verion}</version>
</dependency>
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
<version>1.1.10.1</version>
</dependency>
</dependencies>
</project> </project>

View File

@@ -0,0 +1,110 @@
-- Create the target database (idempotently) and switch to it.
-- utf8mb4 is used so that 4-byte characters (emoji, rare CJK) store correctly.
show databases;
create database if not exists spark_web character set utf8mb4;
use spark_web;
-- 3.3 Per-user summary table: login count and total online time.
-- NOTE(review): "sum" collides with the SQL aggregate function name; it is
-- not reserved in MySQL so it works, but consider renaming in a future pass.
-- Changes: idempotent create; int display widths dropped (deprecated in
-- MySQL 8); charset aligned with the utf8mb4 database created above.
create table if not exists sum (
    imei        varchar(10) default null comment '用户编号',
    log_times   int         default null comment '登陆次数',
    online_time int         default null comment '在线时长(秒)'
) engine=innodb default charset=utf8mb4;
-- 3.4 Per-user detail table: first login time and total online time.
-- NOTE(review): first_login_time is stored as a string; a DATETIME column
-- would be preferable — confirm what format the loading job writes first.
-- Changes: idempotent create; int display width dropped; utf8mb4 charset
-- to match the database default.
create table if not exists detail (
    imei             varchar(10)  default null comment '用户编号',
    first_login_time varchar(100) default null comment '首次登录时间',
    online_time      int          default null comment '在线时长(秒)'
) engine=innodb default charset=utf8mb4;
-- 3.5 Raw cleaned-log table: one row per request event.
-- FIX: requestip widened from varchar(11) to varchar(15) — a dotted-quad
-- IPv4 address can be up to 15 characters ("255.255.255.255"), so 11 would
-- silently truncate valid addresses.
-- NOTE(review): "channelne" looks like a typo for "channelno", but every
-- downstream view joins on a.channelne; kept as-is so this edit stands
-- alone — rename the column and all views together if it is ever fixed.
create table if not exists cleanMap (
    imei        varchar(10)  default null comment '用户编号',
    logid       varchar(100) default null comment '登录时间',
    requestip   varchar(15)  default null comment '登录IP地址',
    areacode    varchar(11)  default null comment '登录区域',
    requesttype varchar(11)  default null comment '请求类型',
    channelne   varchar(11)  default null comment '渠道'
) engine=innodb default charset=utf8mb4;
-- 3.6 Area dimension table plus seed rows.
-- Seed data is required — without it the front/back end has nothing to join
-- against for analysis.
-- Changes: idempotent create; utf8mb4; explicit column lists on the inserts
-- so they survive future schema changes.
create table if not exists t_dim_area (
    areacode varchar(11)  default null comment '区域编码',
    areaname varchar(100) default null comment '区域名称'
) engine=innodb default charset=utf8mb4;
insert into t_dim_area (areacode, areaname) values ('0', '浙江省丽水市');
insert into t_dim_area (areacode, areaname) values ('1', '福建省南平市');
insert into t_dim_area (areacode, areaname) values ('2', '福建省福州市');
-- 3.7 Channel dimension table plus seed rows.
-- Changes: idempotent create; utf8mb4; explicit column lists on the inserts.
create table if not exists t_dim_channel (
    channelno   varchar(11)  default null comment '渠道编号',
    channelname varchar(100) default null comment '渠道名称'
) engine=innodb default charset=utf8mb4 row_format=dynamic;
insert into t_dim_channel (channelno, channelname) values ('0', '手机');
insert into t_dim_channel (channelno, channelname) values ('1', 'PC');
insert into t_dim_channel (channelno, channelname) values ('2', '平板电脑');
-- 3.8 Request-type dimension table plus seed rows.
-- Changes: idempotent create; utf8mb4; explicit column lists on the inserts.
create table if not exists t_dim_requesttype (
    requesttype     varchar(11)  default null comment '请求类型',
    requesttypename varchar(100) default null comment '请求类型名称'
) engine=innodb default charset=utf8mb4 row_format=dynamic;
insert into t_dim_requesttype (requesttype, requesttypename) values ('0', 'GET方式');
insert into t_dim_requesttype (requesttype, requesttypename) values ('1', 'POST方式');
-- 3.9 Analysis views (five in total).
-- 3.9.1 Distinct users per area and channel, for channel-trend analysis.
-- Rewritten from implicit comma joins to explicit INNER JOINs (same rows,
-- but join conditions can no longer be dropped silently).
create or replace view v_area_channel as
select b.areaname,
       c.channelname,
       count(distinct a.imei) as num
from cleanMap a
inner join t_dim_area b on a.areacode = b.areacode
inner join t_dim_channel c on a.channelne = c.channelno
group by b.areaname, c.channelname;
-- 3.9.2 Distinct users per area and request type (GET vs POST comparison).
-- Rewritten from implicit comma joins to explicit INNER JOINs.
create or replace view v_area_requesttype as
select b.areaname,
       c.requesttypename,
       count(distinct a.imei) as num
from cleanMap a
inner join t_dim_area b on a.areacode = b.areacode
inner join t_dim_requesttype c on a.requesttype = c.requesttype
group by b.areaname, c.requesttypename;
-- 3.9.3 Distinct users per channel, for the channel pie chart.
-- Rewritten from an implicit comma join to an explicit INNER JOIN.
create or replace view v_channelno as
select b.channelname,
       count(distinct a.imei) as num
from cleanMap a
inner join t_dim_channel b on a.channelne = b.channelno
group by b.channelname;
-- 3.9.4 Per-user login summary: login count, first login time, online time.
-- Rewritten from an implicit comma join to an explicit INNER JOIN.
-- NOTE(review): DISTINCT here masks row multiplication if imei is not
-- unique in sum/detail — confirm uniqueness in the loading job.
create or replace view v_user_login as
select distinct a.imei,
                a.log_times,
                b.first_login_time,
                a.online_time
from sum a
inner join detail b on a.imei = b.imei;
-- 3.9.5 Full per-user detail: request, timing, and dimension names.
-- Rewritten from a six-table implicit comma join to explicit INNER JOINs;
-- each join condition now sits next to the table it constrains.
create or replace view v_user_detail as
select distinct
       a.imei,
       a.requestip,
       d.requesttypename,
       c.first_login_time,
       b.log_times,
       b.online_time,
       e.areaname,
       f.channelname
from cleanMap a
inner join sum b on a.imei = b.imei
inner join detail c on a.imei = c.imei
inner join t_dim_requesttype d on a.requesttype = d.requesttype
inner join t_dim_area e on a.areacode = e.areacode
inner join t_dim_channel f on a.channelne = f.channelno;
# Ad-hoc verification statements: list the tables in spark_web and dump each
# data table. These are interactive smoke checks, not part of the schema;
# select * is acceptable here but should not be copied into views or jobs.
#查看spark_web的表  (list the tables in spark_web)
show tables;
show databases;
use spark_web;
show tables;
select * from detail;
select * from sum;
select * from cleanMap;