refactor(spark-lesson): 实验五部分内容

This commit is contained in:
dev_xulongjin 2025-04-21 16:23:46 +08:00
parent 2840970cb0
commit c5a46a98e0

View File

@ -17,9 +17,9 @@ object RDDAverageScore {
val sc = new SparkContext(conf) val sc = new SparkContext(conf)
// 读取每个科目的成绩文件 // 读取每个科目的成绩文件
val mathRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/math.txt") val mathRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/math.txt")
val chineseRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/chinese.txt") val chineseRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/chinese.txt")
val englishRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/english.txt") val englishRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/english.txt")
// 处理每个科目RDD将每一行数据转化为 (姓名, 分数) 二元组 // 处理每个科目RDD将每一行数据转化为 (姓名, 分数) 二元组
val mathScores: RDD[(String, Int)] = mathRDD.map(line => { val mathScores: RDD[(String, Int)] = mathRDD.map(line => {
@ -41,7 +41,8 @@ object RDDAverageScore {
val allScores: RDD[(String, Int)] = mathScores.union(chineseScores).union(englishScores) val allScores: RDD[(String, Int)] = mathScores.union(chineseScores).union(englishScores)
// 对每个学生的所有分数进行聚合 (姓名, (总分, 科目数)) // 对每个学生的所有分数进行聚合 (姓名, (总分, 科目数))
val studentScores: RDD[(String, (Int, Int))] = allScores.补充 val studentScores: RDD[(String, (Int, Int))] = allScores
.map( x => (x._1,(x._2,1)))
.reduceByKey { case ((sum1, count1), (sum2, count2)) => .reduceByKey { case ((sum1, count1), (sum2, count2)) =>
(sum1 + sum2, count1 + count2) (sum1 + sum2, count1 + count2)
} }