diff --git a/spark-lesson/src/main/scala/date_20250415/RDDAverageScore.scala b/spark-lesson/src/main/scala/date_20250415/RDDAverageScore.scala
index 8a09672..e0682ed 100755
--- a/spark-lesson/src/main/scala/date_20250415/RDDAverageScore.scala
+++ b/spark-lesson/src/main/scala/date_20250415/RDDAverageScore.scala
@@ -17,9 +17,9 @@ object RDDAverageScore {
     val sc = new SparkContext(conf)
 
     // Read the score file for each subject
-    val mathRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/math.txt")
-    val chineseRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/chinese.txt")
-    val englishRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/english.txt")
+    val mathRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/math.txt")
+    val chineseRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/chinese.txt")
+    val englishRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/english.txt")
 
     // Process each subject RDD: turn every line into a (name, score) pair
     val mathScores: RDD[(String, Int)] = mathRDD.map(line => {
@@ -41,7 +41,8 @@ object RDDAverageScore {
     val allScores: RDD[(String, Int)] = mathScores.union(chineseScores).union(englishScores)
 
     // Aggregate every student's scores into (name, (totalScore, subjectCount))
-    val studentScores: RDD[(String, (Int, Int))] = allScores.补充
+    val studentScores: RDD[(String, (Int, Int))] = allScores
+      .map(x => (x._1, (x._2, 1)))
       .reduceByKey { case ((sum1, count1), (sum2, count2)) =>
         (sum1 + sum2, count1 + count2)
       }
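
For context, here is a minimal self-contained sketch of the pattern the patch completes: tagging each (name, score) pair with a count of 1, then summing scores and counts in a single reduceByKey pass so the average can be derived per student. The object name, the local[*] master, the in-memory sample data standing in for the three HDFS files, the whitespace-separated "name score" line format, and the final mapValues averaging step are all assumptions for illustration; only the union/map/reduceByKey aggregation itself comes from the diff above.

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object AverageScoreSketch {
  def main(args: Array[String]): Unit = {
    // local[*] master is an assumption for standalone testing;
    // the patched file reads from hdfs://hadoop102:8020 instead.
    val conf = new SparkConf().setAppName("AverageScoreSketch").setMaster("local[*]")
    val sc = new SparkContext(conf)

    // In-memory stand-ins for the three HDFS files; each line is "name score".
    def parse(lines: Seq[String]): RDD[(String, Int)] =
      sc.parallelize(lines).map { line =>
        val fields = line.split("\\s+")
        (fields(0), fields(1).toInt)
      }

    val mathScores    = parse(Seq("Alice 90", "Bob 80"))
    val chineseScores = parse(Seq("Alice 70", "Bob 85"))
    val englishScores = parse(Seq("Alice 80", "Bob 75"))

    // Same aggregation as the patch: tag each score with a count of 1,
    // then sum scores and counts per student in one reduceByKey pass.
    val studentScores: RDD[(String, (Int, Int))] = mathScores
      .union(chineseScores)
      .union(englishScores)
      .map(x => (x._1, (x._2, 1)))
      .reduceByKey { case ((sum1, count1), (sum2, count2)) =>
        (sum1 + sum2, count1 + count2)
      }

    // Averaging step (not shown in the hunks above; an assumption
    // about how the file continues after reduceByKey).
    val averages: RDD[(String, Double)] =
      studentScores.mapValues { case (sum, count) => sum.toDouble / count }

    averages.collect().foreach { case (name, avg) => println(f"$name: $avg%.2f") }

    sc.stop()
  }
}

Carrying the count through the reduce, rather than calling groupByKey and averaging afterwards, keeps the shuffle small: each partition pre-aggregates (sum, count) pairs locally before anything crosses the network.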