refactor(spark-lesson): 实验五部分内容
This commit is contained in:
parent
2840970cb0
commit
c5a46a98e0
@ -17,9 +17,9 @@ object RDDAverageScore {
|
||||
val sc = new SparkContext(conf)
|
||||
|
||||
// 读取每个科目的成绩文件
|
||||
val mathRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/math.txt")
|
||||
val chineseRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/chinese.txt")
|
||||
val englishRDD: RDD[String] = sc.textFile("hdfs://192.168.182.100:9000/data/english.txt")
|
||||
val mathRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/math.txt")
|
||||
val chineseRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/chinese.txt")
|
||||
val englishRDD: RDD[String] = sc.textFile("hdfs://hadoop102:8020/input/data/english.txt")
|
||||
|
||||
// 处理每个科目RDD,将每一行数据转化为 (姓名, 分数) 二元组
|
||||
val mathScores: RDD[(String, Int)] = mathRDD.map(line => {
|
||||
@ -41,7 +41,8 @@ object RDDAverageScore {
|
||||
val allScores: RDD[(String, Int)] = mathScores.union(chineseScores).union(englishScores)
|
||||
|
||||
// 对每个学生的所有分数进行聚合 (姓名, (总分, 科目数))
|
||||
val studentScores: RDD[(String, (Int, Int))] = allScores.补充
|
||||
val studentScores: RDD[(String, (Int, Int))] = allScores
|
||||
.map( x => (x._1,(x._2,1)))
|
||||
.reduceByKey { case ((sum1, count1), (sum2, count2)) =>
|
||||
(sum1 + sum2, count1 + count2)
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user