feat(hbase-lesson): 添加新模块 hbase-20250509

- 新增 hbase-20250509 模块用于 HBase 课程项目
- 实现了流量统计和词频统计两个 MapReduce 任务
- 添加了相应的 Mapper、Reducer 和 Driver 类- 创建了输入样例文件- 配置了 Maven 依赖
This commit is contained in:
2025-05-12 09:07:29 +08:00
parent 6ca57600c0
commit b2f5b0e40b
19 changed files with 526 additions and 0 deletions

View File

@@ -0,0 +1,37 @@
package FlowCount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class FlowCountDriver {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "FlowCount");
job.setMapperClass(FlowCountMapper.class);
job.setReducerClass(FlowCountReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setNumReduceTasks(1);
FileInputFormat.setInputPaths(job, new Path("hbase-lesson/hbase-20250509/src/main/java/FlowCount/input/"));
FileOutputFormat.setOutputPath(job, new Path("hbase-lesson/hbase-20250509/src/main/java/FlowCount/output/"));
job.waitForCompletion(true);
}
}

View File

@@ -0,0 +1,25 @@
package FlowCount;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
String[] oneLine = value.toString().split("\\s+");
try {
long upFlow = Long.parseLong(oneLine[1]);
long downFlow = Long.parseLong(oneLine[2]);
long sumFlow = upFlow + downFlow;
context.write(new Text(oneLine[0]), new LongWritable(sumFlow));
} catch (Exception e) {
e.printStackTrace();
}
}
}

View File

@@ -0,0 +1,20 @@
package FlowCount;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FlowCountReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
long count =0;
for(LongWritable value_i: values) {
count+=value_i.get();
}
context.write(key, new LongWritable(count));
}
}

View File

@@ -0,0 +1,7 @@
手机号 上行流量 下行流量
13726230501 200 1100
13396230502 300 1200
13897230503 400 1300
13897230503 100 300
13597230534 500 1400
13597230534 300 1200