feat(hbase-lesson): add new module hbase-20250509
- Add the hbase-20250509 module for the HBase course project
- Implement two MapReduce jobs: traffic statistics and word count
- Add the corresponding Mapper, Reducer, and Driver classes
- Create sample input files
- Configure the Maven dependencies
hbase-lesson/hbase-20250509/src/main/java/FlowCount2/FlowBean.java
@@ -0,0 +1,77 @@
package FlowCount2;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// The implements keyword realizes an interface; the interface is a blueprint
// of how the class must behave. Writable is Hadoop's serialization contract.
public class FlowBean implements Writable {
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    // Hadoop needs a no-arg constructor so it can instantiate the bean
    // via reflection before calling readFields().
    public FlowBean() {
    }

    public FlowBean(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
        this.sumFlow = upFlow + downFlow;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    // Serialization: write the primitive fields to the byte stream.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(this.upFlow);
        out.writeLong(this.downFlow);
        out.writeLong(this.sumFlow);
    }

    // Deserialization: read the fields back in the same order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        this.upFlow = in.readLong();
        this.downFlow = in.readLong();
        this.sumFlow = in.readLong();
    }

    // Object's toString() returns ClassName@hashcode, so we override it.
    // The MapReduce framework calls toString() when it writes a FlowBean
    // value as text output.
    @Override
    public String toString() {
        return "FlowBean{" +
                "upFlow=" + upFlow +
                ", downFlow=" + downFlow +
                ", sumFlow=" + sumFlow +
                '}';
    }
}
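To make the Writable contract concrete, here is a small round-trip harness (a standalone sketch, not part of the commit) showing that readFields() recovers exactly what write() emitted; Hadoop performs the same byte-stream round trip when shuffling FlowBean values between map and reduce:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import FlowCount2.FlowBean;

public class FlowBeanRoundTrip {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean(200, 1100); // constructor sets sumFlow = 1300

        // Serialize to an in-memory byte stream.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh bean and verify the fields survived.
        FlowBean copy = new FlowBean();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy); // FlowBean{upFlow=200, downFlow=1100, sumFlow=1300}
    }
}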
hbase-lesson/hbase-20250509/src/main/java/FlowCount2/FlowCountDriver.java
@@ -0,0 +1,35 @@
package FlowCount2;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class FlowCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "FlowBean");

        // Lets Hadoop locate the jar containing these classes when run on a cluster.
        job.setJarByClass(FlowCountDriver.class);
        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // A single reducer produces one output file with all keys globally sorted.
        job.setNumReduceTasks(1);

        FileInputFormat.setInputPaths(job, new Path("hbase-lesson/hbase-20250509/src/main/java/FlowCount2/input/"));
        FileOutputFormat.setOutputPath(job, new Path("hbase-lesson/hbase-20250509/src/main/java/FlowCount2/output/"));

        // Block until the job finishes; exit non-zero on failure.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
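One caveat: FileOutputFormat refuses to run if the output directory already exists, so a second run of this driver fails until output/ is removed. A minimal sketch of the usual delete-before-run convenience (the helper class and method name are hypothetical, not part of the commit):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

// Hypothetical helper: clear a stale output directory so reruns don't fail.
public class OutputCleaner {
    public static void deleteIfExists(Configuration conf, String dir) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        Path out = new Path(dir);
        if (fs.exists(out)) {
            fs.delete(out, true); // true = recursive delete
        }
    }
}

Called before FileOutputFormat.setOutputPath(...), this makes the driver idempotent across runs.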
hbase-lesson/hbase-20250509/src/main/java/FlowCount2/FlowCountMapper.java
@@ -0,0 +1,26 @@
package FlowCount2;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Each input line is: phone_number  up_flow  down_flow, split on any whitespace.
        String[] oneLine = value.toString().split("\\s+");
        try {
            long upFlow = Long.parseLong(oneLine[1]);
            long downFlow = Long.parseLong(oneLine[2]);

            // The FlowBean constructor computes sumFlow from the two parts.
            context.write(new Text(oneLine[0]), new FlowBean(upFlow, downFlow));
        } catch (Exception e) {
            // Malformed lines (e.g. the header row of the sample file) are logged and skipped.
            e.printStackTrace();
        }
    }
}
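As a quick sanity check of the tokenization, the split behaves as follows on a line from the sample data (a standalone sketch, independent of Hadoop):

public class SplitDemo {
    public static void main(String[] args) {
        // Mirrors the mapper's parsing of one input line.
        String line = "13726230501 200 1100";
        String[] parts = line.split("\\s+"); // ["13726230501", "200", "1100"]
        System.out.println(parts[0] + " up=" + parts[1] + " down=" + parts[2]);
    }
}

The mapper then emits ("13726230501", FlowBean{upFlow=200, downFlow=1100, sumFlow=1300}).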
hbase-lesson/hbase-20250509/src/main/java/FlowCount2/FlowCountReducer.java
@@ -0,0 +1,21 @@
package FlowCount2;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class FlowCountReducer extends Reducer<Text, FlowBean, Text, FlowBean> {

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values, Context context) throws IOException, InterruptedException {
        // Accumulate the up/down traffic of every record sharing this phone number.
        long sumUpFlow = 0;
        long sumDownFlow = 0;
        for (FlowBean value : values) {
            sumUpFlow += value.getUpFlow();
            sumDownFlow += value.getDownFlow();
        }

        context.write(key, new FlowBean(sumUpFlow, sumDownFlow));
    }
}
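Given the sample input below, 13897230503 appears twice (400+100 up, 1300+300 down) and 13597230534 twice (500+300 up, 1400+1200 down), so with the single reducer and the default tab-separated text output the job should produce something like:

13396230502	FlowBean{upFlow=300, downFlow=1200, sumFlow=1500}
13597230534	FlowBean{upFlow=800, downFlow=2600, sumFlow=3400}
13726230501	FlowBean{upFlow=200, downFlow=1100, sumFlow=1300}
13897230503	FlowBean{upFlow=500, downFlow=1600, sumFlow=2100}

The header line of the data file fails Long.parseLong in the mapper and is silently skipped by its catch block.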
hbase-lesson/hbase-20250509/src/main/java/FlowCount2/input/ (sample data)
@@ -0,0 +1,7 @@
phone_number up_flow down_flow
13726230501 200 1100
13396230502 300 1200
13897230503 400 1300
13897230503 100 300
13597230534 500 1400
13597230534 300 1200
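The commit message also mentions a word-count job, which does not appear in this excerpt. For completeness, a minimal sketch of what such a Mapper/Reducer pair typically looks like in Hadoop (package, class, and file names here are hypothetical, not taken from the commit):

package WordCount; // hypothetical package name

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountClasses {

    // Emits (word, 1) for every whitespace-separated token in a line.
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            for (String word : value.toString().split("\\s+")) {
                if (!word.isEmpty()) {
                    context.write(new Text(word), ONE);
                }
            }
        }
    }

    // Sums the per-word counts produced by all mappers.
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }
}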