feat(hbase-lesson): add new module hbase-20250509

- Add the hbase-20250509 module for the HBase course project
- Implement two MapReduce jobs: traffic statistics and word count
- Add the corresponding Mapper, Reducer, and Driver classes
- Create a sample input file
- Configure the Maven dependencies
2025-05-12 09:07:29 +08:00
parent 6ca57600c0
commit b2f5b0e40b
19 changed files with 526 additions and 0 deletions
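
The commit message mentions Maven dependencies, but the pom.xml hunk is not reproduced below. A minimal sketch of the one dependency this code needs (the coordinates are the standard Hadoop client artifact; the version is an assumption):

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.3.6</version> <!-- assumed version; match your cluster -->
</dependency>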

hbase-lesson/hbase-20250509/src/main/java/WordCount/WordCountMapper.java

@@ -0,0 +1,35 @@
package WordCount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * The map logic: runs once per input KV pair, which in this job means
     * once per line of input.
     *
     * @param key     input key: the byte offset of the line in the file
     * @param value   input value: the line itself, the part we actually process
     * @param context used to emit the output KV pairs
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line on spaces and emit (word, 1) for every word.
        String[] ws = value.toString().split(" ");
        for (String word : ws) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
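
Traced against the first line of the sample input file below, one map() call produces (an illustration, not part of the commit):

// input:  (0, "a b c d a a a a")
// output: (a,1) (b,1) (c,1) (d,1) (a,1) (a,1) (a,1) (a,1)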

hbase-lesson/hbase-20250509/src/main/java/WordCount/WordCountReducer.java

@@ -0,0 +1,30 @@
package WordCount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * The reduce logic: runs once per key group. With the words b and d,
     * for example, there are two groups, so reduce() runs twice; for word b
     * the input looks like (b, <1,1,1,1,1,1,1,1,1>).
     *
     * @param key     the word
     * @param values  all counts emitted for this word by the mappers
     * @param context used to emit the (word, total) output pair
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int number = 0;
        for (IntWritable value : values) {
            // Sum the values instead of merely counting them, so the result
            // stays correct even if a combiner pre-aggregates the 1s.
            number += value.get();
        }
        context.write(key, new IntWritable(number));
    }
}
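
After the shuffle groups the map output by word, each reduce() call receives one word together with all of its 1s. For key "a" over the full sample input below, the call looks like (an illustration, not part of the commit):

// input:  ("a", <1,1,1,1,1,1,1,1,1,1,1,1,1,1>)   (fourteen 1s)
// output: ("a", 14)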

hbase-lesson/hbase-20250509/src/main/java/WordCount/WorldCountDriver.java

@@ -0,0 +1,41 @@
package WordCount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WorldCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Configuration object
        Configuration conf = new Configuration();
        // 2. Create the job object
        Job job = Job.getInstance(conf, "wordcount");
        // Needed when running on a cluster so Hadoop can ship the right jar.
        job.setJarByClass(WorldCountDriver.class);
        // 2.1 Set the map and reduce task classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 2.2 Set the map and reduce output KV types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 2.3 Set the number of reducers (default is 1)
        job.setNumReduceTasks(2);
        // 2.4 Set the input and output paths
        // String hdfs_projPath = "hdfs://localhost:9000/user/ccd/HBaseCourseProj/hbase_demo3_mapreduce/";
        String hdfs_projPath = "hbase-lesson/hbase-20250509/src/main/java/WordCount";
        FileInputFormat.setInputPaths(job, new Path(hdfs_projPath + "/input/"));
        // The output directory must not exist yet, or the job fails on submit.
        FileOutputFormat.setOutputPath(job, new Path(hdfs_projPath + "/output/"));
        // 3. Submit the job, wait for completion, and exit with its status
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
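
Once the module is packaged, the job can be launched with the standard Hadoop launcher (the jar name here is an assumption):

hadoop jar hbase-20250509.jar WordCount.WorldCountDriver

Because setNumReduceTasks(2) is set, the counts end up split across part-r-00000 and part-r-00001 under the output directory.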

hbase-lesson/hbase-20250509/src/main/java/WordCount/input/ (sample input file)

@@ -0,0 +1,4 @@
a b c d a a a a
a b c d a a a b
a b c d a a c c
a b c d a d d d
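
For reference, running the job over these four lines yields the following totals, split across the two reducer output files by the hash partitioner:

a	14
b	5
c	6
d	7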