feat(hbase-lesson): add new module hbase-20250509

- Add the hbase-20250509 module for the HBase course project
- Implement two MapReduce jobs: traffic statistics and word count
- Add the corresponding Mapper, Reducer, and Driver classes
- Create a sample input file
- Configure the Maven dependencies
2025-05-12 09:07:29 +08:00
parent 6ca57600c0
commit b2f5b0e40b
19 changed files with 526 additions and 0 deletions
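
The commit message mentions Maven dependencies, but the pom.xml hunk is not reproduced below. A minimal sketch of the one dependency this code needs (the coordinates are the standard Hadoop client artifact; the version is an assumption):

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.3.6</version> <!-- assumed version; match your cluster -->
</dependency>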

hbase-lesson/hbase-20250509/src/main/java/WordCount/WordCountMapper.java

@@ -0,0 +1,35 @@
package WordCount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * The map logic: runs once per input KV pair, which in this job means
     * once per line of input.
     *
     * @param key     input key: the byte offset of the line in the file
     * @param value   input value: the line itself, the part we actually process
     * @param context used to emit the output KV pairs
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split the line on spaces and emit (word, 1) for every word.
        String[] ws = value.toString().split(" ");
        for (String word : ws) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
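
Traced against the first line of the sample input file below, one map() call produces (an illustration, not part of the commit):

// input:  (0, "a b c d a a a a")
// output: (a,1) (b,1) (c,1) (d,1) (a,1) (a,1) (a,1) (a,1)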

hbase-lesson/hbase-20250509/src/main/java/WordCount/WordCountReducer.java

@@ -0,0 +1,30 @@
package WordCount;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * The reduce logic: runs once per key group. With the words b and d,
     * for example, there are two groups, so reduce() runs twice; for word b
     * the input looks like (b, <1,1,1,1,1,1,1,1,1>).
     *
     * @param key     the word
     * @param values  all counts emitted for this word by the mappers
     * @param context used to emit the (word, total) output pair
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int number = 0;
        for (IntWritable value : values) {
            // Sum the values instead of merely counting them, so the result
            // stays correct even if a combiner pre-aggregates the 1s.
            number += value.get();
        }
        context.write(key, new IntWritable(number));
    }
}
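
After the shuffle groups the map output by word, each reduce() call receives one word together with all of its 1s. For key "a" over the full sample input below, the call looks like (an illustration, not part of the commit):

// input:  ("a", <1,1,1,1,1,1,1,1,1,1,1,1,1,1>)   (fourteen 1s)
// output: ("a", 14)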

hbase-lesson/hbase-20250509/src/main/java/WordCount/WorldCountDriver.java

@@ -0,0 +1,41 @@
package WordCount;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WorldCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Configuration object
        Configuration conf = new Configuration();
        // 2. Create the job object
        Job job = Job.getInstance(conf, "wordcount");
        // Needed when running on a cluster so Hadoop can ship the right jar.
        job.setJarByClass(WorldCountDriver.class);
        // 2.1 Set the map and reduce task classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 2.2 Set the map and reduce output KV types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 2.3 Set the number of reducers (default is 1)
        job.setNumReduceTasks(2);
        // 2.4 Set the input and output paths
        // String hdfs_projPath = "hdfs://localhost:9000/user/ccd/HBaseCourseProj/hbase_demo3_mapreduce/";
        String hdfs_projPath = "hbase-lesson/hbase-20250509/src/main/java/WordCount";
        FileInputFormat.setInputPaths(job, new Path(hdfs_projPath + "/input/"));
        // The output directory must not exist yet, or the job fails on submit.
        FileOutputFormat.setOutputPath(job, new Path(hdfs_projPath + "/output/"));
        // 3. Submit the job, wait for completion, and exit with its status
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
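
Once the module is packaged, the job can be launched with the standard Hadoop launcher (the jar name here is an assumption):

hadoop jar hbase-20250509.jar WordCount.WorldCountDriver

Because setNumReduceTasks(2) is set, the counts end up split across part-r-00000 and part-r-00001 under the output directory.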

hbase-lesson/hbase-20250509/src/main/java/WordCount/input/ (sample input file)

@@ -0,0 +1,4 @@
a b c d a a a a
a b c d a a a b
a b c d a a c c
a b c d a d d d
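
For reference, running the job over these four lines yields the following totals, split across the two reducer output files by the hash partitioner:

a	14
b	5
c	6
d	7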