feat(hbase-lesson): add new module hbase-20250509
- Add the hbase-20250509 module for the HBase course project
- Implement two MapReduce jobs: traffic statistics and word count
- Add the corresponding Mapper, Reducer, and Driver classes
- Add sample input files
- Configure Maven dependencies
@@ -0,0 +1,35 @@
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    /**
     * The map logic: called once per input KV pair, which in this job means
     * once per line of the input file.
     *
     * @param key     input key: the byte offset of the line within the file
     * @param value   input value: the line content, which is what we actually process
     * @param context used to emit the output KV pairs
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] ws = value.toString().split(" ");
        for (String word : ws) {
            // emit <word, 1> for every token in the line
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
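Note: split(" ") yields empty tokens whenever words are separated by multiple spaces or tabs. A minimal, more defensive variant (an illustrative sketch, not part of this commit) splits on any whitespace run and skips blanks:

    String[] ws = value.toString().trim().split("\\s+");
    for (String word : ws) {
        if (!word.isEmpty()) {   // trim().split() still returns one empty token for a blank line
            context.write(new Text(word), new IntWritable(1));
        }
    }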
@@ -0,0 +1,30 @@
package WordCount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * The reduce logic: called once per key group, i.e. once per distinct word.
     * Two distinct words such as "b" and "d" form two groups, so reduce runs twice.
     * For the word "b" the grouped input looks like: b -> <1,1,1,1,1,1,1,1,1>
     *
     * @param key     the word
     * @param values  all counts emitted for this word by the mappers
     * @param context used to emit the output KV pair
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int number = 0;
        for (IntWritable value : values) {
            // sum the actual values instead of just counting elements, so the
            // reducer stays correct if a Combiner has pre-aggregated counts > 1
            number += value.get();
        }
        context.write(key, new IntWritable(number));
    }
}
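Because the reducer sums value.get() rather than counting elements, the same class can double as a Combiner to shrink shuffle traffic. A minimal sketch of the extra call, assuming the driver setup shown below:

    job.setCombinerClass(WordCountReducer.class);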
@@ -0,0 +1,41 @@
package WordCount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WorldCountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. configuration object
        Configuration conf = new Configuration();
        // 2. create the job object
        Job job = Job.getInstance(conf, "wordcount");
        // let the framework locate the job jar by class when running on a cluster
        job.setJarByClass(WorldCountDriver.class);
        // 2.1 set the map and reduce task classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 2.2 set the output KV types of map and reduce
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 2.3 set the number of reducers (default is 1)
        job.setNumReduceTasks(2);
        // 2.4 set the input and output paths

        // String hdfs_projPath = "hdfs://localhost:9000/user/ccd/HBaseCourseProj/hbase_demo3_mapreduce/";
        String hdfs_projPath = "hbase-lesson/hbase-20250509/src/main/java/WordCount";

        FileInputFormat.setInputPaths(job, new Path(hdfs_projPath + "/input/"));
        FileOutputFormat.setOutputPath(job, new Path(hdfs_projPath + "/output/"));

        // 3. submit the job, wait for completion, and report success via the exit code
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
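Note: FileOutputFormat fails the job if the output directory already exists from a previous run. A minimal cleanup sketch that could run before waitForCompletion, assuming the same conf and hdfs_projPath as above (requires import org.apache.hadoop.fs.FileSystem;):

    FileSystem fs = FileSystem.get(conf);
    Path outputPath = new Path(hdfs_projPath + "/output/");
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);   // recursively remove the previous run's output
    }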
@@ -0,0 +1,4 @@
a b c d a a a a
a b c d a a a b
a b c d a a c c
a b c d a d d d
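For reference (a hand-computed check, not part of the commit), the aggregate counts over these four sample lines are:

    a	14
    b	5
    c	6
    d	7

With job.setNumReduceTasks(2), these pairs are split across part-r-00000 and part-r-00001 by the default HashPartitioner rather than landing in a single output file.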