博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
hadoop的WordCount样例
阅读量:4942 次
发布时间:2019-06-11

本文共 2327 字,大约阅读时间需要 7 分钟。

package cn.lmj.mapreduce;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
/**
 * Classic Hadoop WordCount example built on the legacy {@code org.apache.hadoop.mapred}
 * (pre-0.20 "old") MapReduce API.
 *
 * <p>The mapper tokenizes each input line on whitespace and emits {@code (word, 1)};
 * the reducer (also installed as the combiner) sums the counts per word.
 *
 * <p>Input/output paths may be passed as {@code args[0]}/{@code args[1]}; when absent,
 * the original hard-coded HDFS paths are used for backward compatibility.
 */
public class WordCount

{

// mapper

/**
 * Emits {@code (token, 1)} for every whitespace-separated token of the input line.
 * Key/value writables are reused across calls to avoid per-record allocation,
 * which is safe because the old API serializes them on each {@code collect}.
 */
public static class WordCountMapper extends MapReduceBase implements Mapper<LongWritable,Text,Text,LongWritable>
{
// Reused output writables (old-API idiom: collect() serializes immediately).
LongWritable count = new LongWritable(1);
Text content = new Text();
@Override
public void map(LongWritable key, Text value,
OutputCollector<Text, LongWritable> output, Reporter report)
throws IOException

{

// Split on runs of whitespace; "\\s+" (rather than a single space) avoids
// emitting empty tokens when the line contains consecutive spaces or tabs.
String str = value.toString();
String[] arr = str.split("\\s+");
for(String s : arr)
{
// A leading separator still yields one empty leading token — skip it
// so empty strings are never counted as words.
if(s.isEmpty())
{
continue;
}
content.set(s);
output.collect(content,count);
}
}
}

// reducer

/**
 * Sums all counts observed for a key and emits {@code (word, totalCount)}.
 * Used both as combiner and reducer; summation is associative, so that is safe.
 */
public static class WordCountReduce extends MapReduceBase implements Reducer<Text,LongWritable,Text,LongWritable>
{
@Override
public void reduce(Text key, Iterator<LongWritable> values,
OutputCollector<Text, LongWritable> output, Reporter rep)
throws IOException

{

// Accumulate the partial counts for this key.
long sum = 0;
while(values.hasNext())
{
sum+=values.next().get();
}
output.collect(key,new LongWritable(sum));
}
}

/**
 * Configures and launches the WordCount job.
 *
 * @param args optional {@code [inputPath, outputPath]}; defaults to the
 *             original hard-coded HDFS paths when fewer than two are given
 * @throws Exception if job submission or execution fails
 */
public static void main(String[] args) throws Exception

{

// Create the job configuration.
// Fix: the original referenced a nonexistent WordCount2.class, which did not compile;
// the jar-locating class must be this class.
JobConf conf = new JobConf(WordCount.class);
conf.setJobName("lmj");

// Output key/value types (shared by map and reduce output here).
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(LongWritable.class);

// Map / combine / reduce classes; the reducer doubles as the combiner.
conf.setMapperClass(WordCountMapper.class);
conf.setCombinerClass(WordCountReduce.class);
conf.setReducerClass(WordCountReduce.class);

// Input/output formats: plain text in, plain text out.
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);

// Input and output paths: take them from the command line when provided,
// otherwise keep the original hard-coded defaults (backward compatible).
String inputPath = args.length >= 2 ? args[0] : "/aaa/hadoop.txt";
String outputPath = args.length >= 2 ? args[1] : "/aaa/output";
FileInputFormat.setInputPaths(conf,new Path(inputPath));
FileOutputFormat.setOutputPath(conf,new Path(outputPath));

// Submit the job and block until it completes.
JobClient.runJob(conf);
}
}

转载于:https://www.cnblogs.com/mengfanrong/p/3860304.html

你可能感兴趣的文章
python之文件路径截取 & endswith()
查看>>
浏览器内的事件队列
查看>>
idea激活
查看>>
使用ML.NET实现白葡萄酒品质预测
查看>>
Linux Shell统计每秒钟内文件增加行数
查看>>
使用Button组件
查看>>
c语言字符串处理函数
查看>>
牛客网-对称与反对称 【逆元】
查看>>
[haoi2009]求回文串
查看>>
双亲数 容斥
查看>>
RabbitMQ管理界面
查看>>
AutoMapper: Mapper.Initialize() 只能调用一次,Why?
查看>>
基础回顾之可变参数
查看>>
闲说测试
查看>>
[译]开闭原则
查看>>
四种简单的排序算法
查看>>
天外有天
查看>>
吴恩达《深度学习》第二门课(3)超参数调试、Batch正则化和程序框架
查看>>
[国嵌笔记][010][TFTP与NFS服务器配置]
查看>>
SEO 统计算法
查看>>