While writing a WordCount program, I ran into a strange problem: the output folder could be created on HDFS, but it contained no result files from the run. As shown in the screenshot, the output3 folder holds 0 files: http://s1/mw690/002aBHAXzy6S1S7wrW870&690

This had me completely baffled. After fiddling with it for half a day, I noticed by chance that I had miswritten my Map-side and Reduce-side methods as Map() and Reduce(); renaming them to map() and reduce() fixed it. In hindsight this makes sense: the capitalized names do not override Mapper.map() and Reducer.reduce(), so Hadoop presumably falls back to the default implementations, whose output types no longer match the ones declared on the job, and the failed tasks leave the output directory empty. My program is as follows:
import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TxtCounter {

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        String inputPathString = "hdfs://master:9000/inputFile";
        String outputPathString = "hdfs://master:9000/output2";

        Job job = new Job(conf, TxtCounter.class.getSimpleName());
        job.setMapperClass(TxtMapper.class);
        job.setCombinerClass(TxtReducer.class);
        job.setReducerClass(TxtReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputPathString));
        FileOutputFormat.setOutputPath(job, new Path(outputPathString));

        // waitForCompletion returns false when the job fails, which is worth
        // checking: a failed job is exactly what leaves the output folder empty.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    static class TxtMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        // Must be map(), lowercase. @Override makes the compiler reject a
        // typo such as Map() that would otherwise fail silently.
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each input line on spaces and emit (word, 1) per token.
            String[] strs = value.toString().split(" ");
            for (String str : strs) {
                context.write(new Text(str), new IntWritable(1));
            }
        }
    }

    static class TxtReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum the 1s emitted for each word.
            int sum = 0;
            Iterator<IntWritable> it = values.iterator();
            while (it.hasNext()) {
                IntWritable value = it.next();
                sum += value.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }
}
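The takeaway is that Java's @Override annotation (which I have added to the fixed code above) would have caught this immediately: a method marked @Override that does not actually override anything in the superclass is a compile error, rather than a method Hadoop silently never calls. A minimal sketch of the idea; DemoMapper is a made-up name for illustration:

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class DemoMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Uncommenting this miscapitalized method makes compilation fail with
    // "method does not override or implement a method from a supertype".
    // Without @Override it would compile and simply never be called:
    //
    // @Override
    // protected void Map(LongWritable key, Text value, Context context) { }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Trivial body: emit the whole line with a count of 1.
        context.write(value, new IntWritable(1));
    }
}

Most IDEs also insert @Override automatically when generating override stubs, which avoids this kind of mistake entirely.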