每日总结2023/9/27(mapreduce案例)

发布时间 2023-09-27 20:47:26作者: 橘子味芬达水

参考例文:

MapReduce经典案例实战_mapreduce编程案例-CSDN博客

map代码(注意:下面贴出的实际上是 InvertedIndexReducer 类,与后文的 reduce 代码重复,并不是 Mapper 的实现)

package cn.com.sise.mapreduce.invertedindex;

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce stage of the inverted-index job.
 *
 * <p>For each key it joins every incoming value into one string, appending
 * {@code ';'} after each value, and emits a single {@code <key, joined>} pair.
 *
 * <p>Example taken from the original notes (the exact value format is produced
 * upstream and is not visible here):
 * <pre>
 *   input : &lt;MapReduce, file3:2&gt;
 *   output: &lt;MapReduce, file1:1;file2:1;file3:2;&gt;
 * </pre>
 */
public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
    // Reused output holder. Kept per instance (not static) so that reducer
    // instances living in the same JVM never share mutable state.
    private final Text result = new Text();

    /**
     * Concatenates all values of {@code key} into a {@code ';'}-terminated list
     * and writes it to the context.
     *
     * @param key     the key whose values are being merged
     * @param values  the values grouped under {@code key}
     * @param context the task context the merged record is written to
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // StringBuilder avoids re-copying the whole list on every appended value.
        StringBuilder fileList = new StringBuilder();
        for (Text value : values) {
            fileList.append(value.toString()).append(';');
        }
        result.set(fileList.toString());
        context.write(key, result);
    }
}

reduce代码

package cn.com.sise.mapreduce.invertedindex;

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the inverted-index job: merges all values that share a key into
 * one {@code ';'}-terminated string and emits it under that key.
 *
 * <p>Example taken from the original notes (the value format itself is decided
 * by the upstream stages, which are not shown here):
 * <pre>
 *   input : &lt;MapReduce, file3:2&gt;
 *   output: &lt;MapReduce, file1:1;file2:1;file3:2;&gt;
 * </pre>
 */
public class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
    // Output holder reused across reduce() calls. An instance field rather than
    // a static one, so separate reducer instances do not share mutable state.
    private final Text result = new Text();

    /**
     * Builds the document list for one key and writes it out.
     *
     * @param key     the key whose values are being merged
     * @param values  the values grouped under {@code key}
     * @param context the task context that receives the merged record
     * @throws IOException          if writing the output record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Build the document list; a StringBuilder keeps this linear in the
        // total length instead of copying the string on every iteration.
        StringBuilder fileList = new StringBuilder();
        for (Text value : values) {
            fileList.append(value.toString()).append(';');
        }
        result.set(fileList.toString());
        context.write(key, result);
    }
}

runner代码

package cn.com.sise.mapreduce.invertedindex;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input. FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.yarn.service.api.records.Configuration;

/**
 * Driver that configures and submits the inverted-index MapReduce job.
 */
public class InvertedIndexDriver {
    /** Input location used when no command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH =
            "hdfs://localhost:9000/user/hadoop/inputdata";
    /** Output location used when fewer than two command-line arguments are supplied. */
    private static final String DEFAULT_OUTPUT_PATH =
            "hdfs://localhost:9000/user/hadoop/outputdata";

    /**
     * Wires mapper, combiner and reducer into a job, submits it, waits for it
     * to finish and exits with status 0 on success or 1 on failure.
     *
     * @param args optional: {@code args[0]} overrides the input path and
     *             {@code args[1]} overrides the output path; the built-in
     *             defaults are used for whichever is missing
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws IOException            if the job cannot be set up or submitted
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws ClassNotFoundException, IOException, InterruptedException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        // The previous version created a local Configuration that was never
        // handed to the job; Job.getInstance() sets up its own default
        // configuration, so the dead local has been removed.
        Job job = Job.getInstance();
        job.setJobName("inverted-index");
        job.setJarByClass(InvertedIndexDriver.class);

        job.setMapperClass(InvertedIndexMapper.class);
        job.setCombinerClass(InvertedIndexCombiner.class);
        job.setReducerClass(InvertedIndexReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        // Where the results are stored once processing has finished.
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        // Submit the job to the cluster and block until it completes.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}