Educoder MapReduce Notes
2022-07-06 11:44:00 【@Little snail】
Catalog
- 1、MapReduce definition
- 2、Using MapReduce to count how many times each word appears in two text files
- Hadoop's MapReduce and HDFS require the DFS daemons; be sure to run start-dfs.sh first
- 3、Using MapReduce to compute each student's best score
- 4、Using MapReduce to merge file contents and remove duplicates
1、MapReduce definition
MapReduce splits the input data into pieces (three, say), processes each piece independently (map), sends the intermediate results to one machine to be merged (merge), and then aggregates the merged data (reduce) to produce the final output.
In Java, a MapReduce program consists of three parts:
- a map function, which splits the data set into key/value pairs
- a reduce function, which processes and aggregates the data
- a Job object, which configures and runs the MapReduce job
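For intuition, here is how the word-count job from section 2 below flows through these stages (a sketch of the data flow, not actual program output):
map:     "Hello World Bye World"       → (Hello,1) (World,1) (Bye,1) (World,1)
         "Hello Hadoop Goodbye Hadoop" → (Hello,1) (Hadoop,1) (Goodbye,1) (Hadoop,1)
shuffle: group values by key           → (Bye,[1]) (Goodbye,[1]) (Hadoop,[1,1]) (Hello,[1,1]) (World,[1,1])
reduce:  sum each group                → (Bye,1) (Goodbye,1) (Hadoop,2) (Hello,2) (World,2)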
2、Using MapReduce to count how many times each word appears in two text files
First, create two files in the current directory.
Create file01 with the content:
Hello World Bye World
Create file02 with the content:
Hello Hadoop Goodbye Hadoop
Upload the files to the /usr/input/ directory on HDFS.
Don't forget to start DFS first:
start-dfs.sh
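With DFS running, one possible command sequence for creating the files and uploading them (a sketch; any equivalent hadoop fs commands will do):
echo "Hello World Bye World" > file01
echo "Hello Hadoop Goodbye Hadoop" > file02
hadoop fs -mkdir -p /usr/input
hadoop fs -put file01 file02 /usr/input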
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
    // Mapper class
    /* Hadoop feeds the mapper one line at a time: the LongWritable key is the
       byte offset of the line, and the first Text is the line's content; the
       second Text is the key type sent to the Reducer, and IntWritable is the
       value type sent to the Reducer. */
    public static class TokenizerMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line into words and emit (word, 1) for each one
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }
    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum all the counts emitted for this word
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
    public static void main(String[] args) throws Exception {
        // Create the configuration object
        Configuration conf = new Configuration();
        // Create the job object
        Job job = Job.getInstance(conf, "word count");
        // Set the class that runs the job
        job.setJarByClass(WordCount.class);
        // Set the Mapper class
        job.setMapperClass(TokenizerMapper.class);
        // Set the Reducer class
        job.setReducerClass(IntSumReducer.class);
        // Set the output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the input path
        String inputfile = "/usr/input";
        // Set the output path
        String outputFile = "/usr/output";
        FileInputFormat.addInputPath(job, new Path(inputfile));
        FileOutputFormat.setOutputPath(job, new Path(outputFile));
        // Exit with 0 if the job completes successfully, 1 otherwise
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
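Assuming the job has been packaged into a jar (wordcount.jar below is just an example name; the input and output paths are hardcoded, so no arguments are needed), it can be run and its result inspected like this:
hadoop jar wordcount.jar WordCount
hadoop fs -cat /usr/output/part-r-00000
With the two input files above, the output should be:
Bye	1
Goodbye	1
Hadoop	2
Hello	2
World	2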
Remember: Hadoop's MapReduce and HDFS require the DFS daemons, so be sure to run start-dfs.sh first.
3、Using MapReduce to compute each student's best score
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
    /********** Begin **********/
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private Text name = new Text();
        private IntWritable score = new IntWritable();
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line has the form "name score"; emit (name, score)
            StringTokenizer itr = new StringTokenizer(value.toString(), "\n");
            while (itr.hasMoreTokens()) {
                String[] str = itr.nextToken().split(" ");
                name.set(str[0]);
                score.set(Integer.parseInt(str[1]));
                context.write(name, score);
            }
        }
    }
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Keep the highest score seen for this student
            int maxScore = 0;
            for (IntWritable val : values) {
                maxScore = Math.max(maxScore, val.get());
            }
            result.set(maxScore);
            context.write(key, result);
        }
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "max score");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // Using the reducer as a combiner is safe here because max is associative
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        String inputfile = "/user/test/input";
        String outputFile = "/user/test/output/";
        FileInputFormat.addInputPath(job, new Path(inputfile));
        FileOutputFormat.setOutputPath(job, new Path(outputFile));
        job.waitForCompletion(true);
        /********** End **********/
    }
}
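The mapper expects each input line to contain a student name and a score separated by a space. With a hypothetical input file such as:
lisi 67
zhangsan 78
lisi 89
zhangsan 92
the job would output each student's best score:
lisi	89
zhangsan	92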
4、Using MapReduce to merge file contents and remove duplicates
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class Merge {
    /**
     * Merge two input files A and B, remove duplicate lines,
     * and produce a new output file C.
     */
    // Override the map function here: copy the input value into the output key.
    // Note that map must declare: throws IOException, InterruptedException
    /********** Begin **********/
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split each line "field1 field2" and emit (field1, field2)
            String[] data = value.toString().split(" ");
            context.write(new Text(data[0]), new Text(data[1]));
        }
    }
/********** End **********/
    // Override the reduce function here: copy the input key into the output key.
    // Note that reduce must declare: throws IOException, InterruptedException
/********** Begin **********/
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // Collect the distinct values for this key, then emit them in sorted order
            List<String> list = new ArrayList<>();
            for (Text text : values) {
                String str = text.toString();
                if (!list.contains(str)) {
                    list.add(str);
                }
            }
            Collections.sort(list);
            for (String text : list) {
                context.write(key, new Text(text));
            }
        }
        /********** End **********/
    }
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "merge and deduplicate");
        job.setJarByClass(Merge.class);
        job.setMapperClass(Map.class);
        // The reducer already deduplicates, so it can double as a combiner
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        String inputPath = "/user/tmp/input/"; // Set the input path here
        String outputPath = "/user/tmp/output/"; // Set the output path here
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
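As a hypothetical example, if file A contains:
20150101 x
20150102 y
and file B contains:
20150101 x
20150103 z
then the merged, deduplicated output C would be:
20150101	x
20150102	y
20150103	z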