MapReduce notes
2022-07-06 11:44:00 【@Little snail】
Catalog
- 1. Definition of MapReduce
- 2. Using MapReduce to count how many times each word appears in two text files
- Note: Hadoop's MapReduce and HDFS must be started first with start-dfs.sh
- 3. Using MapReduce to find each student's best grade in the class
- 4. Using MapReduce to merge file contents and remove duplicates
1. Definition of MapReduce
The input data is split into pieces (three, for example), each piece is processed independently (map), the intermediate results are sent to one machine and merged (merge), and the merged data is then aggregated (reduce) and written out as the final result.
A Java MapReduce program consists of three parts:
- map: splits the data set and processes it record by record
- reduce: aggregates the processed data
- a Job object that configures and runs the MapReduce job
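As a small illustration of this data flow, take the line "Hello World Bye World" as input: the map stage emits a (word, 1) pair for every token, the pairs are grouped by key during the shuffle, and the reduce stage sums each group:

map:     (Hello,1) (World,1) (Bye,1) (World,1)
shuffle: Bye -> [1]   Hello -> [1]   World -> [1,1]
reduce:  Bye 1   Hello 1   World 2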
2. Using MapReduce to count how many times each word appears in two text files
First, create two files in the current directory.
Create file01 with the content:
Hello World Bye World
Create file02 with the content:
Hello Hadoop Goodbye Hadoop
Upload the files to the /usr/input directory on HDFS.
Don't forget to start DFS first:
start-dfs.sh
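One way to create the directory and upload the two files from the shell (a sketch, assuming the files sit in the current local directory and HDFS is already running):

hdfs dfs -mkdir -p /usr/input
hdfs dfs -put file01 file02 /usr/input
hdfs dfs -ls /usr/input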
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {

    // Mapper class.
    // The input key (LongWritable) is the byte offset of the line in the file and the
    // first Text is the line content; the second Text and the IntWritable are the
    // key/value types that are sent on to the Reducer.
    public static class TokenizerMapper
            extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Split the line into tokens and emit (word, 1) for each token
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer
            extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Sum all counts received for this word
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        // Create the configuration object
        Configuration conf = new Configuration();
        // Create the job object
        Job job = new Job(conf, "word count");
        // Set the class that runs the job
        job.setJarByClass(WordCount.class);
        // Set the Mapper class
        job.setMapperClass(TokenizerMapper.class);
        // Set the Reducer class
        job.setReducerClass(IntSumReducer.class);
        // Set the output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Input path
        String inputfile = "/usr/input";
        // Output path
        String outputFile = "/usr/output";
        // Register the input path
        FileInputFormat.addInputPath(job, new Path(inputfile));
        // Register the output path
        FileOutputFormat.setOutputPath(job, new Path(outputFile));
        // Exit with 0 if the job succeeds, 1 otherwise
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
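With the two sample files above as input, the job can be packaged and run roughly like this (a sketch; the jar name is only an example, and the output file name assumes the default single reducer):

hadoop jar wordcount.jar WordCount
hdfs dfs -cat /usr/output/part-r-00000

The expected word counts are:

Bye     1
Goodbye 1
Hadoop  2
Hello   2
World   2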
Hadoop's MapReduce and HDFS must be started first with start-dfs.sh.
3. Using MapReduce to find each student's best grade in the class
import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    /********** Begin **********/
    // Each input line has the form "name score"; the mapper emits (name, score).
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private IntWritable score = new IntWritable();
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString(), "\n");
            while (itr.hasMoreTokens()) {
                String[] str = itr.nextToken().split(" ");
                word.set(str[0]);                    // student name
                score.set(Integer.parseInt(str[1])); // this student's score
                context.write(word, score);
            }
        }
    }

    // The reducer keeps the maximum score seen for each student.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int max = 0;
            for (IntWritable val : values) {
                max = Math.max(max, val.get());
            }
            result.set(max);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        // Taking a maximum is associative, so the reducer can also serve as the combiner
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        String inputfile = "/user/test/input";
        String outputFile = "/user/test/output/";
        FileInputFormat.addInputPath(job, new Path(inputfile));
        FileOutputFormat.setOutputPath(job, new Path(outputFile));
        job.waitForCompletion(true);
        /********** End **********/
    }
}
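As an illustration of this job (the names and scores below are made up, not part of the original exercise), suppose the input file contains one "name score" pair per line:

zhangsan 68
lisi 77
zhangsan 92
lisi 54

The reducer keeps the maximum value per name, so the output is each student's best grade:

lisi 77
zhangsan 92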
4. Using MapReduce to merge file contents and remove duplicates
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class Merge {
/**
 * Merge two files A and B, remove duplicate records, and produce a new output file C.
 */
// Override the map function here and copy the input value straight into the output key.
// Note that the map method must declare: throws IOException, InterruptedException
/********** Begin **********/
public static class Map extends Mapper<LongWritable, Text, Text, Text >
{
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
// Each input line is assumed to be two fields separated by a single space: "key value"
String str = value.toString();
String[] data = str.split(" ");
Text t1= new Text(data[0]);
Text t2 = new Text(data[1]);
context.write(t1,t2);
}
}
/********** End **********/
// Override the reduce function here and copy the input key straight into the output key.
// Note that the reduce method must declare: throws IOException, InterruptedException
/********** Begin **********/
public static class Reduce extends Reducer<Text, Text, Text, Text>
{
protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
throws IOException, InterruptedException {
// Collect each distinct value for this key once, then sort for deterministic output
List<String> list = new ArrayList<>();
for (Text text : values) {
String str = text.toString();
if(!list.contains(str)){
list.add(str);
}
}
Collections.sort(list);
for (String text : list) {
context.write(key, new Text(text));
}
}
/********** End **********/
}
public static void main(String[] args) throws Exception{
Configuration conf = new Configuration();
Job job = new Job(conf, "word count");
job.setJarByClass(Merge.class);
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
String inputPath = "/user/tmp/input/"; // Set the input path here
String outputPath = "/user/tmp/output/"; // Set the output path here
FileInputFormat.addInputPath(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
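For example (the sample records are illustrative only, not taken from the exercise), if file A contains

20170101 x
20170102 y
20170103 x

and file B contains

20170101 y
20170102 y
20170103 x

then the merged, de-duplicated output C is

20170101 x
20170101 y
20170102 y
20170103 x

Because both the key and the value are compared, identical lines appearing in both files are written only once, while lines that share a key but differ in value are all kept.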