当前位置:网站首页>MapReduce project case 3 - temperature statistics
MapReduce project case 3 - temperature statistics
2022-06-28 12:02:00 【A vegetable chicken that is working hard】
For each year and month, find the two days with the highest temperature.
1. Data (one reading per line: timestamp and temperature, separated by a tab)
2020-01-02 10:22:22 1c
2020-01-03 10:22:22 2c
2020-01-04 10:22:22 4c
2020-02-01 10:22:22 7c
2020-02-02 10:22:22 9c
2020-02-03 10:22:22 11c
2020-02-04 10:22:22 1c
2019-01-02 10:22:22 1c
2019-01-03 10:22:22 2c
2019-01-04 10:22:22 4c
2019-02-01 10:22:22 7c
2019-02-02 10:22:22 9c
2018-02-03 10:22:22 11c
2018-02-04 10:22:22 1c
2. Demand analysis
- Group the readings by year and month
- Within each group, sort by temperature in descending order and keep the first two
3.Weather
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Composite key describing a single temperature reading: the calendar date
 * (year, month, day) plus the temperature in whole degrees.
 *
 * <p>The natural ordering is year ascending, then month ascending, then
 * temperature descending. The day is carried along but does not take part in
 * the ordering.
 */
public class Weather implements WritableComparable<Weather> {
    private int year;
    private int month;
    private int day;
    private int degree; // temperature

    /**
     * Writes the four fields as ints in the order year, month, day, degree.
     *
     * <p>The order must mirror {@link #readFields(DataInput)}; the original
     * author notes that a mismatch between the two shows up as
     * {@code java.lang.RuntimeException: java.io.EOFException}.
     */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(year);
        dataOutput.writeInt(month);
        dataOutput.writeInt(day);
        dataOutput.writeInt(degree);
    }

    /** Reads the four fields back in exactly the order they were written. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        year = dataInput.readInt();
        month = dataInput.readInt();
        day = dataInput.readInt();
        degree = dataInput.readInt();
    }

    /**
     * Orders by year, then month (both ascending), then temperature (descending).
     *
     * @param o the reading to compare against
     * @return negative, zero or positive as this reading sorts before, equal to
     *     or after {@code o}
     */
    @Override
    public int compareTo(Weather o) {
        int byYear = Integer.compare(year, o.getYear());
        if (byYear != 0) {
            return byYear;
        }
        int byMonth = Integer.compare(month, o.getMonth());
        if (byMonth != 0) {
            return byMonth;
        }
        // Operands swapped on purpose: hotter readings sort first.
        return Integer.compare(o.getDegree(), degree);
    }

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getMonth() {
        return month;
    }

    public void setMonth(int month) {
        this.month = month;
    }

    public int getDay() {
        return day;
    }

    public void setDay(int day) {
        this.day = day;
    }

    public int getDegree() {
        return degree;
    }

    public void setDegree(int degree) {
        this.degree = degree;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Weather{");
        text.append("year=").append(year);
        text.append(", month=").append(month);
        text.append(", day=").append(day);
        text.append(", degree=").append(degree);
        text.append('}');
        return text.toString();
    }
}
4.WeatherMapper
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
/**
 * Step 1 - map: turns each input line into a {@code (Weather, temperature)} pair.
 *
 * <p>A line is expected to hold two tab-separated columns: a timestamp in
 * {@code yyyy-MM-dd HH:mm:ss} format and a temperature written as an integer
 * followed by the unit suffix {@code c} (for example {@code 11c}).
 *
 * <p>Lines with fewer than two columns are ignored. Lines whose timestamp or
 * temperature cannot be parsed are skipped, reported on stderr and counted in
 * the {@code WeatherMapper / MALFORMED_RECORDS} job counter instead of failing
 * the task.
 */
public class WeatherMapper extends Mapper<LongWritable, Text, Weather, IntWritable> {
    private static final String COUNTER_GROUP = "WeatherMapper";
    private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

    // Built once per mapper instead of once per record. SimpleDateFormat and
    // Calendar are not thread-safe, so these are instance (not static) fields;
    // this assumes map() is driven by a single thread per mapper instance.
    private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    private final Calendar calendar = Calendar.getInstance();

    /**
     * Parses one input line and emits its reading.
     *
     * @param key     the input key supplied by the input format (used only in diagnostics)
     * @param value   the raw input line
     * @param context the task context used to emit output and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Split into columns on the tab separator.
        String[] split = value.toString().trim().split("\t");
        if (split.length < 2) {
            return;
        }

        int degree;
        try {
            Date date = format.parse(split[0]);
            calendar.setTime(date);
            degree = parseDegree(split[1]);
        } catch (ParseException | NumberFormatException e) {
            // A bad row should not kill the whole task: record it and move on.
            context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
            System.err.println("Skipping malformed record (input key " + key + "): " + e.getMessage());
            return;
        }

        Weather weather = new Weather();
        weather.setYear(calendar.get(Calendar.YEAR));
        weather.setMonth(calendar.get(Calendar.MONTH) + 1); // Calendar months are zero-based
        weather.setDay(calendar.get(Calendar.DAY_OF_MONTH));
        weather.setDegree(degree);
        context.write(weather, new IntWritable(degree));
    }

    /**
     * Extracts the integer temperature from a field such as {@code "11c"}.
     *
     * @param field the raw temperature column
     * @return the temperature in whole degrees
     * @throws NumberFormatException if the unit suffix is missing or the part
     *     before it is not an integer
     */
    private static int parseDegree(String field) {
        String text = field.trim();
        int unitIndex = text.lastIndexOf('c');
        if (unitIndex < 0) {
            throw new NumberFormatException("temperature is missing the 'c' suffix: \"" + field + "\"");
        }
        return Integer.parseInt(text.substring(0, unitIndex));
    }
}
5.WeatherPartition
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Step 2 - partition: routes every reading to a reducer chosen from its year,
 * so that all readings of one year land in the same partition.
 *
 * <p>The partition index is {@code (year - 1929) mod numPartitions}. Distinct
 * years share a partition whenever their offsets are congruent modulo the
 * number of partitions; each year gets its own partition only while the number
 * of distinct years does not exceed {@code numPartitions} and the years are
 * consecutive.
 */
public class WeatherPartition extends Partitioner<Weather, IntWritable> {
    /** Year subtracted from every reading's year before taking the modulus. */
    private static final int BASE_YEAR = 1929;

    /**
     * Computes the partition for one key. Called for every map output record,
     * so it deliberately does no I/O or logging.
     *
     * @param weather       the map output key
     * @param intWritable   the map output value (unused)
     * @param numPartitions the number of partitions, as set through
     *                      {@code job.setNumReduceTasks(...)}
     * @return an index in the range {@code [0, numPartitions)}
     */
    @Override
    public int getPartition(Weather weather, IntWritable intWritable, int numPartitions) {
        int bucket = (weather.getYear() - BASE_YEAR) % numPartitions;
        // Java's % keeps the sign of the dividend; shift negative remainders
        // (years before BASE_YEAR) back into the valid range.
        return bucket < 0 ? bucket + numPartitions : bucket;
    }
}
6.WeatherGroup
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
/**
 * Step 3 - grouping: decides which keys inside a partition are handed to the
 * same {@code reduce()} call.
 *
 * <p>By default only identical keys share a group. Here two keys are considered
 * equal as soon as their year and month match, so all readings of one month
 * arrive together and the reducer can pick the first two of them. Without this
 * comparator the readings would be passed in one by one and no such selection
 * would be possible.
 */
public class WeatherGroup extends WritableComparator {
    public WeatherGroup() {
        // NOTE(review): the boolean presumably asks WritableComparator to
        // instantiate Weather keys for comparison - confirm against the Hadoop docs.
        super(Weather.class, true);
    }

    /**
     * Compares two keys by year and then by month, ignoring day and temperature.
     *
     * @return zero when both keys belong to the same year and month
     */
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        Weather left = (Weather) a;
        Weather right = (Weather) b;
        int byYear = Integer.compare(left.getYear(), right.getYear());
        if (byYear != 0) {
            return byYear;
        }
        return Integer.compare(left.getMonth(), right.getMonth());
    }
}
7.WeatherReduce
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Step 4 - reduce: for every group of readings, writes the first two values as
 * lines of the form {@code year-month-day<TAB>temperature}.
 */
public class WeatherReduce extends Reducer<Weather, IntWritable, Text, NullWritable> {
    /** Number of readings emitted per group. */
    private static final int TOP_N = 2;

    /**
     * Emits at most {@link #TOP_N} lines for the given group.
     *
     * @param key     the key of the current group
     * @param values  the temperatures belonging to the group
     * @param context the task context used to emit output
     * @throws IOException          if writing an output line fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Weather key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int remaining = TOP_N;
        for (IntWritable temperature : values) {
            if (remaining == 0) {
                break;
            }
            remaining--;
            // NOTE(review): the date is read from key on every iteration; this
            // presumably relies on the framework refreshing the key as the
            // values are iterated - confirm.
            StringBuilder line = new StringBuilder();
            line.append(key.getYear()).append("-")
                    .append(key.getMonth()).append("-")
                    .append(key.getDay()).append("\t")
                    .append(temperature.get());
            context.write(new Text(line.toString()), NullWritable.get());
        }
    }
}
8.WeatherMain
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver for the temperature statistics job (project Hadoop_MR, created
 * 2022-06-21): wires together the mapper, partitioner, grouping comparator and
 * reducer, then runs the job.
 *
 * <p>Usage: {@code WeatherMain [inputDir [outputDir]]}. When an argument is
 * omitted the corresponding built-in default directory is used.
 */
public class WeatherMain {
    private static final String DEFAULT_INPUT_DIR = "E:\\HadoopMRData\\input";
    private static final String DEFAULT_OUTPUT_DIR = "E:\\HadoopMRData\\output";

    /**
     * Configures and submits the job, then exits with status 0 on success and
     * 1 on failure.
     *
     * @param args optional input directory followed by optional output directory
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Hand the configuration to the job; previously it was created but never used.
        Job job = Job.getInstance(conf);
        job.setJarByClass(WeatherMain.class);

        // 1. Map. The original author notes that map output types other than
        // Text should be declared explicitly, otherwise no output is produced.
        job.setMapperClass(WeatherMapper.class);
        job.setMapOutputKeyClass(Weather.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 2. Partition by year across three reducers (one output file per reducer).
        job.setPartitionerClass(WeatherPartition.class);
        job.setNumReduceTasks(3);

        // 3. Sort: no custom sort comparator is registered for this job.

        // 4. Group keys inside each reducer by year and month.
        job.setGroupingComparatorClass(WeatherGroup.class);

        // 5. Reduce.
        job.setReducerClass(WeatherReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // Paths come from the command line when given, else fall back to the defaults.
        String inputDir = args.length > 0 ? args[0] : DEFAULT_INPUT_DIR;
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;
        FileInputFormat.addInputPath(job, new Path(inputDir));
        // NOTE(review): the output directory presumably must not exist yet when
        // the job starts - remove it beforehand if a previous run left it behind.
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        // Run the job and report its outcome through the process exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
边栏推荐
- Everyone can participate in open source! Here comes the most important developer activity in dragon lizard community
- SEO优化的许多好处是与流量有直接关系
- What is DAPP system development and analytical understanding
- AcWing 605. Simple product (implemented in C language)
- Software test interview classic + 1000 high-frequency real questions, and the hit rate of big companies is 80%
- Difference (one dimension)
- day36 js笔记 ECMA6语法 2021.10.09
- day33 js笔记 事件(下)2021.09.28
- Leetcode 48. 旋转图像(可以,已解决)
- If you want to change to software testing, how can you package your resume as a test engineer with 1 year of work experience
猜你喜欢

New listing of operation light 3.0 - a sincere work of self subversion across the times!

day30 js笔记 BOM和DOM 2021.09.24

Industry analysis - quick intercom, building intercom

Redis 原理 - List

Day39 prototype chain and page fireworks effect 2021.10.13

建立自己的网站(18)

Remote login sshd service

Day34 JS notes regular expression 2021.09.29

Fancy features and cheap prices! What is the true strength of Changan's new SUV?

day34 js笔记 正则表达式 2021.09.29
随机推荐
MapReduce项目案例1
Day28 strict mode, string JS 2021.09.22
The development and principle of the metacosmic system
Practice and Thinking on the architecture of a set of 100000 TPS im integrated message system
Chapter 2 do you remember the point, line and surface (2)
来吧元宇宙,果然这热度一时半会儿过不去了
Jetpack Compose Desktop 桌面版本的打包和发布应用
6. calculation index
5. Sum of N numbers
IO stream of file and Base64
Pre parsing, recursive functions and events in day25 JS 2021.09.16
Connectionreseterror: [winerror 10054] the remote host forced an existing connection to be closed
2022 open source software security status report: over 41% of enterprises do not have enough confidence in open source security
【无标题】虚拟机vmnet0找不到且报错:没有未桥接的主机网络适配器
Leetcode 48. 旋转图像(可以,已解决)
Training notice | special training notice on epidemic prevention and security prevention for overseas Chinese funded enterprises, institutions and personnel in 2022
6.A-B
Simple understanding of ThreadLocal
Is it feasible to be a programmer at the age of 26?
day29 js笔记 2021.09.23