当前位置:网站首页>The whole process scheduling, MySQL and Sqoop
全流程调度——MySQL与Sqoop (Full-Process Scheduling: MySQL and Sqoop)
2022-07-31 02:39:00 【Sisi】
1 创建MySQL数据库和表
创建gmall_report数据库
建表
-- ads_visit_stats: visit metrics, one row per statistics date, look-back
-- window, new/returning-visitor flag and channel (the primary key below).
DROP TABLE IF EXISTS `ads_visit_stats`;
CREATE TABLE `ads_visit_stats` (
    -- Dimension columns (all part of the primary key)
    `dt`               DATE         NOT NULL COMMENT '统计日期',
    `is_new`           VARCHAR(255) NOT NULL COMMENT '新老标识,1:新,0:老',
    `recent_days`      INT          NOT NULL COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
    `channel`          VARCHAR(255) NOT NULL COMMENT '渠道',
    -- Metric columns (nullable)
    `uv_count`         BIGINT(20)    DEFAULT NULL COMMENT '日活(访问人数)',
    `duration_sec`     BIGINT(20)    DEFAULT NULL COMMENT '页面停留总时长',
    `avg_duration_sec` BIGINT(20)    DEFAULT NULL COMMENT '一次会话,页面停留平均时长',
    `page_count`       BIGINT(20)    DEFAULT NULL COMMENT '页面总浏览数',
    `avg_page_count`   BIGINT(20)    DEFAULT NULL COMMENT '一次会话,页面平均浏览数',
    `sv_count`         BIGINT(20)    DEFAULT NULL COMMENT '会话次数',
    `bounce_count`     BIGINT(20)    DEFAULT NULL COMMENT '跳出数',
    `bounce_rate`      DECIMAL(16,2) DEFAULT NULL COMMENT '跳出率',
    PRIMARY KEY (`dt`, `recent_days`, `is_new`, `channel`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
-- ads_page_path: page-to-page jump counts per statistics date and look-back
-- window.
-- NOTE(review): `source` and `target` are nullable, and a MySQL UNIQUE index
-- permits multiple rows that contain NULL in an indexed column. Rows with a
-- NULL source or target are therefore not deduplicated by this key, so
-- re-running an upsert-style load can insert them again. Confirm whether the
-- upstream data can contain NULLs here.
DROP TABLE IF EXISTS `ads_page_path`;
CREATE TABLE `ads_page_path` (
    `dt`          DATE         NOT NULL     COMMENT '统计日期',
    `recent_days` BIGINT(20)   NOT NULL     COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
    `source`      VARCHAR(255) DEFAULT NULL COMMENT '跳转起始页面',
    `target`      VARCHAR(255) DEFAULT NULL COMMENT '跳转终到页面',
    -- Display width aligned with the other BIGINT columns in this schema
    -- (was 255); the width does not affect the stored range.
    `path_count`  BIGINT(20)   DEFAULT NULL COMMENT '跳转次数',
    UNIQUE KEY (`dt`, `recent_days`, `source`, `target`) USING BTREE
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
-- ads_user_total: user totals per statistics date and look-back window
-- (per the column comment, recent_days = 0 denotes the cumulative value).
DROP TABLE IF EXISTS `ads_user_total`;
CREATE TABLE `ads_user_total` (
    `dt`                   DATE          NOT NULL     COMMENT '统计日期',
    `recent_days`          BIGINT(20)    NOT NULL     COMMENT '最近天数,0:累积值,1:最近1天,7:最近7天,30:最近30天',
    `new_user_count`       BIGINT(20)    DEFAULT NULL COMMENT '新注册用户数',
    `new_order_user_count` BIGINT(20)    DEFAULT NULL COMMENT '新增下单用户数',
    `order_final_amount`   DECIMAL(16,2) DEFAULT NULL COMMENT '下单总金额',
    `order_user_count`     BIGINT(20)    DEFAULT NULL COMMENT '下单用户数',
    `no_order_user_count`  BIGINT(20)    DEFAULT NULL COMMENT '未下单用户数(具体指活跃用户中未下单用户)',
    PRIMARY KEY (`dt`, `recent_days`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
-- ads_user_change: churned and returning user counts, one row per
-- statistics date.
DROP TABLE IF EXISTS `ads_user_change`;
CREATE TABLE `ads_user_change` (
    `dt`               DATE       NOT NULL     COMMENT '统计日期',
    `user_churn_count` BIGINT(20) DEFAULT NULL COMMENT '流失用户数',
    `user_back_count`  BIGINT(20) DEFAULT NULL COMMENT '回流用户数',
    PRIMARY KEY (`dt`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
-- ads_user_action: number of users reaching each funnel step (home page,
-- product detail, cart, order, payment) per statistics date and look-back
-- window.
DROP TABLE IF EXISTS `ads_user_action`;
CREATE TABLE `ads_user_action` (
    `dt`                DATE       NOT NULL     COMMENT '统计日期',
    `recent_days`       BIGINT(20) NOT NULL     COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
    `home_count`        BIGINT(20) DEFAULT NULL COMMENT '浏览首页人数',
    `good_detail_count` BIGINT(20) DEFAULT NULL COMMENT '浏览商品详情页人数',
    `cart_count`        BIGINT(20) DEFAULT NULL COMMENT '加入购物车人数',
    `order_count`       BIGINT(20) DEFAULT NULL COMMENT '下单人数',
    `payment_count`     BIGINT(20) DEFAULT NULL COMMENT '支付人数',
    PRIMARY KEY (`dt`, `recent_days`) USING BTREE
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
-- ads_user_retention: retention figures keyed by the date users were added
-- and the number of retention days.
-- NOTE(review): unlike the other tables in this schema, `dt` is nullable
-- here and is not part of the primary key, and `create_date` is stored as
-- text rather than DATE -- confirm whether both are intentional.
DROP TABLE IF EXISTS `ads_user_retention`;
CREATE TABLE `ads_user_retention` (
    `dt`              DATE          DEFAULT NULL COMMENT '统计日期',
    `create_date`     VARCHAR(255)  NOT NULL     COMMENT '用户新增日期',
    `retention_day`   BIGINT(20)    NOT NULL     COMMENT '截至当前日期留存天数',
    `retention_count` BIGINT(20)    DEFAULT NULL COMMENT '留存用户数量',
    `new_user_count`  BIGINT(20)    DEFAULT NULL COMMENT '新增用户数量',
    `retention_rate`  DECIMAL(16,2) DEFAULT NULL COMMENT '留存率',
    PRIMARY KEY (`create_date`, `retention_day`) USING BTREE
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
-- ads_order_total: order count, order amount and ordering-user count per
-- statistics date and look-back window.
DROP TABLE IF EXISTS `ads_order_total`;
CREATE TABLE `ads_order_total` (
    `dt`               DATE          NOT NULL     COMMENT '统计日期',
    `recent_days`      BIGINT(20)    NOT NULL     COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
    -- Display widths aligned with the other BIGINT columns in this schema
    -- (were 255); the width does not affect the stored range.
    `order_count`      BIGINT(20)    DEFAULT NULL COMMENT '订单数',
    `order_amount`     DECIMAL(16,2) DEFAULT NULL COMMENT '订单金额',
    `order_user_count` BIGINT(20)    DEFAULT NULL COMMENT '下单人数',
    PRIMARY KEY (`dt`, `recent_days`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
-- ads_order_by_province: order count and amount per statistics date,
-- look-back window and province.
DROP TABLE IF EXISTS `ads_order_by_province`;
CREATE TABLE `ads_order_by_province` (
    -- The '统计日期' (statistics date) comment was attached to province_id
    -- by mistake; it now sits on dt, and province_id has its own comment.
    `dt`              DATE          NOT NULL     COMMENT '统计日期',
    `recent_days`     BIGINT(20)    NOT NULL     COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
    `province_id`     VARCHAR(255)  NOT NULL     COMMENT '省份ID',
    `province_name`   VARCHAR(255)  DEFAULT NULL COMMENT '省份名称',
    `area_code`       VARCHAR(255)  DEFAULT NULL COMMENT '地区编码',
    -- NOTE(review): iso_code and iso_code_3166_2 carry the same comment;
    -- confirm what distinguishes the two codes.
    `iso_code`        VARCHAR(255)  DEFAULT NULL COMMENT '国际标准地区编码',
    `iso_code_3166_2` VARCHAR(255)  DEFAULT NULL COMMENT '国际标准地区编码',
    `order_count`     BIGINT(20)    DEFAULT NULL COMMENT '订单数',
    `order_amount`    DECIMAL(16,2) DEFAULT NULL COMMENT '订单金额',
    PRIMARY KEY (`dt`, `recent_days`, `province_id`) USING BTREE
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
-- ads_repeat_purchase: repeat-purchase rate per statistics date, look-back
-- window and brand.
DROP TABLE IF EXISTS `ads_repeat_purchase`;
CREATE TABLE `ads_repeat_purchase` (
    `dt`                DATE          NOT NULL     COMMENT '统计日期',
    `recent_days`       BIGINT(20)    NOT NULL     COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
    `tm_id`             VARCHAR(255)  NOT NULL     COMMENT '品牌ID',
    `tm_name`           VARCHAR(255)  DEFAULT NULL COMMENT '品牌名称',
    `order_repeat_rate` DECIMAL(16,2) DEFAULT NULL COMMENT '复购率',
    PRIMARY KEY (`dt`, `recent_days`, `tm_id`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
-- ads_order_spu_stats: order count and amount per statistics date,
-- look-back window and SPU, with the SPU's brand and category attributes
-- stored alongside.
DROP TABLE IF EXISTS `ads_order_spu_stats`;
CREATE TABLE `ads_order_spu_stats` (
    `dt`             DATE          NOT NULL     COMMENT '统计日期',
    `recent_days`    BIGINT(20)    NOT NULL     COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',
    `spu_id`         VARCHAR(255)  NOT NULL     COMMENT '商品ID',
    `spu_name`       VARCHAR(255)  DEFAULT NULL COMMENT '商品名称',
    `tm_id`          VARCHAR(255)  NOT NULL     COMMENT '品牌ID',
    `tm_name`        VARCHAR(255)  DEFAULT NULL COMMENT '品牌名称',
    `category3_id`   VARCHAR(255)  NOT NULL     COMMENT '三级品类ID',
    `category3_name` VARCHAR(255)  DEFAULT NULL COMMENT '三级品类名称',
    `category2_id`   VARCHAR(255)  NOT NULL     COMMENT '二级品类ID',
    `category2_name` VARCHAR(255)  DEFAULT NULL COMMENT '二级品类名称',
    `category1_id`   VARCHAR(255)  NOT NULL     COMMENT '一级品类ID',
    -- Made nullable to match every other *_name column in this table (was
    -- NOT NULL): IDs are required, descriptive names are optional.
    `category1_name` VARCHAR(255)  DEFAULT NULL COMMENT '一级品类名称',
    `order_count`    BIGINT(20)    DEFAULT NULL COMMENT '订单数',
    `order_amount`   DECIMAL(16,2) DEFAULT NULL COMMENT '订单金额',
    PRIMARY KEY (`dt`, `recent_days`, `spu_id`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8;
-- ads_activity_stats: order counts and amounts for orders that took part in
-- a promotion activity, one row per statistics date and activity.
DROP TABLE IF EXISTS `ads_activity_stats`;
CREATE TABLE `ads_activity_stats` (
    `dt`                    DATE          NOT NULL     COMMENT '统计日期',
    `activity_id`           VARCHAR(255)  NOT NULL     COMMENT '活动ID',
    `activity_name`         VARCHAR(255)  DEFAULT NULL COMMENT '活动名称',
    `start_date`            DATE          DEFAULT NULL COMMENT '开始日期',
    -- Display width aligned with the other BIGINT columns in this schema
    -- (was 11); the width does not affect the stored range.
    `order_count`           BIGINT(20)    DEFAULT NULL COMMENT '参与活动订单数',
    `order_original_amount` DECIMAL(16,2) DEFAULT NULL COMMENT '参与活动订单原始金额',
    `order_final_amount`    DECIMAL(16,2) DEFAULT NULL COMMENT '参与活动订单最终金额',
    `reduce_amount`         DECIMAL(16,2) DEFAULT NULL COMMENT '优惠金额',
    `reduce_rate`           DECIMAL(16,2) DEFAULT NULL COMMENT '补贴率',
    PRIMARY KEY (`dt`, `activity_id`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
-- ads_coupon_stats: coupon claim, use and expiry counts plus the amounts of
-- orders that used the coupon, one row per statistics date and coupon.
DROP TABLE IF EXISTS `ads_coupon_stats`;
CREATE TABLE `ads_coupon_stats` (
    `dt`                    DATE          NOT NULL     COMMENT '统计日期',
    `coupon_id`             VARCHAR(255)  NOT NULL     COMMENT '优惠券ID',
    `coupon_name`           VARCHAR(255)  DEFAULT NULL COMMENT '优惠券名称',
    `start_date`            DATE          DEFAULT NULL COMMENT '开始日期',
    `rule_name`             VARCHAR(200)  DEFAULT NULL COMMENT '优惠规则',
    `get_count`             BIGINT(20)    DEFAULT NULL COMMENT '领取次数',
    `order_count`           BIGINT(20)    DEFAULT NULL COMMENT '使用(下单)次数',
    `expire_count`          BIGINT(20)    DEFAULT NULL COMMENT '过期次数',
    `order_original_amount` DECIMAL(16,2) DEFAULT NULL COMMENT '使用优惠券订单原始金额',
    `order_final_amount`    DECIMAL(16,2) DEFAULT NULL COMMENT '使用优惠券订单最终金额',
    `reduce_amount`         DECIMAL(16,2) DEFAULT NULL COMMENT '优惠金额',
    `reduce_rate`           DECIMAL(16,2) DEFAULT NULL COMMENT '补贴率',
    PRIMARY KEY (`dt`, `coupon_id`)
) ENGINE = InnoDB
  DEFAULT CHARSET = utf8
  ROW_FORMAT = DYNAMIC;
2 Sqoop导出脚本
编写Sqoop导出脚本
在/home/zhang/bin目录下创建脚本hdfs_to_mysql.sh
[zhang@hadoop102 bin]$ vim hdfs_to_mysql.sh
在脚本中填写如下内容
#!/bin/bash
# Exports ADS result tables from HDFS into MySQL using Sqoop.

# Hive database whose ADS directories are read, and the MySQL database
# that receives the rows.
hive_db_name=gmall
mysql_db_name=gmall_report

# export_data TABLE UPDATE_KEY
#   TABLE       MySQL target table; also the directory name read from
#               /warehouse/$hive_db_name/ads/
#   UPDATE_KEY  comma-separated column list handed to --update-key
# Runs one `sqoop export` in allowinsert update mode with tab-separated
# input, treating \N as NULL for both string and non-string columns.
# Returns 1 without running Sqoop if either argument is empty; otherwise
# returns Sqoop's exit status.
export_data() {
    # An empty argument would make --table / --update-key consume the next
    # option as its value, so reject that up front.
    if [ -z "$1" ] || [ -z "$2" ]; then
        echo "export_data: usage: export_data <table> <update_key_columns>" >&2
        return 1
    fi
    local table="$1"
    local update_key="$2"

    # NOTE(review): the password is passed on the command line, where other
    # local users can see it in the process list; consider Sqoop's
    # --password-file option instead.
    /opt/module/sqoop/bin/sqoop export \
        --connect "jdbc:mysql://hadoop102:3306/${mysql_db_name}?useUnicode=true&characterEncoding=utf-8" \
        --username root \
        --password 000000 \
        --table "$table" \
        --num-mappers 1 \
        --export-dir "/warehouse/${hive_db_name}/ads/${table}" \
        --input-fields-terminated-by "\t" \
        --update-mode allowinsert \
        --update-key "$update_key" \
        --input-null-string '\\N' \
        --input-null-non-string '\\N'
}
# update_key_for TABLE
# Prints the --update-key column list for a known ADS table and returns 0;
# prints nothing and returns 1 for any other name. This is the single place
# where the table -> key mapping lives.
update_key_for() {
    case "$1" in
        ads_activity_stats)    echo "dt,activity_id" ;;
        ads_coupon_stats)      echo "dt,coupon_id" ;;
        ads_order_by_province) echo "dt,recent_days,province_id" ;;
        ads_order_spu_stats)   echo "dt,recent_days,spu_id" ;;
        ads_order_total)       echo "dt,recent_days" ;;
        ads_page_path)         echo "dt,recent_days,source,target" ;;
        ads_repeat_purchase)   echo "dt,recent_days,tm_id" ;;
        ads_user_action)       echo "dt,recent_days" ;;
        ads_user_change)       echo "dt" ;;
        ads_user_retention)    echo "create_date,retention_day" ;;
        ads_user_total)        echo "dt,recent_days" ;;
        ads_visit_stats)       echo "dt,recent_days,is_new,channel" ;;
        *)                     return 1 ;;
    esac
}

# Tables exported by "all", in export order.
all_tables=(
    ads_activity_stats
    ads_coupon_stats
    ads_order_by_province
    ads_order_spu_stats
    ads_order_total
    ads_page_path
    ads_repeat_purchase
    ads_user_action
    ads_user_change
    ads_user_retention
    ads_user_total
    ads_visit_stats
)

# Resolve the first script argument to the list of tables to export.
# A missing or unknown argument is an error, not a silent no-op.
case "$1" in
    "all" )
        targets=("${all_tables[@]}")
        ;;
    * )
        if ! update_key_for "$1" > /dev/null; then
            echo "Usage: $0 {all|${all_tables[*]}}" >&2
            exit 1
        fi
        targets=("$1")
        ;;
esac

# Export every requested table even if an earlier one fails, but remember
# the failure so the script (and any scheduler running it) sees a non-zero
# exit status.
status=0
for table in "${targets[@]}"; do
    if ! export_data "$table" "$(update_key_for "$table")"; then
        echo "Export failed for table: $table" >&2
        status=1
    fi
done
exit $status
添加权限
[zhang@hadoop102 bin]$ chmod +x hdfs_to_mysql.sh
执行
[zhang@hadoop102 bin]$ ./hdfs_to_mysql.sh all
去MySQL中查询结果
3 全调度流程
3.1 数据准备
1)用户行为数据准备
Start the script first
(1)修改/opt/module/applog下的application.yml
[zhang@hadoop102 bin]$ cd /opt/module/applog/
[zhang@hadoop102 applog]$ ll
[zhang@hadoop102 applog]$ vim application.yml
hadoop103做同样操作
(2)生成数据
[zhang@hadoop102 bin]$ lg.sh
在Web端也可以看到数据生成成功
(3)观察HDFS的/origin_data/gmall/log/topic_log/2020-06-15路径是否有数据
2)业务数据准备
(1)修改/opt/module/db_log下的application.properties
[zhang@hadoop102 applog]$ cd /opt/module/db_log/
[zhang@hadoop102 db_log]$ ll
[zhang@hadoop102 db_log]$ vim application.properties
(2)生成数据
[zhang@hadoop102 db_log]$ java -jar gmall2020-mock-db-2020-04-01.jar
3.2 编写Azkaban工作流程配置文件
1)编写azkaban.project文件,内容如下
azkaban-flow-version: 2.0
2)编写gmall.flow文件,内容如下
# Azkaban Flow 2.0 definition for the daily warehouse pipeline.
# ${dt} is a flow parameter supplied at execution time and passed through
# to each script. Two independent branches (business DB and logs) merge at
# dwd_to_dws, after which the steps run in sequence.
nodes:
  # Business-data branch: MySQL -> HDFS -> ODS -> DIM / DWD
  - name: mysql_to_hdfs
    type: command
    config:
      command: /home/zhang/bin/mysql_to_hdfs.sh all ${dt}

  # Log branch entry point; it declares no upstream dependency.
  - name: hdfs_to_ods_log
    type: command
    config:
      command: /home/zhang/bin/hdfs_to_ods_log.sh ${dt}

  - name: hdfs_to_ods_db
    type: command
    dependsOn:
      - mysql_to_hdfs
    config:
      command: /home/zhang/bin/hdfs_to_ods_db.sh all ${dt}

  - name: ods_to_dim_db
    type: command
    dependsOn:
      - hdfs_to_ods_db
    config:
      command: /home/zhang/bin/ods_to_dim_db.sh all ${dt}

  - name: ods_to_dwd_log
    type: command
    dependsOn:
      - hdfs_to_ods_log
    config:
      command: /home/zhang/bin/ods_to_dwd_log.sh all ${dt}

  - name: ods_to_dwd_db
    type: command
    dependsOn:
      - hdfs_to_ods_db
    config:
      command: /home/zhang/bin/ods_to_dwd_db.sh all ${dt}

  # Merge point: waits for the DIM tables and both DWD branches.
  - name: dwd_to_dws
    type: command
    dependsOn:
      - ods_to_dim_db
      - ods_to_dwd_log
      - ods_to_dwd_db
    config:
      command: /home/zhang/bin/dwd_to_dws.sh all ${dt}

  - name: dws_to_dwt
    type: command
    dependsOn:
      - dwd_to_dws
    config:
      command: /home/zhang/bin/dws_to_dwt.sh all ${dt}

  - name: dwt_to_ads
    type: command
    dependsOn:
      - dws_to_dwt
    config:
      command: /home/zhang/bin/dwt_to_ads.sh all ${dt}

  # Final step: export the ADS tables to MySQL; this script is invoked
  # without ${dt}.
  - name: hdfs_to_mysql
    type: command
    dependsOn:
      - dwt_to_ads
    config:
      command: /home/zhang/bin/hdfs_to_mysql.sh all
3)将azkaban.project、gmall.flow文件压缩到一个zip文件,文件名称必须是英文.
4)在WebServer(http://hadoop102:8081/index)新建项目gmall,并填写项目名称和描述
选择上传的文件——查看任务流——详细任务流展示(如下)
配置输入dt时间参数
先找到hadoop102对应的id(id不固定,需要进到executors表中查看)
点击执行
执行成功
在SQLyog上查看结果,有 2020-06-15的数据
边栏推荐
- Tower of Hanoi problem
- 数学解决——环形链表问题
- MPPT solar charge controller data collection - through the gateway acquisition capacity battery SOC battery voltage, wi-fi
- First acquaintance with C language -- array
- The difference between link and @import
- Clustering index, and what is the difference between a clustering index
- 12 磁盘相关命令
- 【银行系列第一期】中国人民银行
- Go 项目实战-获取多级分类下的全部商品
- 经典链表OJ强训题——快慢双指针高效解法
猜你喜欢
Face detection based on opencv
Static route analysis (the longest mask matching principle + active and standby routes)
Word/Excel fixed table size, when filling in the content, the table does not change with the cell content
经典链表OJ强训题——快慢双指针高效解法
Inter-vlan routing + static routing + NAT (PAT + static NAT) comprehensive experiment
Drools WorkBench的简介与使用
The Sad History of Image Processing Technology
力扣刷题之爬楼梯(7/30)
MPPT solar charge controller data collection - through the gateway acquisition capacity battery SOC battery voltage, wi-fi
全流程调度——MySQL与Sqoop
随机推荐
Manchester City confuses fans with smart scarf that detects emotions
221. Largest Square
Clustering index, and what is the difference between a clustering index
全流程调度——MySQL与Sqoop
mycat的主从关系 垂直分库 水平分表 以及mycat分片联表查询的配置详解(mysql5.7系列)
Calculate S=a+aa+…+aa…a
12 pictures take you to fully understand service current limit, circuit breaker, downgrade, and avalanche
开题报告之论文框架
LeetCode 1161 The largest element in the layer and the LeetCode road of [BFS binary tree] HERODING
golang GUI for nuxui — HelloWorld
跨专业考研难度大?“上岸”成功率低?这份实用攻略请收下!
多线程下类对象的服务承诺探讨
【shell基础】判断目录是否为空
First acquaintance with C language -- array
基于opencv实现人脸检测
AtCoder Beginner Contest 261 Partial Solution
Unity界面总体介绍
静态路由解析(最长掩码匹配原则+主备路由)
力扣刷题之有效的正方形(每日一题7/29)
图解lower_bound&upper_bound