当前位置:网站首页>sparksql 与flinksql 建表 与 连表记录
Spark SQL 与 Flink SQL 建表与连表记录
2022-07-28 06:30:00 【路新航】
启动 Flink SQL 客户端:bin/sql-client.sh
建表
flink建立表
-- Flink SQL: create an Iceberg table partitioned by dt.
CREATE TABLE iceberg.xxx.xxx (
    id STRING COMMENT 'id',
    dt STRING COMMENT '分区字段'
)
PARTITIONED BY (dt)
WITH (
    -- Data file format (parquet is also the default).
    'write.format.default' = 'parquet',
    -- Compression codec for parquet data files.
    'write.parquet.compression-codec' = 'gzip',
    -- Merge manifests automatically on write.
    'commit.manifest-merge.enabled' = 'true',
    -- Snapshot retention in ms: 43200000 ms = 12 h (the default is 5 days).
    'history.expire.max-snapshot-age-ms' = '43200000',
    -- Make the table queryable from Hive.
    'engine.hive.enabled' = 'true',
    -- Delete the oldest metadata file after each new metadata file is committed.
    'write.metadata.delete-after-commit.enabled' = 'true',
    -- Number of previous metadata versions retained before old ones are deleted.
    'write.metadata.previous-versions-max' = '20',
    -- Compress metadata files with gzip.
    'write.metadata.compression-codec' = 'gzip',
    -- HDFS location of the table.
    'location' = 'hdfs://ns1/lakehouse/schema_name/table_name'
);
spark建立表
-- Spark SQL: create an Iceberg table partitioned by dt at an explicit HDFS location.
CREATE TABLE iceberg.xxx.xxxx (
    id STRING COMMENT 'id',
    dt STRING COMMENT '分区字段'
)
USING iceberg
PARTITIONED BY (dt)
LOCATION 'hdfs://xxx/lakehouse/schema_name/table_name'
TBLPROPERTIES (
    -- Data file format (parquet is also the default).
    'write.format.default' = 'parquet',
    -- Compression codec for parquet data files.
    'write.parquet.compression-codec' = 'gzip',
    -- Merge manifests automatically on write.
    'commit.manifest-merge.enabled' = 'true',
    -- Snapshot retention in ms: 43200000 ms = 12 h (the default is 5 days).
    'history.expire.max-snapshot-age-ms' = '43200000',
    -- Make the table queryable from Hive.
    'engine.hive.enabled' = 'true',
    -- Delete the oldest metadata file after each new metadata file is committed.
    'write.metadata.delete-after-commit.enabled' = 'true',
    -- Number of previous metadata versions retained before old ones are deleted.
    'write.metadata.previous-versions-max' = '20',
    -- Compress metadata files with gzip.
    'write.metadata.compression-codec' = 'gzip'
);
根据源表创建目标表
-- Spark SQL: create a target Iceberg table from a source table's columns.
-- The always-false predicate (1 = 2) means the CTAS selects no rows,
-- so only the column definitions are carried over.
CREATE TABLE iceberg.schema_name.table_name
USING iceberg
PARTITIONED BY (dt)
LOCATION 'hdfs://ns1/lakehouse/schema_name/table_name'
TBLPROPERTIES (
    'write.format.default' = 'parquet',
    'write.parquet.compression-codec' = 'gzip',
    'commit.manifest-merge.enabled' = 'true',
    'engine.hive.enabled' = 'true',
    'write.metadata.delete-after-commit.enabled' = 'true',
    'write.metadata.previous-versions-max' = '20',
    'write.metadata.compression-codec' = 'gzip'
)
AS
SELECT *
FROM iceberg.schema_name.original_table_name
WHERE 1 = 2;
连接 MySQL、SQL Server
spark
-- MySQL: map a remote table to a temporary view through Spark's JDBC data source.
CREATE TEMPORARY VIEW tb_order_group
USING org.apache.spark.sql.jdbc
OPTIONS (
    url 'jdbc:mysql://xx.xxx.xx.xx:4909/db_name?serverTimezone=GMT%2B8&useUnicode=true&characterEncoding=UTF-8&autoReconnect=true&zeroDateTimeBehavior=convertToNull',
    dbtable 'xxx',
    user 'xxx',
    password 'xxx'
);
-- SQL Server: map a remote table to a temporary view through Spark's JDBC data source.
CREATE TEMPORARY VIEW gxywhz
USING org.apache.spark.sql.jdbc
OPTIONS (
    url 'jdbc:sqlserver://192.168.1.xx:xxx;DatabaseName=xxx',
    dbtable 'dbo.xxx',
    user 'xxx',
    password 'xxxx'
);
flink
-- Flink SQL: create a mapping table over a MySQL table using the JDBC connector.
-- Fix: the column definition and the PRIMARY KEY constraint must be separated
-- by a comma; without it the statement fails to parse.
CREATE TABLE mysql_source
(
    id INT COMMENT 'id',
    -- Flink does not validate the key itself, so it is declared NOT ENFORCED.
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:mysql://192.168.xx.xx:3306/xxx',
    'table-name' = 'tableName',
    'driver' = 'com.mysql.jdbc.Driver',
    'username' = 'root',
    'password' = 'xxx'
);
-- Flink SQL: create a mapping table over a SQL Server table using the JDBC
-- connector with the jTDS driver.
-- NOTE(review): confirm the Flink JDBC connector in use has a dialect that
-- accepts the 'jdbc:jtds:' URL prefix.
CREATE TABLE sqlserver_source (
    id STRING COMMENT 'id'
)
WITH (
    'connector' = 'jdbc',
    'url' = 'jdbc:jtds:sqlserver://192.168.xx.xxx:10009;databaseName=xxx;useLOBs=false',
    'table-name' = 'schema.tableName',
    'driver' = 'net.sourceforge.jtds.jdbc.Driver',
    'username' = 'xx',
    'password' = 'xxxxx'
);
边栏推荐
- JS thoroughly understand this point
- jquey的基础语法
- What if the computer folder cannot be renamed?
- Draw.io image saving path settings
- Opencv's practical learning of credit card recognition (4)
- How do we run batch mode in MySQL?
- The core packages and middleware required for golang development cover all areas of the project and are worth collecting
- 豪华版h5俄罗斯方块小游戏源码
- Qt多线程中槽函数在哪个线程里执行分析
- 华为高级工程师---BGP路由过滤及社团属性
猜你喜欢

Common solutions for distributed ID - take one

No super high-rise buildings | new regulations: what information does it reveal that no new buildings above 500 meters should be built?

Plantuml Usage Summary

Change the dataDir path after mysql8.0.16 installation

JS thoroughly understand this point

Characteristics of EMC EMI beads

【17】 Establish data path (upper): instruction + operation =cpu

Protobuf basic grammar summary

Basic dictionary of deep learning --- activation function, batch size, normalization

二维数组及操作
随机推荐
[leetcode] 24. Exchange nodes in the linked list in pairs
Some experience of gd32 using Hal Library of ST and Gd official library
Use ffmpeg to generate single image + single audio streaming video in batches
How do we run batch mode in MySQL?
使用FFmpeg来批量生成单图+单音频的一图流视频
What if the task manager is not fully displayed?
【17】建立数据通路(上):指令+运算=CPU
Prescan quick start to master the track editing path of Lecture 16
SQL function
【17】 Establish data path (upper): instruction + operation =cpu
03 | project deployment: how to quickly deploy a website developed based on the laravel framework
The fourth phase (2021-2022) research on the implementation of cloud native technology in traditional industries - central state-owned enterprises was officially released
Five screens, VR, projection, "Wei Xiaoli" rolled up on the intelligent cockpit
“蔚来杯“2022牛客暑期多校训练营2补题记录(DGHJKL)
C#,入门教程——程序运行时的调试技巧与逻辑错误探针技术与源代码
【花书笔记】 之 Chapter01 引言
Deep browser rendering principles
【13】加法器:如何像搭乐高一样搭电路(上)?
Solve the inherent defects of CNN! Common CNN architecture ccnn is coming | icml2022
Qt多线程中槽函数在哪个线程里执行分析