当前位置:网站首页>一文掌握数仓中auto analyze的使用
一文掌握数仓中auto analyze的使用
2022-07-04 17:48:00 【InfoQ】
1. 自动收集场景

2. 自动收集原理
pg_stat_get_tuples_inserted --表累积insert条数
pg_stat_get_tuples_updated --表累积update条数
pg_stat_get_tuples_deleted --表累积delete条数
pg_stat_get_tuples_changed --表自上次analyze以来,修改的条数
pg_stat_get_last_analyze_time --查询最近一次analyze时间3. 自动收集阈值
3.1 全局阈值
autovacuum_analyze_threshold #表触发analyze的最小修改量
autovacuum_analyze_scale_factor #表触发analyze时的修改百分比3.2 表级阈值
--设置表级阈值
ALTER TABLE item SET (autovacuum_analyze_threshold=50);
ALTER TABLE item SET (autovacuum_analyze_scale_factor=0.1);
--查询阈值
postgres=# select pg_options_to_table(reloptions) from pg_class where relname='item';
pg_options_to_table
---------------------------------------
(autovacuum_analyze_threshold,50)
(autovacuum_analyze_scale_factor,0.1)
(2 rows)
--重置阈值
ALTER TABLE item RESET (autovacuum_analyze_threshold);
ALTER TABLE item RESET (autovacuum_analyze_scale_factor);3.3 查看表的修改量是否超过了阈值(仅当前CN)
postgres=# select pg_stat_get_local_analyze_status('t_analyze'::regclass);
pg_stat_get_local_analyze_status
----------------------------------
Analyze not needed
(1 row)4. 自动收集方式
- 当查询中存在“统计信息完全缺失”或“修改量达到analyze阈值”的表,且执行计划不采取FQS (Fast Query Shipping)执行时,则通过autoanalyze控制此场景下表统计信息的自动收集。此时,查询语句会等待统计信息收集成功后,生成更优的执行计划,再执行原查询语句。
- 当autovacuum设置为on时,系统会定时启动autovacuum线程,对“修改量达到analyze阈值”的表在后台自动进行统计信息收集。

5.冻结统计信息
5.1 冻结表的distinct值
postgres=# alter table lineitem alter l_orderkey set (n_distinct=0.9);
ALTER TABLE
postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey';
relname | attname | attoptions
----------+------------+------------------
lineitem | l_orderkey | {n_distinct=0.9}
(1 row)
postgres=# alter table lineitem alter l_orderkey reset (n_distinct);
ALTER TABLE
postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey';
relname | attname | attoptions
----------+------------+------------
lineitem | l_orderkey |
(1 row)5.2. 冻结表的全部统计信息
alter table table_name set frozen_stats=true;6. 手动查看表是否需要做analyze
6.1 判断表是否需要analyze(串行版,适用于所有历史版本)
-- the function for get all pg_stat_activity information in all CN of current cluster.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS BOOl
AS $$
DECLARE
row_data record;
coor_name record;
fet_active text;
fetch_coor text;
relTuples int4;
changedTuples int4:= 0;
rel_anl_threshold int4;
rel_anl_scale_factor float4;
sys_anl_threshold int4;
sys_anl_scale_factor float4;
anl_threshold int4;
anl_scale_factor float4;
need_analyze bool := false;
BEGIN
--Get all the node names
fetch_coor := 'SELECT node_name FROM pgxc_node WHERE node_type=''C''';
FOR coor_name IN EXECUTE(fetch_coor) LOOP
fet_active := 'EXECUTE DIRECT ON (' || coor_name.node_name || ') ''SELECT pg_stat_get_tuples_changed(oid) from pg_class where relname = ''''|| table_name ||'''';''';
FOR row_data IN EXECUTE(fet_active) LOOP
changedTuples = changedTuples + row_data.pg_stat_get_tuples_changed;
END LOOP;
END LOOP;
EXECUTE 'select pg_stat_get_live_tuples(oid) from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into relTuples;
EXECUTE 'show autovacuum_analyze_threshold;' into sys_anl_threshold;
EXECUTE 'show autovacuum_analyze_scale_factor;' into sys_anl_scale_factor;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_threshold'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_threshold;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_scale_factor'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_scale_factor;
--dbms_output.put_line('relTuples='||relTuples||'; sys_anl_threshold='||sys_anl_threshold||'; sys_anl_scale_factor='||sys_anl_scale_factor||'; rel_anl_threshold='||rel_anl_threshold||'; rel_anl_scale_factor='||rel_anl_scale_factor||';');
if rel_anl_threshold IS NOT NULL then
anl_threshold = rel_anl_threshold;
else
anl_threshold = sys_anl_threshold;
end if;
if rel_anl_scale_factor IS NOT NULL then
anl_scale_factor = rel_anl_scale_factor;
else
anl_scale_factor = sys_anl_scale_factor;
end if;
if changedTuples > anl_threshold + anl_scale_factor * relTuples then
need_analyze := true;
end if;
return need_analyze;
END; $$
LANGUAGE 'plpgsql';6.2 判断表是否需要analyze(并行版,适用于支持并行执行框架的版本)
-- the function for get all pg_stat_activity information in all CN of current cluster.
--SELECT sum(a) FROM pg_catalog.pgxc_parallel_query('cn', 'SELECT 1::int FROM pg_class LIMIT 10') AS (a int); 利用并发执行框架
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS BOOl
AS $$
DECLARE
relTuples int4;
changedTuples int4:= 0;
rel_anl_threshold int4;
rel_anl_scale_factor float4;
sys_anl_threshold int4;
sys_anl_scale_factor float4;
anl_threshold int4;
anl_scale_factor float4;
need_analyze bool := false;
BEGIN
--Get all the node names
EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', ''SELECT pg_stat_get_tuples_changed(oid)::int4 from pg_class where relname = ''''|| table_name ||'''';'') AS (a int4);' into changedTuples;
EXECUTE 'select pg_stat_get_live_tuples(oid) from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into relTuples;
EXECUTE 'show autovacuum_analyze_threshold;' into sys_anl_threshold;
EXECUTE 'show autovacuum_analyze_scale_factor;' into sys_anl_scale_factor;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_threshold'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_threshold;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_scale_factor'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_scale_factor;
dbms_output.put_line('relTuples='||relTuples||'; sys_anl_threshold='||sys_anl_threshold||'; sys_anl_scale_factor='||sys_anl_scale_factor||'; rel_anl_threshold='||rel_anl_threshold||'; rel_anl_scale_factor='||rel_anl_scale_factor||';');
if rel_anl_threshold IS NOT NULL then
anl_threshold = rel_anl_threshold;
else
anl_threshold = sys_anl_threshold;
end if;
if rel_anl_scale_factor IS NOT NULL then
anl_scale_factor = rel_anl_scale_factor;
else
anl_scale_factor = sys_anl_scale_factor;
end if;
if changedTuples > anl_threshold + anl_scale_factor * relTuples then
need_analyze := true;
end if;
return need_analyze;
END; $$
LANGUAGE 'plpgsql';6.3 判断表是否需要analyze(自定义阈值)
-- the function for get all pg_stat_activity information in all CN of current cluster.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text, int anl_threshold, float anl_scale_factor)
RETURNS BOOl
AS $$
DECLARE
relTuples int4;
changedTuples int4:= 0;
need_analyze bool := false;
BEGIN
--Get all the node names
EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', ''SELECT pg_stat_get_tuples_changed(oid)::int4 from pg_class where relname = ''''|| table_name ||'''';'') AS (a int4);' into changedTuples;
EXECUTE 'select pg_stat_get_live_tuples(oid) from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into relTuples;
if changedTuples > anl_threshold + anl_scale_factor * relTuples then
need_analyze := true;
end if;
return need_analyze;
END; $$
LANGUAGE 'plpgsql';边栏推荐
- Basic tutorial of scala -- 16 -- generics
- 基于lex和yacc的词法分析器+语法分析器
- C # implementation defines a set of SQL statements that can be executed across databases in the middle of SQL (detailed explanation of the case)
- Build your own website (15)
- DeFi生态NFT流动性挖矿系统开发搭建
- [发布] 一个测试 WebService 和数据库连接的工具 - DBTest v1.0
- 更安全、更智能、更精致,长安Lumin完虐宏光MINI EV?
- Perfect JS event delegation
- 1672. Total assets of the richest customers
- 完善的js事件委托
猜你喜欢

Process of manually encrypt the mass-producing firmware and programming ESP devices

Scala basic tutorial -- 20 -- akka

升级智能开关,“零火版”、“单火”接线方式差异有多大?

神经网络物联网应用技术就业前景【欢迎补充】

The latest progress of Intel Integrated Optoelectronics Research promotes the progress of CO packaging optics and optical interconnection technology

奥迪AUDI EDI INVOIC发票报文详解

更安全、更智能、更精致,长安Lumin完虐宏光MINI EV?

Lex and yacc based lexical analyzer + parser

Use canal and rocketmq to listen to MySQL binlog logs

基于unity的愤怒的小鸟设计
随机推荐
Bi skills - permission axis
问下各位大佬有用过cdc直接mysql to clickhouse的么
请教一下 flinksql中 除了数据统计结果是状态被保存 数据本身也是状态吗
Process of manually encrypt the mass-producing firmware and programming ESP devices
Don't just learn Oracle and MySQL!
ThreadLocal原理与使用
Nebula importer data import practice
Technology sharing | interface testing value and system
2014 Hefei 31st youth informatics Olympic Games (primary school group) test questions
基于NCF的多模块协同实例
LeetCode 赎金信 C#解答
C language printing exercise
IBM WebSphere MQ retrieving messages
模板_大整数减法_无论大小关系
Technologie de base de la programmation Shell IV
Send and receive IBM WebSphere MQ messages
资料下载 丨首届腾讯技术开放日课程精华!
[release] a tool for testing WebService and database connection - dbtest v1.0
与二值化阈值处理相关的OpenCV函数、方法汇总,便于对比和拿来使用
神经网络物联网是什么意思通俗的解释