当前位置:网站首页>Master the use of auto analyze in data warehouse
Master the use of auto analyze in data warehouse
2022-07-04 19:32:00 【InfoQ】
1. Automatic collection scenarios

2. Automatic collection principle
pg_stat_get_tuples_inserted -- Cumulative number of rows inserted into the table
pg_stat_get_tuples_updated -- Cumulative number of rows updated in the table
pg_stat_get_tuples_deleted -- Cumulative number of rows deleted from the table
pg_stat_get_tuples_changed -- Number of rows changed in the table since its last analyze
pg_stat_get_last_analyze_time -- Time of the table's last analyze
3. Automatic collection thresholds
3.1 Global threshold
autovacuum_analyze_threshold # Minimum number of changed rows required before analyze is triggered on a table
autovacuum_analyze_scale_factor # Fraction of the table's row count that is added to the threshold when deciding whether to trigger analyze
3.2 Table-level threshold
-- Set table level threshold
ALTER TABLE item SET (autovacuum_analyze_threshold=50);
ALTER TABLE item SET (autovacuum_analyze_scale_factor=0.1);
-- Query threshold
postgres=# select pg_options_to_table(reloptions) from pg_class where relname='item';
pg_options_to_table
---------------------------------------
(autovacuum_analyze_threshold,50)
(autovacuum_analyze_scale_factor,0.1)
(2 rows)
-- Reset threshold
ALTER TABLE item RESET (autovacuum_analyze_threshold);
ALTER TABLE item RESET (autovacuum_analyze_scale_factor);
3.3 Check whether the number of changes to the table exceeds the threshold (current CN only)
postgres=# select pg_stat_get_local_analyze_status('t_analyze'::regclass);
pg_stat_get_local_analyze_status
----------------------------------
Analyze not needed
(1 row)
4. Automatic collection methods
- When a query involves a table whose statistics are completely missing or whose number of changes has reached the analyze threshold, and the execution plan does not use FQS (Fast Query Shipping), the autoanalyze parameter controls whether statistics for that table are collected automatically. In this case the query waits until the statistics have been collected, a better execution plan is generated, and then the original query is executed.
- When autovacuum is set to on, the system periodically starts autovacuum threads, which automatically collect statistics in the background for tables whose number of changes has reached the analyze threshold.

5. Freeze Statistics
5.1 Freeze table distinct value
postgres=# alter table lineitem alter l_orderkey set (n_distinct=0.9);
ALTER TABLE
postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey';
relname | attname | attoptions
----------+------------+------------------
lineitem | l_orderkey | {n_distinct=0.9}
(1 row)
postgres=# alter table lineitem alter l_orderkey reset (n_distinct);
ALTER TABLE
postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey';
relname | attname | attoptions
----------+------------+------------
lineitem | l_orderkey |
(1 row)
5.2 Freeze all statistics of the table
alter table table_name set frozen_stats=true;
6. Manually check whether a table needs analyze
6.1 Determine whether the table needs analyze (serial version, applicable to all historical versions)
-- Reports whether a table has changed enough since its last analyze to exceed
-- the auto-analyze threshold (serial version).
--
-- The function visits every coordinator (CN) listed in pgxc_node with
-- EXECUTE DIRECT, sums pg_stat_get_tuples_changed() for the table, and
-- compares the total with
--     threshold + scale_factor * live_tuples
-- Table-level reloptions take precedence over the global settings.
--
-- Parameters:
--   table_name  table to check; resolved through a regclass cast, so it may be
--               schema-qualified, and an unknown table raises an error.
-- Returns: true when the summed change count exceeds the threshold, else false.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS boolean
AS $$
DECLARE
    coor_name           record;
    row_data            record;
    v_rel_name          text;
    v_nsp_name          text;
    v_remote_query      text;
    -- The statistics functions return bigint; int4 would overflow on large tables.
    v_rel_tuples        int8;
    v_changed_tuples    int8 := 0;
    v_rel_anl_threshold int4;
    v_rel_anl_scale     float4;
    v_anl_threshold     int4;
    v_anl_scale_factor  float4;
BEGIN
    -- Resolve the table once on the local CN: its schema and name (used to find
    -- the same table on the other CNs), its live tuple count, and any
    -- table-level threshold overrides stored in reloptions.
    SELECT
        c.relname::text,
        n.nspname::text,
        pg_stat_get_live_tuples(c.oid),
        (SELECT o.option_value::int4
         FROM pg_options_to_table(c.reloptions) AS o
         WHERE o.option_name = 'autovacuum_analyze_threshold'),
        (SELECT o.option_value::float4
         FROM pg_options_to_table(c.reloptions) AS o
         WHERE o.option_name = 'autovacuum_analyze_scale_factor')
    INTO v_rel_name, v_nsp_name, v_rel_tuples, v_rel_anl_threshold, v_rel_anl_scale
    FROM pg_class AS c
    INNER JOIN pg_namespace AS n
        ON n.oid = c.relnamespace
    WHERE c.oid = table_name::regclass;

    -- Table-level settings win; otherwise fall back to the global parameters.
    v_anl_threshold := COALESCE(v_rel_anl_threshold,
                                current_setting('autovacuum_analyze_threshold')::int4);
    v_anl_scale_factor := COALESCE(v_rel_anl_scale,
                                   current_setting('autovacuum_analyze_scale_factor')::float4);

    -- Query run on each CN. The table is matched by schema and name, and both
    -- values are embedded with quote_literal so the name is actually
    -- substituted and cannot break out of the statement.
    v_remote_query := 'SELECT pg_stat_get_tuples_changed(c.oid) AS changed'
        || ' FROM pg_class AS c'
        || ' INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace'
        || ' WHERE c.relname = ' || quote_literal(v_rel_name)
        || ' AND n.nspname = ' || quote_literal(v_nsp_name);

    -- Sum the change counters reported by every coordinator.
    FOR coor_name IN
        SELECT node_name FROM pgxc_node WHERE node_type = 'C'
    LOOP
        FOR row_data IN EXECUTE
            'EXECUTE DIRECT ON (' || quote_ident(coor_name.node_name) || ') '
            || quote_literal(v_remote_query)
        LOOP
            v_changed_tuples := v_changed_tuples + COALESCE(row_data.changed, 0);
        END LOOP;
    END LOOP;

    -- A NULL comparison (e.g. missing statistics) is reported as "not needed".
    RETURN COALESCE(
        v_changed_tuples > v_anl_threshold + v_anl_scale_factor * v_rel_tuples,
        false);
END; $$
LANGUAGE plpgsql;
6.2 Determine whether the table needs analyze (parallel version, for versions that support the parallel execution framework)
-- Reports whether a table has changed enough since its last analyze to exceed
-- the auto-analyze threshold (parallel version).
--
-- The per-CN change counters are gathered in a single call through the
-- concurrent execution framework, e.g.
--   SELECT sum(a) FROM pg_catalog.pgxc_parallel_query('cn', 'SELECT 1::int FROM pg_class LIMIT 10') AS (a int);
-- and the total is compared with
--     threshold + scale_factor * live_tuples
-- Table-level reloptions take precedence over the global settings.
--
-- Parameters:
--   table_name  table to check; resolved through a regclass cast, so it may be
--               schema-qualified, and an unknown table raises an error.
-- Returns: true when the summed change count exceeds the threshold, else false.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS boolean
AS $$
DECLARE
    v_rel_name          text;
    v_nsp_name          text;
    v_remote_query      text;
    -- The statistics functions return bigint; int4 would overflow on large tables.
    v_rel_tuples        int8;
    v_changed_tuples    int8 := 0;
    v_rel_anl_threshold int4;
    v_rel_anl_scale     float4;
    v_anl_threshold     int4;
    v_anl_scale_factor  float4;
BEGIN
    -- Resolve the table once on the local CN: its schema and name (used to find
    -- the same table on the other CNs), its live tuple count, and any
    -- table-level threshold overrides stored in reloptions.
    SELECT
        c.relname::text,
        n.nspname::text,
        pg_stat_get_live_tuples(c.oid),
        (SELECT o.option_value::int4
         FROM pg_options_to_table(c.reloptions) AS o
         WHERE o.option_name = 'autovacuum_analyze_threshold'),
        (SELECT o.option_value::float4
         FROM pg_options_to_table(c.reloptions) AS o
         WHERE o.option_name = 'autovacuum_analyze_scale_factor')
    INTO v_rel_name, v_nsp_name, v_rel_tuples, v_rel_anl_threshold, v_rel_anl_scale
    FROM pg_class AS c
    INNER JOIN pg_namespace AS n
        ON n.oid = c.relnamespace
    WHERE c.oid = table_name::regclass;

    -- Table-level settings win; otherwise fall back to the global parameters.
    v_anl_threshold := COALESCE(v_rel_anl_threshold,
                                current_setting('autovacuum_analyze_threshold')::int4);
    v_anl_scale_factor := COALESCE(v_rel_anl_scale,
                                   current_setting('autovacuum_analyze_scale_factor')::float4);

    -- Query run on each CN. The table is matched by schema and name, and both
    -- values are embedded with quote_literal so the name is actually
    -- substituted and cannot break out of the statement.
    v_remote_query := 'SELECT pg_stat_get_tuples_changed(c.oid)::int8'
        || ' FROM pg_class AS c'
        || ' INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace'
        || ' WHERE c.relname = ' || quote_literal(v_rel_name)
        || ' AND n.nspname = ' || quote_literal(v_nsp_name);

    -- Sum the change counters of all coordinators in one parallel call.
    EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', '
        || quote_literal(v_remote_query)
        || ') AS (a int8)'
    INTO v_changed_tuples;

    -- sum() over zero rows is NULL; treat that as "no changes recorded".
    v_changed_tuples := COALESCE(v_changed_tuples, 0);

    -- A NULL comparison (e.g. missing statistics) is reported as "not needed".
    RETURN COALESCE(
        v_changed_tuples > v_anl_threshold + v_anl_scale_factor * v_rel_tuples,
        false);
END; $$
LANGUAGE plpgsql;
6.3 Determine whether the table needs analyze (custom threshold)
-- Reports whether a table has changed enough since its last analyze to exceed
-- a caller-supplied threshold.
--
-- The per-CN change counters are gathered through
-- pg_catalog.pgxc_parallel_query and the total is compared with
--     anl_threshold + anl_scale_factor * live_tuples
--
-- Parameters:
--   table_name        table to check; resolved through a regclass cast, so it
--                     may be schema-qualified, and an unknown table raises an
--                     error.
--   anl_threshold     minimum number of changed rows.
--   anl_scale_factor  fraction of the live row count added to anl_threshold.
-- Returns: true when the summed change count exceeds the threshold, else false.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text, anl_threshold int, anl_scale_factor float)
RETURNS boolean
AS $$
DECLARE
    v_rel_name       text;
    v_nsp_name       text;
    v_remote_query   text;
    -- The statistics functions return bigint; int4 would overflow on large tables.
    v_rel_tuples     int8;
    v_changed_tuples int8 := 0;
BEGIN
    -- Resolve the table once on the local CN: its schema and name (used to find
    -- the same table on the other CNs) and its live tuple count.
    SELECT
        c.relname::text,
        n.nspname::text,
        pg_stat_get_live_tuples(c.oid)
    INTO v_rel_name, v_nsp_name, v_rel_tuples
    FROM pg_class AS c
    INNER JOIN pg_namespace AS n
        ON n.oid = c.relnamespace
    WHERE c.oid = table_name::regclass;

    -- Query run on each CN. The table is matched by schema and name, and both
    -- values are embedded with quote_literal so the name is actually
    -- substituted and cannot break out of the statement.
    v_remote_query := 'SELECT pg_stat_get_tuples_changed(c.oid)::int8'
        || ' FROM pg_class AS c'
        || ' INNER JOIN pg_namespace AS n ON n.oid = c.relnamespace'
        || ' WHERE c.relname = ' || quote_literal(v_rel_name)
        || ' AND n.nspname = ' || quote_literal(v_nsp_name);

    -- Sum the change counters of all coordinators in one parallel call.
    EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', '
        || quote_literal(v_remote_query)
        || ') AS (a int8)'
    INTO v_changed_tuples;

    -- sum() over zero rows is NULL; treat that as "no changes recorded".
    v_changed_tuples := COALESCE(v_changed_tuples, 0);

    -- A NULL comparison (e.g. NULL arguments) is reported as "not needed".
    RETURN COALESCE(
        v_changed_tuples > anl_threshold + anl_scale_factor * v_rel_tuples,
        false);
END; $$
LANGUAGE plpgsql;
边栏推荐
- Oracle with as ora-00903: invalid table name multi report error
- MySQL数据库基本操作-DDL | 黑马程序员
- 876. 链表的中间结点
- FPGA时序约束分享01_四大步骤简述
- 性能优化之关键渲染路径
- Send and receive IBM WebSphere MQ messages
- 请教一下 flinksql中 除了数据统计结果是状态被保存 数据本身也是状态吗
- 在线文本行固定长度填充工具
- Safer, smarter and more refined, Chang'an Lumin Wanmei Hongguang Mini EV?
- HDU 1097 A hard puzzle
猜你喜欢

The latest progress of Intel Integrated Optoelectronics Research promotes the progress of CO packaging optics and optical interconnection technology

更安全、更智能、更精致,长安Lumin完虐宏光MINI EV?

正则替换【JS,正则表达式】

性能优化之关键渲染路径
Summary and sorting of 8 pits of redis distributed lock

整理混乱的头文件,我用include what you use

Pointnet/Pointnet++点云数据集处理并训练

The 300th weekly match of leetcode (20220703)

Upgrade the smart switch, how much is the difference between the "zero fire version" and "single fire" wiring methods?

Go微服务(二)——Protobuf详细入门
随机推荐
如何使用Async-Awati异步任务处理代替BackgroundWorker?
1008 Elevator(20 分)(PAT甲级)
prometheus安装
神经网络物联网是什么意思通俗的解释
Comment utiliser async awati asynchrone Task Handling au lieu de backgroundworker?
《工作、消费主义和新穷人》的微信读书笔记
正则替换【JS,正则表达式】
1672. 最富有客户的资产总量
英特尔集成光电研究最新进展推动共封装光学和光互连技术进步
Generate XML elements
Unity adds a function case similar to editor extension to its script, the use of ContextMenu
1007 Maximum Subsequence Sum(25 分)(PAT甲级)
与二值化阈值处理相关的OpenCV函数、方法汇总,便于对比和拿来使用
牛客小白月赛7 谁是神箭手
Qt实现界面滑动切换效果
Shell programming core technology II
Technologie de base de la programmation Shell IV
One question per day (2022-07-02) - Minimum refueling times
千万不要只学 Oracle、MySQL!
SSRS筛选器的IN运算(即包含于)用法