当前位置:网站首页>Master the use of auto analyze in data warehouse
Master the use of auto analyze in data warehouse
2022-07-04 19:32:00 【InfoQ】
1. Automatically collect scenes

2. Automatic collection principle
pg_stat_get_tuples_inserted -- Table accumulation insert Number of pieces
pg_stat_get_tuples_updated -- Table accumulation update Number of pieces
pg_stat_get_tuples_deleted -- Table accumulation delete Number of pieces
pg_stat_get_tuples_changed -- Table since last analyze since , Number of changes
pg_stat_get_last_analyze_time -- Query the last analyze Time
3. Automatically collect thresholds
3.1 Global threshold
autovacuum_analyze_threshold # The table triggers analyze Minimum modification of
autovacuum_analyze_scale_factor # The table triggers analyze Percentage of changes when
3.2 Table level threshold
-- Set table level threshold
ALTER TABLE item SET (autovacuum_analyze_threshold=50);
ALTER TABLE item SET (autovacuum_analyze_scale_factor=0.1);
-- Query threshold
postgres=# select pg_options_to_table(reloptions) from pg_class where relname='item';
pg_options_to_table
---------------------------------------
(autovacuum_analyze_threshold,50)
(autovacuum_analyze_scale_factor,0.1)
(2 rows)
-- Reset threshold
ALTER TABLE item RESET (autovacuum_analyze_threshold);
ALTER TABLE item RESET (autovacuum_analyze_scale_factor);
3.3 Check whether the modification amount of the table exceeds the threshold ( Only the current CN)
postgres=# select pg_stat_get_local_analyze_status('t_analyze'::regclass);
pg_stat_get_local_analyze_status
----------------------------------
Analyze not needed
(1 row)
4. Automatic collection method
- When there is “ Statistics are completely missing ” or “ The modification amount reaches analyze threshold ” Table of , And the implementation plan does not take FQS (Fast Query Shipping) Execution time , Through autoanalyze Control the automatic collection of statistical information in the following table in this scenario . here , The query statement will wait for the statistics to be collected successfully , Generate a better execution plan , Then execute the original query statement .
- When autovacuum Set to on when , The system will start regularly autovacuum Threads , Yes “ The modification amount reaches analyze threshold ” The table automatically collects statistical information in the background .

5. Freeze Statistics
5.1 Freeze table distinct value
postgres=# alter table lineitem alter l_orderkey set (n_distinct=0.9);
ALTER TABLE
postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey';
relname | attname | attoptions
----------+------------+------------------
lineitem | l_orderkey | {n_distinct=0.9}
(1 row)
postgres=# alter table lineitem alter l_orderkey reset (n_distinct);
ALTER TABLE
postgres=# select relname,attname,attoptions from pg_attribute a,pg_class c where c.oid=a.attrelid and attname='l_orderkey';
relname | attname | attoptions
----------+------------+------------
lineitem | l_orderkey |
(1 row)
5.2. Freeze all statistics of the table
alter table table_name set frozen_stats=true;
6. Manually check whether the table needs to be done analyze
6.1 Determine whether the table needs analyze( Serial version , Applicable to all historical versions )
-- the function for get all pg_stat_activity information in all CN of current cluster.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS BOOl
AS $$
DECLARE
row_data record;
coor_name record;
fet_active text;
fetch_coor text;
relTuples int4;
changedTuples int4:= 0;
rel_anl_threshold int4;
rel_anl_scale_factor float4;
sys_anl_threshold int4;
sys_anl_scale_factor float4;
anl_threshold int4;
anl_scale_factor float4;
need_analyze bool := false;
BEGIN
--Get all the node names
fetch_coor := 'SELECT node_name FROM pgxc_node WHERE node_type=''C''';
FOR coor_name IN EXECUTE(fetch_coor) LOOP
fet_active := 'EXECUTE DIRECT ON (' || coor_name.node_name || ') ''SELECT pg_stat_get_tuples_changed(oid) from pg_class where relname = ''''|| table_name ||'''';''';
FOR row_data IN EXECUTE(fet_active) LOOP
changedTuples = changedTuples + row_data.pg_stat_get_tuples_changed;
END LOOP;
END LOOP;
EXECUTE 'select pg_stat_get_live_tuples(oid) from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into relTuples;
EXECUTE 'show autovacuum_analyze_threshold;' into sys_anl_threshold;
EXECUTE 'show autovacuum_analyze_scale_factor;' into sys_anl_scale_factor;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_threshold'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_threshold;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_scale_factor'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_scale_factor;
--dbms_output.put_line('relTuples='||relTuples||'; sys_anl_threshold='||sys_anl_threshold||'; sys_anl_scale_factor='||sys_anl_scale_factor||'; rel_anl_threshold='||rel_anl_threshold||'; rel_anl_scale_factor='||rel_anl_scale_factor||';');
if rel_anl_threshold IS NOT NULL then
anl_threshold = rel_anl_threshold;
else
anl_threshold = sys_anl_threshold;
end if;
if rel_anl_scale_factor IS NOT NULL then
anl_scale_factor = rel_anl_scale_factor;
else
anl_scale_factor = sys_anl_scale_factor;
end if;
if changedTuples > anl_threshold + anl_scale_factor * relTuples then
need_analyze := true;
end if;
return need_analyze;
END; $$
LANGUAGE 'plpgsql';
6.2 Determine whether the table needs analyze( Parallel Edition , For versions that support parallel execution frameworks )
-- the function for get all pg_stat_activity information in all CN of current cluster.
--SELECT sum(a) FROM pg_catalog.pgxc_parallel_query('cn', 'SELECT 1::int FROM pg_class LIMIT 10') AS (a int); Using concurrent execution framework
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text)
RETURNS BOOl
AS $$
DECLARE
relTuples int4;
changedTuples int4:= 0;
rel_anl_threshold int4;
rel_anl_scale_factor float4;
sys_anl_threshold int4;
sys_anl_scale_factor float4;
anl_threshold int4;
anl_scale_factor float4;
need_analyze bool := false;
BEGIN
--Get all the node names
EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', ''SELECT pg_stat_get_tuples_changed(oid)::int4 from pg_class where relname = ''''|| table_name ||'''';'') AS (a int4);' into changedTuples;
EXECUTE 'select pg_stat_get_live_tuples(oid) from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into relTuples;
EXECUTE 'show autovacuum_analyze_threshold;' into sys_anl_threshold;
EXECUTE 'show autovacuum_analyze_scale_factor;' into sys_anl_scale_factor;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_threshold'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_threshold;
EXECUTE 'select (select option_value from pg_options_to_table(c.reloptions) where option_name = ''autovacuum_analyze_scale_factor'') as value
from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into rel_anl_scale_factor;
dbms_output.put_line('relTuples='||relTuples||'; sys_anl_threshold='||sys_anl_threshold||'; sys_anl_scale_factor='||sys_anl_scale_factor||'; rel_anl_threshold='||rel_anl_threshold||'; rel_anl_scale_factor='||rel_anl_scale_factor||';');
if rel_anl_threshold IS NOT NULL then
anl_threshold = rel_anl_threshold;
else
anl_threshold = sys_anl_threshold;
end if;
if rel_anl_scale_factor IS NOT NULL then
anl_scale_factor = rel_anl_scale_factor;
else
anl_scale_factor = sys_anl_scale_factor;
end if;
if changedTuples > anl_threshold + anl_scale_factor * relTuples then
need_analyze := true;
end if;
return need_analyze;
END; $$
LANGUAGE 'plpgsql';
6.3 Determine whether the table needs analyze( Custom threshold )
-- the function for get all pg_stat_activity information in all CN of current cluster.
CREATE OR REPLACE FUNCTION pg_catalog.pgxc_stat_table_need_analyze(in table_name text, int anl_threshold, float anl_scale_factor)
RETURNS BOOl
AS $$
DECLARE
relTuples int4;
changedTuples int4:= 0;
need_analyze bool := false;
BEGIN
--Get all the node names
EXECUTE 'SELECT sum(a) FROM pg_catalog.pgxc_parallel_query(''cn'', ''SELECT pg_stat_get_tuples_changed(oid)::int4 from pg_class where relname = ''''|| table_name ||'''';'') AS (a int4);' into changedTuples;
EXECUTE 'select pg_stat_get_live_tuples(oid) from pg_class c where c.oid = '''|| table_name ||'''::REGCLASS;' into relTuples;
if changedTuples > anl_threshold + anl_scale_factor * relTuples then
need_analyze := true;
end if;
return need_analyze;
END; $$
LANGUAGE 'plpgsql';
边栏推荐
猜你喜欢
随机推荐
26. 删除有序数组中的重复项 C#解答
Using FTP
如何使用Async-Awati异步任务处理代替BackgroundWorker?
FTP, SFTP file transfer
Is the securities account opened by qiniu safe?
php伪原创api对接方法
Don't just learn Oracle and MySQL!
Build your own website (15)
. Net ORM framework hisql practice - Chapter 2 - using hisql to realize menu management (add, delete, modify and check)
Cache é JSON uses JSON adapters
国元期货是正规平台吗?在国元期货开户安全吗?
Shell 编程核心技术《二》
物联网应用技术的就业前景和现状
How to use async Awati asynchronous task processing instead of backgroundworker?
Process of manually encrypt the mass-producing firmware and programming ESP devices
牛客小白月赛7 F题
Opencv functions and methods related to binary threshold processing are summarized for comparison and use
删除字符串中出现次数最少的字符【JS,Map排序,正则】
【问题】druid报异常sql injection violation, part alway true condition not allow 解决方案
生成XML元素