MySQL index optimization
2022-07-28 06:44:00 【yfyh2021】
Create the table and insert 100,000 rows of sample data
CREATE TABLE `employees` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(24) NOT NULL DEFAULT '' COMMENT 'Name',
`age` int(11) NOT NULL DEFAULT '0' COMMENT 'Age',
`position` varchar(20) NOT NULL DEFAULT '' COMMENT 'Position',
`hire_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'Hire time',
PRIMARY KEY (`id`),
KEY `idx_name_age_position` (`name`,`age`,`position`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='Employee record table';
INSERT INTO employees(name,age,position,hire_time) VALUES('LiLei',22,'manager',NOW());
INSERT INTO employees(name,age,position,hire_time) VALUES('HanMeimei', 23,'dev',NOW());
INSERT INTO employees(name,age,position,hire_time) VALUES('Lucy',23,'dev',NOW());
-- Insert some sample data
drop procedure if exists insert_emp;
delimiter ;;
create procedure insert_emp()
begin
declare i int;
set i=1;
while(i<=100000)do
insert into employees(name,age,position) values(CONCAT('zhuge',i),i,'dev');
set i=i+1;
end while;
end;;
delimiter ;
call insert_emp();
A comprehensive example
A range condition on the first field of the composite index does not use the index
EXPLAIN SELECT * FROM employees WHERE name > 'LiLei' AND age = 22 AND position ='manager';
Conclusion: with a range condition on the first field of the composite index, the index is not used. MySQL reasons internally that a range on the first field probably matches a large result set, and looking each matching row up again in the clustered index (the back-to-table step) would be inefficient, so it may as well scan the whole table.
Force Index
EXPLAIN SELECT * FROM employees force index(idx_name_age_position) WHERE name > 'LiLei' AND age = 22 AND position ='manager';
Conclusion: with the forced index, the range lookup on the first field of the composite index does go through the index and the estimated rows scanned look somewhat lower, but the final query is not necessarily faster than a full table scan, because the back-to-table lookups are expensive.
Let's check the actual execution times:
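A rough comparison can be run directly (a sketch; exact timings depend on data volume and hardware):
-- full-table-scan version
SELECT * FROM employees WHERE name > 'LiLei' AND age = 22 AND position ='manager';
-- forced-index version: range scan on idx_name_age_position plus one clustered-index lookup per matching name
SELECT * FROM employees force index(idx_name_age_position) WHERE name > 'LiLei' AND age = 22 AND position ='manager';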
Timing shows that forcing the index here is actually less efficient.
Covering index optimization
EXPLAIN SELECT name,age,position FROM employees WHERE name > 'LiLei' AND age = 22 AND position ='manager';
in and or: when the table holds a lot of data the index is used; when the table has few records a full table scan is chosen instead.
EXPLAIN SELECT * FROM employees WHERE name in ('LiLei','HanMeimei','Lucy') AND age = 22 AND position ='manager';
EXPLAIN SELECT * FROM employees WHERE (name = 'LiLei' or name = 'HanMeimei') AND age = 22 AND position ='manager';
How MySQL chooses the right index
EXPLAIN select * from employees where name > 'a';
EXPLAIN select * from employees where name > 'zzz' ; 

Same table, same indexed field: one query uses the index and the other does not. This comes down to MySQL's internal cost calculation, which we can examine with the trace tool:
Using the trace tool
set session optimizer_trace="enabled=on",end_markers_in_json=on; -- Turn on trace
select * from employees where name > 'a' order by position;
SELECT * FROM information_schema.OPTIMIZER_TRACE;
{
"steps": [
{
"join_preparation": {
"select#": 1, -- The first stage :sql Preparation stage ,sql Optimize
"steps": [
{
"expanded_query": "/* select#1 */ select `employees`.`id` AS `id`,`employees`.`name` AS `name`,`employees`.`age` AS `age`,`employees`.`position` AS `position`,`employees`.`hire_time` AS `hire_time` from `employees` where (`employees`.`name` > 'a') order by `employees`.`position`"
}
] /* steps */
} /* join_preparation */
},
{
"join_optimization": { -- The second stage :sql Optimization stage
"select#": 1,
"steps": [
{
"condition_processing": {
"condition": "WHERE",
"original_condition": "(`employees`.`name` > 'a')",
"steps": [
{
"transformation": "equality_propagation",
"resulting_condition": "(`employees`.`name` > 'a')"
},
{
"transformation": "constant_propagation",
"resulting_condition": "(`employees`.`name` > 'a')"
},
{
"transformation": "trivial_condition_removal",
"resulting_condition": "(`employees`.`name` > 'a')"
}
] /* steps */
} /* condition_processing */
},
{
"substitute_generated_columns": {
} /* substitute_generated_columns */
},
{
"table_dependencies": [ -- Table dependency details
{
"table": "`employees`",
"row_may_be_null": false,
"map_bit": 0,
"depends_on_map_bits": [
] /* depends_on_map_bits */
}
] /* table_dependencies */
},
{
"ref_optimizer_key_uses": [
] /* ref_optimizer_key_uses */
},
{
"rows_estimation": [ -- Estimate the access cost of the table
{
"table": "`employees`",
"range_analysis": {
"table_scan": { -- Full table scanning
"rows": 94170, -- Number of scanning lines
"cost": 9707.3 -- Query cost
} /* table_scan */,
"potential_range_indexes": [ -- Query possible indexes
{
"index": "PRIMARY", -- primary key
"usable": false,
"cause": "not_applicable"
},
{
"index": "idx_name_age_position", -- Secondary index
"usable": true,
"key_parts": [
"name",
"age",
"position",
"id"
] /* key_parts */
}
] /* potential_range_indexes */,
"setup_range_conditions": [
] /* setup_range_conditions */,
"group_index_range": {
"chosen": false,
"cause": "not_group_by_or_distinct"
} /* group_index_range */,
"skip_scan_range": {
"potential_skip_scan_indexes": [
{
"index": "idx_name_age_position",
"usable": false,
"cause": "query_references_nonkey_column"
}
] /* potential_skip_scan_indexes */
} /* skip_scan_range */,
"analyzing_range_alternatives": { -- Analyze the cost of using each index
"range_scan_alternatives": [
{
"index": "idx_name_age_position",
"ranges": [
"a < name" -- Index usage scope
] /* ranges */,
"index_dives_for_eq_ranges": true,
"rowid_ordered": false, -- Whether the records obtained by this index are sorted by primary key
"using_mrr": false,
"index_only": false, -- Whether to use overlay index
"rows": 47085, -- Number of index scan lines
"cost": 51666, -- Index usage cost
"chosen": false,
"cause": "cost"
}
] /* range_scan_alternatives */,
"analyzing_roworder_intersect": {
"usable": false,
"cause": "too_few_roworder_scans"
} /* analyzing_roworder_intersect */
} /* analyzing_range_alternatives */
} /* range_analysis */
}
] /* rows_estimation */
},
{
"considered_execution_plans": [
{
"plan_prefix": [
] /* plan_prefix */,
"table": "`employees`",
"best_access_path": { -- Optimal access path
"considered_access_paths": [ -- Final access path
{
"rows_to_scan": 94170,
"access_type": "scan", -- Access type :scan Full table scan
"resulting_rows": 94170,
"cost": 9705.2,
"chosen": true, -- Determine the choice
"use_tmp_table": true
}
] /* considered_access_paths */
} /* best_access_path */,
"condition_filtering_pct": 100,
"rows_for_plan": 94170,
"cost_for_plan": 9705.2,
"sort_cost": 94170,
"new_cost_for_plan": 103875,
"chosen": true
}
] /* considered_execution_plans */
},
{
"attaching_conditions_to_tables": {
"original_condition": "(`employees`.`name` > 'a')",
"attached_conditions_computation": [
] /* attached_conditions_computation */,
"attached_conditions_summary": [
{
"table": "`employees`",
"attached": "(`employees`.`name` > 'a')"
}
] /* attached_conditions_summary */
} /* attaching_conditions_to_tables */
},
{
"optimizing_distinct_group_by_order_by": {
"simplifying_order_by": {
"original_clause": "`employees`.`position`",
"items": [
{
"item": "`employees`.`position`"
}
] /* items */,
"resulting_clause_is_simple": true,
"resulting_clause": "`employees`.`position`"
} /* simplifying_order_by */
} /* optimizing_distinct_group_by_order_by */
},
{
"reconsidering_access_paths_for_index_ordering": {
"clause": "ORDER BY",
"steps": [
] /* steps */,
"index_order_summary": {
"table": "`employees`",
"index_provides_order": false,
"order_direction": "undefined",
"index": "unknown",
"plan_changed": false
} /* index_order_summary */
} /* reconsidering_access_paths_for_index_ordering */
},
{
"finalizing_table_conditions": [
{
"table": "`employees`",
"original_table_condition": "(`employees`.`name` > 'a')",
"final_table_condition ": "(`employees`.`name` > 'a')"
}
] /* finalizing_table_conditions */
},
{
"refine_plan": [
{
"table": "`employees`"
}
] /* refine_plan */
},
{
"considering_tmp_tables": [
{
"adding_sort_to_table_in_plan_at_position": 0
} /* filesort */
] /* considering_tmp_tables */
}
] /* steps */
} /* join_optimization */
},
{
"join_execution": {
"select#": 1,
"steps": [
{
"sorting_table_in_plan_at_position": 0,
"filesort_information": [
{
"direction": "asc",
"table": "`employees`",
"field": "position"
}
] /* filesort_information */,
"filesort_priority_queue_optimization": {
"usable": false,
"cause": "not applicable (no LIMIT)"
} /* filesort_priority_queue_optimization */,
"filesort_execution": [
] /* filesort_execution */,
"filesort_summary": {
"memory_available": 262144,
"key_size": 40,
"row_size": 188,
"max_rows_per_buffer": 1394,
"num_rows_estimate": 281600,
"num_rows_found": 93919,
"num_initial_chunks_spilled_to_disk": 28,
"peak_memory_used": 268928,
"sort_algorithm": "std::stable_sort",
"sort_mode": "<fixed_sort_key, packed_additional_fields>"
} /* filesort_summary */
}
] /* steps */
} /* join_execution */
}
] /* steps */
}
In-depth optimization of common SQL
1. Order by and Group by optimization
case1:
EXPLAIN select * from employees where name='LiLei' and position='dev' order by age

Analysis:
By the leftmost-prefix rule the middle column cannot be skipped, so the query uses only the name part of the index for filtering, as key_len=74 shows. The age column is used in sorting, because Extra contains no Using filesort.
case2:
EXPLAIN select * from employees where name='LiLei' order by position

Analysis:
From the EXPLAIN result, key_len=74: the query uses only the name part of the index. Sorting is by position, which skips age, so Extra shows Using filesort.
case3:
EXPLAIN select * from employees where name='LiLei' order by age,position
Analysis:
The lookup uses only the name part of the index; age and position are used for sorting, so there is no Using filesort.
case4:
EXPLAIN select * from employees where name='LiLei' order by position,age

Analysis:
The EXPLAIN result looks the same as case 3, but Using filesort appears, because the index was created in the order name, age, position, while the sort puts age and position in the opposite order.
case5:
EXPLAIN select * from employees where name='LiLei' and age=18 order by position,age

Analysis:
Compared with case 4, Extra no longer shows Using filesort: age is a constant here and is optimized away in the sort, so the sort order no longer conflicts with the index and no Using filesort is produced.
case6:
EXPLAIN select * from employees where name='LiLei' order by age asc,position desc

Analysis:
Although the sort columns are in the same order as the index and order by defaults to ascending, position desc here asks for descending order; the resulting order differs from the index, so Using filesort is produced. MySQL 8.0 and later support descending indexes for this kind of query (see the sketch below).
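A minimal sketch on MySQL 8.0+, using a descending key part that matches the mixed-direction sort (this index is not part of the original schema and is added here only for illustration):
-- hypothetical index: name and age ascending, position descending, matching "order by age asc, position desc"
ALTER TABLE employees ADD KEY idx_name_age_posdesc (name, age, position DESC);
EXPLAIN select * from employees where name='LiLei' order by age asc, position desc;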
case7:
EXPLAIN select * from employees where name >'LiLei' order by name

A covering index can be used to optimize this:
EXPLAIN select name,age,position from employees where name >'LiLei' order by name
Optimization summary:
1、MySQL supports two sort methods, filesort and index. Using index means MySQL completes the sort by scanning the index itself; index is efficient, filesort is inefficient.
2、order by results in Using index in two situations:
1) the order by clause uses a leftmost prefix of the index;
2) the where clause columns combined with the order by columns form a leftmost prefix of the index.
3、Sort on index columns as far as possible, following the leftmost-prefix rule of the order in which the index was created.
4、If the order by condition is not on an index column, Using filesort is produced.
5、Use a covering index whenever you can.
6、group by is very similar to order by: in essence it sorts first and then groups, following the leftmost-prefix rule of the index column order. If the grouping does not need sorting, add order by null to skip the sort (see the sketch below). Note that where is applied earlier than having, so put any condition you can into where rather than into having.
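A small sketch of point 6 using the employees table (on MySQL 8.0 GROUP BY no longer sorts implicitly, so order by null mainly matters on 5.7 and earlier):
-- grouping by position skips age in the index, so 5.7 may sort the groups (Using temporary; Using filesort in Extra)
EXPLAIN select position, count(*) from employees where name = 'LiLei' group by position;
-- order by null suppresses that sort
EXPLAIN select position, count(*) from employees where name = 'LiLei' group by position order by null;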
2. Paging query optimization
Paging in business systems is often implemented with SQL like this:
select * from employees limit 10000,10;
This fetches 10 rows starting from row 10001 of employees. It looks like it reads only 10 records, but the SQL actually reads the first 10010 records, discards the first 10000, and returns the remaining 10. Querying data deep in a large table is therefore very inefficient.
case 1: paging sorted by an auto-increment, continuous primary key
select * from employees limit 90000,5;
select * from employees where id > 90000 limit 5;
However, the rewritten SQL is not practical in many scenarios: some rows may have been deleted, leaving gaps in the primary key sequence, and the two queries would then return different results.
So if the primary key is not continuous, this optimization cannot be used. Also, if the original SQL orders by a non-primary-key field, rewriting it this way makes the two statements return different results. The rewrite therefore requires two conditions:
- the primary key is auto-increment and continuous
- the result is sorted by primary key
case 2: paging sorted by a non-primary-key field
select * from employees ORDER BY name limit 90000,5;
select * from employees e inner join (select id from employees order by name limit 90000,5) ed on e.id = ed.id;
The point of the rewrite is to let the sort and LIMIT run on the index first: the subquery selects only id, so it can be satisfied entirely from idx_name_age_position (already sorted by name, no filesort), and only the final 5 rows are looked up in the table via the primary-key join.


3. Join query optimization
-- Sample table :
CREATE TABLE `t1` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`a` int(11) DEFAULT NULL,
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `idx_a` (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
create table t2 like t1;
-- Insert some sample data
-- insert 10,000 rows into t1
drop procedure if exists insert_t1;
delimiter ;;
create procedure insert_t1()
begin
declare i int;
set i=1;
while(i<=10000)do
insert into t1(a,b) values(i,i);
set i=i+1;
end while;
end;;
delimiter ;
call insert_t1();
-- insert 100 rows into t2
drop procedure if exists insert_t2;
delimiter ;;
create procedure insert_t2()
begin
declare i int;
set i=1;
while(i<=100)do
insert into t2(a,b) values(i,i);
set i=i+1;
end while;
end;;
delimiter ;
call insert_t2();
MySQL has two algorithms for table joins:
1、Nested-Loop Join (NLJ) algorithm
EXPLAIN select * from t1 inner join t2 on t1.a= t2.a;
In a typical join statement, if Extra in the execution plan does not contain Using join buffer, the join uses the NLJ algorithm.
The general flow of the SQL above is:
- read one row from table t2 (if the query has filter conditions on t2, apply them first and take a row from the filtered result);
- take the join column a from that row and look it up in table t1;
- take the matching rows from t1, merge them with the row from t2, and return the result to the client;
- repeat the steps above.
The whole process reads all of t2 (100 rows scanned), then for each row uses the value of column a to do an index lookup in t1 (100 lookups against t1's index; each lookup ultimately reads one complete row of t1, so 100 rows of t1 are scanned as well). In total about 200 rows are scanned.
2、Block Nested-Loop Join (BNL) algorithm
EXPLAIN select * from t1 inner join t2 on t1.b= t2.b
Using join buffer (Block Nested Loop) in Extra indicates that the join uses the BNL algorithm.
The general flow of the SQL above is:
- put all the data of t2 into the join_buffer;
- scan t1, taking each row and comparing it with the data in the join_buffer;
- return the rows that satisfy the join condition.
Both t1 and t2 are fully scanned, so the total number of rows scanned is 10000 (all of t1) + 100 (all of t2) = 10100. Moreover, the data in the join_buffer is unordered, so each of the 10000 rows of t1 must be compared against all 100 buffered rows, giving 100 * 10000 = 1,000,000 in-memory comparisons.
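The buffer is sized by the join_buffer_size variable (256KB by default); if t2 did not fit, MySQL would load it in segments and scan t1 once per segment. A quick way to check the current setting (a side note, not from the original text):
show variables like 'join_buffer_size';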
Optimizing join SQL:
- Put an index on the join columns, so that MySQL tends to choose the NLJ algorithm when doing the join. The driving table has to be read in full anyway, so its filter conditions should also use an index where possible to avoid full table scans; in short, any filter condition that can use an index should use it.
- Small table drives large table: when writing a multi-table join and you already know which table is small, you can use straight_join to fix the join order and save the optimizer the work of deciding (see the sketch below).
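A minimal sketch of straight_join using the t1/t2 sample tables (the table on the left of straight_join is forced to be the driving table; here t2 is the small one):
-- t2 (100 rows) drives t1 (10000 rows); without straight_join the optimizer chooses the order itself
EXPLAIN select * from t2 straight_join t1 on t2.a = t1.a;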
4. in and exists optimization
Principle: small table drives large table, i.e. the small data set drives the large data set.
in: when the data set of table B is smaller than that of table A, in is better than exists;
exists: when the data set of table A is smaller than that of table B, exists is better than in.
An EXISTS subquery can often also be replaced with a JOIN; which form is best needs to be analysed case by case.
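A sketch of the two patterns using the t1/t2 sample tables above (t1 plays the role of the big table A, t2 the small table B):
-- in: the subquery table should be the smaller set; here t2 (100 rows) is smaller than t1 (10000 rows)
select * from t1 where t1.a in (select a from t2);
-- exists: the outer table should be the smaller set; here small t2 drives, and the subquery only checks existence
select * from t2 where exists (select 1 from t1 where t1.a = t2.a);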
5. count(*) query optimization
EXPLAIN select count(1) from employees;
EXPLAIN select count(id) from employees;
EXPLAIN select count(name) from employees;
EXPLAIN select count(*) from employees;



count(1) and count(field) execute in a similar way, but count(1) does not need to fetch the field value; it just counts with the constant 1, while count(field) must fetch the field, so in theory count(1) is slightly faster than count(field).
count(*) is the exception: MySQL does not fetch all the columns; it is specially optimized to add up rows without reading values and is very efficient, so there is no need to replace count(*) with count(column) or count(constant).