表数据量影响MySQL索引选择

226次阅读

共计 10824 个字符，预计需要花费 28 分钟才能阅读完成。

现象

新建了一张员工表，插入了少量数据，索引中所有的字段均在 where 条件出现时，正确走到了 idx_nap 索引，但是 where 出现部分自左开始的索引时，却进行全表扫描，与 MySQL 官方所说的最左匹配原则“相悖”。

数据背景
CREATE TABLE `staffs` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(24) NOT NULL DEFAULT ” COMMENT ‘ 姓名 ’,
`age` int(11) NOT NULL DEFAULT ‘0’ COMMENT ‘ 年龄 ’,
`pos` varchar(20) NOT NULL DEFAULT ” COMMENT ‘ 职位 ’,
`add_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT ‘ 入职时间 ’,
PRIMARY KEY (`id`),
KEY `idx_nap` (`name`,`age`,`pos`)
) ENGINE=InnoDB AUTO_INCREMENT=8 DEFAULT CHARSET=utf8 COMMENT=’ 员工记录表 ’;

表中数据如下：
id name age pos add_time
1 July 23 dev 2018-06-04 16:02:02
2 Clive 22 dev 2018-06-04 16:02:32
3 Cleva 24 test 2018-06-04 16:02:38
4 July 23 test 2018-06-04 16:12:22
5 July 23 pre 2018-06-04 16:12:37
6 Clive 22 pre 2018-06-04 16:12:48
7 July 25 dev 2018-06-04 16:30:17

Explain 语句看下执行计划
— 全匹配走了索引
explain select * from staffs where name = ‘July’ and age = 23 and pos = ‘dev’;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE staffs NULL ref idx_nap idx_nap 140 const,const,const 1 100.00 NULL

开启优化器跟踪优化过程
— 左侧部分匹配却没有走索引，全表扫描
explain select * from staffs where name = ‘July’ and age = 23;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE staffs2 NULL ALL idx_nap NULL NULL NULL 6 50.00 Using where
— 开启优化器跟踪
set session optimizer_trace=’enabled=on’;
— 在执行完查询语句后，在执行以下的 select 语句可以查看具体的优化器执行过程
select * from information_schema.optimizer_trace;

Trace 部分的内容
{
“steps”: [
{
“join_preparation”: {
“select#”: 1,
“steps”: [
{
“expanded_query”: “/* select#1 */ select `staffs`.`id` AS `id`,`staffs`.`name` AS `name`,`staffs`.`age` AS `age`,`staffs`.`pos` AS `pos`,`staffs`.`add_time` AS `add_time` from `staffs` where ((`staffs`.`name` = ‘July’) and (`staffs`.`age` = 23))”
}
]
}
},
{
“join_optimization”: {
“select#”: 1,
“steps”: [
{
“condition_processing”: {
“condition”: “WHERE”,
“original_condition”: “((`staffs`.`name` = ‘July’) and (`staffs`.`age` = 23))”,
“steps”: [
{
“transformation”: “equality_propagation”,
“resulting_condition”: “((`staffs`.`name` = ‘July’) and multiple equal(23, `staffs`.`age`))”
},
{
“transformation”: “constant_propagation”,
“resulting_condition”: “((`staffs`.`name` = ‘July’) and multiple equal(23, `staffs`.`age`))”
},
{
“transformation”: “trivial_condition_removal”,
“resulting_condition”: “((`staffs`.`name` = ‘July’) and multiple equal(23, `staffs`.`age`))”
}
]
}
},
{
“substitute_generated_columns”: {
}
},
{
“table_dependencies”: [
{
“table”: “`staffs`”,
“row_may_be_null”: false,
“map_bit”: 0,
“depends_on_map_bits”: [
]
}
]
},
{
“ref_optimizer_key_uses”: [
{
“table”: “`staffs`”,
“field”: “name”,
“equals”: “‘July'”,
“null_rejecting”: false
},
{
“table”: “`staffs`”,
“field”: “age”,
“equals”: “23”,
“null_rejecting”: false
}
]
},
{
“rows_estimation”: [
{
“table”: “`staffs`”,
“range_analysis”: {
“table_scan”: {
“rows”: 6,
“cost”: 4.3
},
“potential_range_indexes”: [
{
“index”: “PRIMARY”,
“usable”: false,
“cause”: “not_applicable”
},
{
“index”: “idx_nap”,
“usable”: true,
“key_parts”: [
“name”,
“age”,
“pos”,
“id”
]
}
],
“setup_range_conditions”: [
],
“group_index_range”: {
“chosen”: false,
“cause”: “not_group_by_or_distinct”
},
“analyzing_range_alternatives”: {
“range_scan_alternatives”: [
{
“index”: “idx_nap”,
“ranges”: [
“July <= name <= July AND 23 <= age <= 23”
],
“index_dives_for_eq_ranges”: true,
“rowid_ordered”: false,
“using_mrr”: false,
“index_only”: false,
“rows”: 3,
“cost”: 4.61,
“chosen”: false,
“cause”: “cost”
}
],
“analyzing_roworder_intersect”: {
“usable”: false,
“cause”: “too_few_roworder_scans”
}
}
}
}
]
},
{
“considered_execution_plans”: [
{
“plan_prefix”: [
],
“table”: “`staffs`”,
“best_access_path”: {
“considered_access_paths”: [
{
// 可以看到这边 MySQL 计算得到使用索引的成本为 2.6
“access_type”: “ref”,
“index”: “idx_nap”,
“rows”: 3,
“cost”: 2.6,
“chosen”: true
},
{
// 而全表扫描计算所得的成本为 2.2
“rows_to_scan”: 6,
“access_type”: “scan”,
“resulting_rows”: 6,
“cost”: 2.2,
“chosen”: true
}
]
},
// 因此选择了成本更低的 scan
“condition_filtering_pct”: 100,
“rows_for_plan”: 6,
“cost_for_plan”: 2.2,
“chosen”: true
}
]
},
{
“attaching_conditions_to_tables”: {
“original_condition”: “((`staffs`.`age` = 23) and (`staffs`.`name` = ‘July’))”,
“attached_conditions_computation”: [
],
“attached_conditions_summary”: [
{
“table”: “`staffs`”,
“attached”: “((`staffs`.`age` = 23) and (`staffs`.`name` = ‘July’))”
}
]
}
},
{
“refine_plan”: [
{
“table”: “`staffs`”
}
]
}
]
}
},
{
“join_execution”: {
“select#”: 1,
“steps”: [
]
}
}
]
}

增加表数据量
— 接下来增大表的数据量
INSERT INTO `staffs` (`name`, `age`, `pos`, `add_time`)
VALUES
(‘July’, 25, ‘dev’, ‘2018-06-04 16:30:17’),
(‘July’, 23, ‘dev1’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev2’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev3’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev4’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev6’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev5’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev7’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev8’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev9’, ‘2018-06-04 16:02:02’),
(‘July’, 23, ‘dev10’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev1’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev2’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev3’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev4’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev6’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev5’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev7’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev8’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev9’, ‘2018-06-04 16:02:02’),
(‘Clive’, 23, ‘dev10’, ‘2018-06-04 16:02:02’);

执行 Explain
— 再次执行同样的查询语句，会发现走到索引上了
explain select * from staffs where name = ‘July’ and age = 23;
id select_type table partitions type possible_keys key key_len ref rows filtered Extra
1 SIMPLE staffs NULL ref idx_nap idx_nap 78 const,const 13 100.00 NULL

查看新的 Trace 内容
— 再看下优化器执行过程
{
“steps”: [
{
“join_preparation”: {
“select#”: 1,
“steps”: [
{
“expanded_query”: “/* select#1 */ select `staffs`.`id` AS `id`,`staffs`.`name` AS `name`,`staffs`.`age` AS `age`,`staffs`.`pos` AS `pos`,`staffs`.`add_time` AS `add_time` from `staffs` where ((`staffs`.`name` = ‘July’) and (`staffs`.`age` = 23))”
}
]
}
},
{
“join_optimization”: {
“select#”: 1,
“steps”: [
{
“condition_processing”: {
“condition”: “WHERE”,
“original_condition”: “((`staffs`.`name` = ‘July’) and (`staffs`.`age` = 23))”,
“steps”: [
{
“transformation”: “equality_propagation”,
“resulting_condition”: “((`staffs`.`name` = ‘July’) and multiple equal(23, `staffs`.`age`))”
},
{
“transformation”: “constant_propagation”,
“resulting_condition”: “((`staffs`.`name` = ‘July’) and multiple equal(23, `staffs`.`age`))”
},
{
“transformation”: “trivial_condition_removal”,
“resulting_condition”: “((`staffs`.`name` = ‘July’) and multiple equal(23, `staffs`.`age`))”
}
]
}
},
{
“substitute_generated_columns”: {
}
},
{
“table_dependencies”: [
{
“table”: “`staffs`”,
“row_may_be_null”: false,
“map_bit”: 0,
“depends_on_map_bits”: [
]
}
]
},
{
“ref_optimizer_key_uses”: [
{
“table”: “`staffs`”,
“field”: “name”,
“equals”: “‘July'”,
“null_rejecting”: false
},
{
“table”: “`staffs`”,
“field”: “age”,
“equals”: “23”,
“null_rejecting”: false
}
]
},
{
“rows_estimation”: [
{
“table”: “`staffs`”,
“range_analysis”: {
“table_scan”: {
“rows”: 27,
“cost”: 8.5
},
“potential_range_indexes”: [
{
“index”: “PRIMARY”,
“usable”: false,
“cause”: “not_applicable”
},
{
“index”: “idx_nap”,
“usable”: true,
“key_parts”: [
“name”,
“age”,
“pos”,
“id”
]
}
],
“setup_range_conditions”: [
],
“group_index_range”: {
“chosen”: false,
“cause”: “not_group_by_or_distinct”
},
“analyzing_range_alternatives”: {
“range_scan_alternatives”: [
{
“index”: “idx_nap”,
“ranges”: [
“July <= name <= July AND 23 <= age <= 23”
],
“index_dives_for_eq_ranges”: true,
“rowid_ordered”: false,
“using_mrr”: false,
“index_only”: false,
“rows”: 13,
“cost”: 16.61,
“chosen”: false,
“cause”: “cost”
}
],
“analyzing_roworder_intersect”: {
“usable”: false,
“cause”: “too_few_roworder_scans”
}
}
}
}
]
},
{
“considered_execution_plans”: [
{
“plan_prefix”: [
],
“table”: “`staffs`”,
“best_access_path”: {
“considered_access_paths”: [
{
// 使用索引的成本变为了 5.3
“access_type”: “ref”,
“index”: “idx_nap”,
“rows”: 13,
“cost”: 5.3,
“chosen”: true
},
{
//scan 的成本变为了 6.4
“rows_to_scan”: 27,
“access_type”: “scan”,
“resulting_rows”: 27,
“cost”: 6.4,
“chosen”: false
}
]
},
// 使用索引查询的成本更低，因此选择了走索引
“condition_filtering_pct”: 100,
“rows_for_plan”: 13,
“cost_for_plan”: 5.3,
“chosen”: true
}
]
},
{
“attaching_conditions_to_tables”: {
“original_condition”: “((`staffs`.`age` = 23) and (`staffs`.`name` = ‘July’))”,
“attached_conditions_computation”: [
],
“attached_conditions_summary”: [
{
“table”: “`staffs`”,
“attached”: null
}
]
}
},
{
“refine_plan”: [
{
“table”: “`staffs`”
}
]
}
]
}
},
{
“join_execution”: {
“select#”: 1,
“steps”: [
]
}
}
]
}