1. 版本
1)操作系统版本
cat /proc/version
Linux version 3.10.0-957.5.1.el7.x86_64 (mockbuild@kbuilder.bsys.centos.org) (gcc version 4.8.5 20150623 (Red Hat 4.8.5-36) (GCC) ) #1
2)数据库版本
mysql --version
mysql Ver 14.14 Distrib 5.7.22, for linux-glibc2.12 (x86_64) using EditLine wrapper
2. 问题描述
2.1 问题发现
有如下一则 limit 查询,正常执行时间为800-900 ms,有什么优化方案?
select * from test_table where merchant_id = 43 and status = 'SUCCESS' order by salary_id desc limit 900000,10;
表结构如下:
CREATE TABLE `test_table` (
`salary_id` int(11) NOT NULL AUTO_INCREMENT,
`salary_no` varchar(32) DEFAULT NULL,
`merchant_id` int(11) NOT NULL,
`customer_id` int(11) DEFAULT NULL,
`wallet_no` varchar(32) DEFAULT NULL,
`batch_num` varchar(32) NOT NULL,
`batch_id` int(11) DEFAULT NULL,
`biz_type` varchar(32) NOT NULL,
`name` varchar(32) DEFAULT NULL,
`id_card` varchar(18) NOT NULL,
`account` varchar(64) DEFAULT NULL,
`bank_name` varchar(128) DEFAULT NULL,
`apply_amount` int(11) NOT NULL DEFAULT '0',
`origin_amount` int(11) unsigned NOT NULL DEFAULT '0',
`origin_fee` int(11) unsigned NOT NULL DEFAULT '0',
`actual_amount` int(11) unsigned NOT NULL DEFAULT '0',
`actual_fee` int(11) unsigned NOT NULL DEFAULT '0',
`need_pay` tinyint(1) DEFAULT '1',
`factory_id` int(11) unsigned NOT NULL,
`factory_name` varchar(64) DEFAULT NULL,
`salary_time` datetime DEFAULT NULL,
`status` varchar(32) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
`user_mobile` varchar(32) DEFAULT NULL,
`pay_note` varchar(64) DEFAULT NULL,
`remark` varchar(128) DEFAULT NULL,
`create_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
`update_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY (`salary_id`),
KEY `idx_id_card` (`id_card`),
KEY `idx_time` (`salary_time`),
KEY `idx_salary_no` (`salary_no`) USING BTREE,
KEY `idx_mix` (`merchant_id`,`batch_num`,`need_pay`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=773795 DEFAULT CHARSET=utf8mb4;
3.优化方案
先给出优化方案,然后再详细解释下为什么这么做。优化方案由如下两部分组成:
1)改写sql如下:
SELECT *
FROM test_table a
INNER JOIN
(SELECT salary_id
FROM test_table
WHERE merchant_id = 43
AND STATUS = 'SUCCESS'
LIMIT 900000,
10) b ON a.salary_id = b.salary_id;
2)添加 (merchant_id,STATUS) 组合索引
alter table test_table add index idx_merchant_id_status(merchant_id,STATUS);
4.优化方案原理分析
1) 未添加索引前,两种sql写法的执行计划及执行时间
mysql> explain select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10;
+----+-------------+-------------------+------+---------------+---------+---------+-------+--------+-----------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+------+---------------+---------+---------+-------+--------+-----------------------------+
| 1 | SIMPLE | test_table | ref | idx_mix | idx_mix | 4 | const | 356237 | Using where; Using filesort |
+----+-------------+-------------------+------+---------------+---------+---------+-------+--------+-----------------------------+
1 row in set (0.01 sec)
mysql> explain
-> SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 90000,
-> 10) b ON a.salary_id = b.salary_id;
+----+-------------+-------------------+--------+---------------+---------+---------+-------------+--------+-----------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+--------+---------------+---------+---------+-------------+--------+-----------------------------+
| 1 | PRIMARY | | ALL | NULL | NULL | NULL | NULL | 90010 | NULL |
| 1 | PRIMARY | a | eq_ref | PRIMARY | PRIMARY | 4 | b.salary_id | 1 | NULL |
| 2 | DERIVED | test_table | ref | idx_mix | idx_mix | 4 | const | 356237 | Using where; Using filesort |
+----+-------------+-------------------+--------+---------------+---------+---------+-------------+--------+-----------------------------+
3 rows in set (0.00 sec)
mysql> select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10
-> ;
......(结果省略)
......
......
10 rows in set (0.82 sec)
mysql> SELECT * FROM test_table a INNER JOIN (SELECT salary_id FROM test_table WHERE merchant_id = 43 AND STATUS = 'SUCCESS' ORDER BY salary_id DESC LIMIT 90000, 10) b ON a.salary_id = b.salary_id;
......(结果省略)
......
......
10 rows in set (0.52 sec)
2)添加索引后,两种sql写法的执行计划及执行时间
mysql> explain select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10;
+----+-------------+-------------------+------+--------------------------------+------------------------+---------+-------------+--------+-------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+------+--------------------------------+------------------------+---------+-------------+--------+-------------+
| 1 | SIMPLE | test_table | ref | idx_mix,idx_merchant_id_status | idx_merchant_id_status | 38 | const,const | 350286 | Using where |
+----+-------------+-------------------+------+--------------------------------+------------------------+---------+-------------+--------+-------------+
1 row in set (0.00 sec)
mysql> explain SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 90000,
-> 10) b ON a.salary_id = b.salary_id;
+----+-------------+-------------------+--------+--------------------------------+------------------------+---------+-------------+--------+--------------------------+
| id | select_type | table | type | possible_keys | key | key_len | ref | rows | Extra |
+----+-------------+-------------------+--------+--------------------------------+------------------------+---------+-------------+--------+--------------------------+
| 1 | PRIMARY | | ALL | NULL | NULL | NULL | NULL | 90010 | NULL |
| 1 | PRIMARY | a | eq_ref | PRIMARY | PRIMARY | 4 | b.salary_id | 1 | NULL |
| 2 | DERIVED | test_table | ref | idx_mix,idx_merchant_id_status | idx_merchant_id_status | 38 | const,const | 350286 | Using where; Using index |
+----+-------------+-------------------+--------+--------------------------------+------------------------+---------+-------------+--------+--------------------------+
3 rows in set (0.00 sec)
mysql> select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 90000,10
-> ;
......(搜索结果省略)
......
......
10 rows in set (0.21 sec)
mysql> SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 90000,
-> 10) b ON a.salary_id = b.salary_id;
......(搜索结果省略)
......
......
10 rows in set (0.05 sec)
##为什么改成子查询加关联查询以后效率提高?为什么添加了组合索引效率就提高?
3)未添加 (merchant_id,STATUS) 组合索引前两个sql执行计划解析
原始 sql 走 idx_mix 索引,需要扫描 356237 条索引记录,并回表356237次拿出记录的所有字段(如果表中有这么多符合条件的记录),然后通过主键id进行排序(通常索引上发现一条匹配的记录就进行一次回表,可以通过MRR进行优化),取出最后10条
NOTE:因为sql需要根据主键排序并且进行limit 过滤,但是表中目前只有一个 idx_mix 所以,所以需要扫描所有符合merchant_id=43的记录并进行回表,然后对所有符合条件的记录进行排序,再进行limit过滤。如果表中有(merchant_id,STATUS)组合索引,那么只需要扫描90010次索引并回表90010次,且无需排序(索引组织表的主键是有序的,所以创建的二级索引中,当二级索引所有列都做等值匹配时,最后回表时主键肯定是顺序的)。如果表中有(merchant_id)的单列索引,那么也无需排序,且只要过滤到90010条记录时就会停止,无需访问所有的merchant_id=43的记录。下面是添加(merchant_id,STATUS)组合索引前后,相关查询慢日志,体现了表扫描的具体行数(最后一条是添加了组合索引的慢查,如推断只扫描了90010条记录,当然也有可能出现扫描行数少于90010的情况)
mysql> select * from mysql.slow_log;
+----------------------------+--------------------------------+-----------------+-----------------+-----------+---------------+-------------------------+----------------+-----------+-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+
| start_time | user_host | query_time | lock_time | rows_sent | rows_examined | db | last_insert_id | insert_id | server_id | sql_text | thread_id |
+----------------------------+--------------------------------+-----------------+-----------------+-----------+---------------+-------------------------+----------------+-----------+-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+
| 2020-03-18 14:32:17.000000 | xxx@yyy | 00:00:00.000000 | 00:00:00.000000 | 10 | 346899 | xxx | 0 | 0 | 0 | select * from test_table where merchant_id = 43 and status = 'SUCCESS' order by salary_id desc limit 90000,10 | 33956 |
| 2020-03-18 14:32:44.000000 | xxx@yyy | 00:00:00.000000 | 00:00:00.000000 | 10 | 346919 | xxx | 0 | 0 | 0 | SELECT * FROM test_table a INNER JOIN (SELECT salary_id FROM test_table WHERE merchant_id = 43 AND STATUS = 'SUCCESS' ORDER BY salary_id DESC LIMIT 90000, 10) b ON a.salary_id = b.salary_id | 33956 |
| 2020-03-18 14:35:54.000000 | xxx@yyy | 00:00:00.000000 | 00:00:00.000000 | 10 | 90010 | xxx | 0 | 0 | 0 | select * from test_table where merchant_id = 43 and status = 'SUCCESS' order by salary_id desc limit 90000,10 | 33956 |
+----------------------------+--------------------------------+-----------------+-----------------+-----------+---------------+-------------------------+----------------+-----------+-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+
4 rows in set (0.00 sec)
原始 sql 改写后,子查询需要扫描 356237 条索引记录,并取符合条件的最后10条索引记录中的主键跟 a 表进行关联,通过a表的主键访问访问10条记录(拿出所有字段)
## 原始 sql 改写后多了一个关联操作,但是子查询只需要访问 356237 条索引记录,不需要取出记录的所有字段。所以效率高于改写前的sql
4)加了 (merchant_id,STATUS) 组合索引后两个执行计划解析
原始sql走 idx_merchant_id_status 索引,需要访问 90010 条索引记录,并回表 90010 次拿出记录的所有字段(如果表中有这么多符合条件的记录),并且消除了排序过程(排序过程为什么会被消除前文已经解释了),取出最后10条。
## 我们可以看到只是添加索引,并未修改语句的情况下,mysql的执行效率提高了4倍左右,这是因为添加该组合索引后,消除了排序过程,并且扫描的记录数和回表次数从30多万次减少到90010次
改写后的 sql 子查询 需要扫描90010条记录,并取符合条件的最后10条索引记录中的主键跟 a 表进行关联,通过a表的主键访问访问10条记录(拿出所有字段)。
##因为子查询使用了覆盖索引,并且取消了排序,减少了扫描的行数,所以效率更高。
4. 总结分析
对比发现我们看到改写前的sql同改写后的sql扫描的记录数一样,但是改写前的sql需要拿出相关记录的所有字段(查出所有符合条件的记录并排序后再进行 limit 过滤),而改写后的子查询只需要扫描索引获得拿出salary_id字段并排序。这时前者执行时间是0.82秒,或者执行时间是0.52秒。
NOTE:因为这里的
当加上(merchant_id,STATUS)索引,对于改写前的sql来说消除了排序,执行时间减少到0.21秒。对于改写后的sql 来说因为1.消除了排序,2.使用了覆盖索引(消除了排序;不用回表;减少了引擎层同server层的交互),执行时间减少到0.05s
添加(merchant_id,STATUS)索引前查看两个sql执行后的profile如下:
可以看到 Creating sort index 部分耗时最久,所以消除排序对sql执行性能影响最明显
mysql> set profiling=1;
Query OK, 0 rows affected, 1 warning (0.00 sec)
mysql> SELECT *
-> FROM test_table a
-> INNER JOIN
-> (SELECT salary_id
-> FROM test_table
-> WHERE merchant_id = 43
-> AND STATUS = 'SUCCESS'
-> ORDER BY salary_id DESC
-> LIMIT 900090,
-> 10) b ON a.salary_id = b.salary_id;
Empty set (0.66 sec)
mysql>
mysql> select * from test_table where
-> merchant_id = 43 and status = 'SUCCESS' order by salary_id desc
-> limit 900090,10;
Empty set (0.82 sec)
mysql> show profile for query 1;
+----------------------+----------+
| Status | Duration |
+----------------------+----------+
| starting | 0.000141 |
| checking permissions | 0.000008 |
| checking permissions | 0.000008 |
| checking permissions | 0.000003 |
| checking permissions | 0.000004 |
| Opening tables | 0.001994 |
| init | 0.000109 |
| System lock | 0.000015 |
| optimizing | 0.000008 |
| optimizing | 0.000034 |
| statistics | 0.002187 |
| preparing | 0.000054 |
| Sorting result | 0.000057 |
| statistics | 0.000072 |
| preparing | 0.000020 |
| executing | 0.000018 |
| Sending data | 0.000018 |
| executing | 0.000003 |
| Sending data | 0.000003 |
| Creating sort index | 0.652778 |
| end | 0.000022 |
| query end | 0.000016 |
| closing tables | 0.000005 |
| removing tmp table | 0.001012 |
| closing tables | 0.000034 |
| freeing items | 0.000068 |
| cleaning up | 0.000274 |
+----------------------+----------+
27 rows in set, 1 warning (0.00 sec)
mysql> show profile for query 2;
+----------------------+----------+
| Status | Duration |
+----------------------+----------+
| starting | 0.000093 |
| checking permissions | 0.000014 |
| checking permissions | 0.000004 |
| Opening tables | 0.000023 |
| init | 0.000129 |
| System lock | 0.000020 |
| optimizing | 0.000037 |
| statistics | 0.000145 |
| preparing | 0.000045 |
| Sorting result | 0.000009 |
| executing | 0.000003 |
| Sending data | 0.000017 |
| Creating sort index | 0.820004 |
| end | 0.000017 |
| query end | 0.000010 |
| closing tables | 0.000023 |
| freeing items | 0.000073 |
| cleaning up | 0.000070 |
+----------------------+----------+
18 rows in set, 1 warning (0.00 sec)
mysql>
作者:database_shaofei