Give a MySQL (or Aurora) database a simple health check to find issues and quick wins
-- Give a MySQL/Aurora database a quick health check.
-- See the comments below to customize it to your needs.
-- (this is all one query)
-- Step 1: Find very large tables
SELECT 'Table is very large' AS `Issue`,
       NULL AS `User`,
       NULL AS `Host`,
       TABLE_SCHEMA AS `DB`,
       TABLE_NAME AS `Table`,
       NULL AS `Column`,
       CONCAT(ROUND((DATA_LENGTH + INDEX_LENGTH) / 1099511627776, 1),
              'TB on disk') AS `Info`,
       NULL AS `Sample`
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA NOT IN ('mysql', 'INFORMATION_SCHEMA', 'performance_schema')
  AND (DATA_LENGTH + INDEX_LENGTH) > 1099511627776 * 1 -- 1 TB is considered a "very large" table
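-- For a table flagged above, one follow-up sketch (not part of the health check itself;
-- 'my_db' / 'my_table' are placeholders) is to see which indexes consume the space:
--   SELECT index_name,
--          ROUND(stat_value * @@innodb_page_size / 1024 / 1024 / 1024, 1) AS size_gb
--   FROM mysql.innodb_index_stats
--   WHERE database_name = 'my_db' AND table_name = 'my_table' AND stat_name = 'size';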
-- Step 2: Find tables with poorly sized auto-increment columns
UNION ALL
SELECT 'Auto increment is high' AS `Issue`,
       NULL AS `User`,
       NULL AS `Host`,
       aic.TABLE_SCHEMA,
       aic.TABLE_NAME,
       aic.COLUMN_NAME,
       CONCAT(ROUND(aic.ratio * 100),
              '% of max value') AS `Info`,
       NULL AS `Sample`
FROM (SELECT TABLE_SCHEMA,
             TABLE_NAME,
             COLUMN_NAME,
             ROUND(AUTO_INCREMENT / (CASE DATA_TYPE
                                         WHEN 'tinyint' THEN 255
                                         WHEN 'smallint' THEN 65535
                                         WHEN 'mediumint' THEN 16777215
                                         WHEN 'int' THEN 4294967295
                                         WHEN 'bigint' THEN 18446744073709551615
                                     END >> IF(LOCATE('unsigned', COLUMN_TYPE) > 0, 0, 1)
                 ), 2) AS ratio
      FROM INFORMATION_SCHEMA.COLUMNS
           INNER JOIN INFORMATION_SCHEMA.TABLES USING (TABLE_SCHEMA, TABLE_NAME)
      WHERE TABLE_SCHEMA NOT IN ('mysql', 'INFORMATION_SCHEMA', 'performance_schema')
        AND EXTRA = 'auto_increment'
      ORDER BY 4 DESC) AS aic
WHERE aic.ratio >= 0.5 -- 50% of the auto_increment range in use is a big cause for alarm
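-- When a column is flagged above, the usual fix (a sketch only; names are placeholders, and
-- the change rebuilds the table, so run it in a maintenance window) is to widen the column:
--   ALTER TABLE my_db.my_table MODIFY COLUMN id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT;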
-- Step 3: Find queries currently running that are slow
UNION ALL
SELECT 'Query is slow' AS `Issue`,
       `User`,
       `Host`,
       `DB`,
       NULL AS `Table`,
       NULL AS `Column`,
       CONCAT('(', `Count`, 'x ', `Seconds`, 's) ',
              `Info`) AS `Info`,
       `Sample` AS `Sample`
FROM (SELECT `User`,
             `Host`,
             `DB`,
             CONCAT(SUBSTRING(INFO, 1, LEAST(IF(LOCATE('0', INFO) > 0, LOCATE('0', INFO), 90),
                                             IF(LOCATE('1', INFO) > 0, LOCATE('1', INFO), 90),
                                             IF(LOCATE('2', INFO) > 0, LOCATE('2', INFO), 90),
                                             IF(LOCATE('3', INFO) > 0, LOCATE('3', INFO), 90),
                                             IF(LOCATE('4', INFO) > 0, LOCATE('4', INFO), 90),
                                             IF(LOCATE('5', INFO) > 0, LOCATE('5', INFO), 90),
                                             IF(LOCATE('6', INFO) > 0, LOCATE('6', INFO), 90),
                                             IF(LOCATE('7', INFO) > 0, LOCATE('7', INFO), 90),
                                             IF(LOCATE('8', INFO) > 0, LOCATE('8', INFO), 90),
                                             IF(LOCATE('9', INFO) > 0, LOCATE('9', INFO), 90),
                                             IF(LOCATE('"', INFO) > 5, LOCATE('"', INFO), 90)) - 1),
                    '…') AS `Info`,
             COUNT(*) AS `Count`,
             ROUND(COUNT(*) * AVG(TIME)) AS `Seconds`,
             INFO AS `Sample`
      FROM INFORMATION_SCHEMA.PROCESSLIST
      WHERE COMMAND NOT IN ('Sleep', 'Daemon')
        AND INFO NOT LIKE '%INFORMATION_SCHEMA.PROCESSLIST%'
        AND TIME > 0
      GROUP BY `Info`
      ORDER BY `Seconds` DESC) AS processlist_checks
WHERE processlist_checks.`Count` > 100 -- 100 similar queries at once or...
   OR processlist_checks.`Seconds` > 60 -- 60 seconds of combined run time is cause to optimize the query
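-- For anything flagged above, a reasonable next step is to EXPLAIN the `Sample` text and look
-- for missing indexes or full scans; only as a last resort, kill the runaway statement
-- (illustrative; 12345 stands in for the Id shown by SHOW PROCESSLIST):
--   KILL 12345;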
-- Step 4: Find queries that need attention in the slow query log (if enabled)
UNION ALL
SELECT *
FROM (SELECT 'Suboptimal query in slow_log' AS `Issue`,
             NULL AS `User`,
             user_host AS `Host`,
             db AS `DB`,
             NULL AS `Table`,
             NULL AS `Column`,
             CONCAT(SUBSTRING(sql_text, 1, LEAST(IF(LOCATE('0', sql_text) > 0, LOCATE('0', sql_text), 90),
                                                 IF(LOCATE('1', sql_text) > 0, LOCATE('1', sql_text), 90),
                                                 IF(LOCATE('2', sql_text) > 0, LOCATE('2', sql_text), 90),
                                                 IF(LOCATE('3', sql_text) > 0, LOCATE('3', sql_text), 90),
                                                 IF(LOCATE('4', sql_text) > 0, LOCATE('4', sql_text), 90),
                                                 IF(LOCATE('5', sql_text) > 0, LOCATE('5', sql_text), 90),
                                                 IF(LOCATE('6', sql_text) > 0, LOCATE('6', sql_text), 90),
                                                 IF(LOCATE('7', sql_text) > 0, LOCATE('7', sql_text), 90),
                                                 IF(LOCATE('8', sql_text) > 0, LOCATE('8', sql_text), 90),
                                                 IF(LOCATE('9', sql_text) > 0, LOCATE('9', sql_text), 90),
                                                 IF(LOCATE('"', sql_text) > 5, LOCATE('"', sql_text), 90)) - 1),
                    '…') AS `Info`,
             sql_text AS `Sample`
      FROM mysql.slow_log
      WHERE sql_text IS NOT NULL
        AND (query_time > SEC_TO_TIME(300) -- 5 minutes of time consumption is cause to optimize the query
             OR rows_examined > 10000000 -- 10 million rows examined is cause to optimize the query
             OR rows_sent > 10000000 -- 10 million rows sent is cause to optimize the query
            )
      GROUP BY `Info`
      ORDER BY query_time DESC) AS slow_log_checks
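-- Note: mysql.slow_log only has rows when the slow query log is enabled and writing to a
-- table. A minimal sketch of enabling that (requires privileges; 'FILE,TABLE' also works):
--   SET GLOBAL slow_query_log = 'ON';
--   SET GLOBAL log_output = 'TABLE';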
-- Step 5: Find tables without primary keys
UNION ALL
SELECT 'Table has no primary key' AS `Issue`,
       NULL AS `User`,
       NULL AS `Host`,
       TABLE_SCHEMA AS `DB`,
       TABLE_NAME AS `Table`,
       NULL AS `Column`,
       NULL AS `Info`,
       NULL AS `Sample`
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA NOT IN ('mysql', 'INFORMATION_SCHEMA', 'performance_schema', 'sys')
  AND TABLE_NAME NOT IN (SELECT TABLE_NAME
                         FROM INFORMATION_SCHEMA.STATISTICS
                         WHERE TABLE_SCHEMA NOT IN ('mysql', 'INFORMATION_SCHEMA', 'performance_schema', 'sys')
                           AND INDEX_NAME = 'PRIMARY')
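-- A minimal fix sketch for a table flagged above (placeholder names; every InnoDB table
-- benefits from an explicit primary key, especially under row-based replication):
--   ALTER TABLE my_db.my_table
--     ADD COLUMN id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY FIRST;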
-- Step 6: Find tables that likely need optimization for storage savings
UNION ALL
SELECT 'Table needs optimization' AS `Issue`,
       NULL AS `User`,
       NULL AS `Host`,
       TABLE_SCHEMA AS `DB`,
       TABLE_NAME AS `Table`,
       NULL AS `Column`,
       CONCAT(ROUND(DATA_FREE / DATA_LENGTH * 100),
              '% storage savings') AS `Info`,
       NULL AS `Sample`
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA NOT IN ('mysql', 'INFORMATION_SCHEMA', 'performance_schema', 'sys')
  AND TABLE_ROWS > 1000000
  AND DATA_LENGTH > 0
  AND (DATA_LENGTH + INDEX_LENGTH) > 1073741824 * 20 -- 20 GB of disk space and...
  AND DATA_FREE / DATA_LENGTH > 0.75 -- 75% free space means the table needs optimization
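-- To reclaim the free space in a flagged table (placeholder name; this rebuilds the table,
-- which on InnoDB maps to ALTER TABLE ... FORCE and can take a while on large tables):
--   OPTIMIZE TABLE my_db.my_table;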
-- Step 7: Find warnings or errors in general_log if available
UNION ALL
SELECT 'Errors or warnings in log' AS `Issue`,
       NULL AS `User`,
       user_host AS `Host`,
       NULL AS `DB`,
       NULL AS `Table`,
       NULL AS `Column`,
       CONCAT(command_type, ' ', argument) AS `Info`,
       NULL AS `Sample`
FROM mysql.general_log
WHERE (argument LIKE '%ERROR%' OR argument LIKE '%WARNING%')
  AND user_host NOT IN ('system user', 'event_scheduler')
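-- Note: mysql.general_log only has rows when the general log writes to a table, and the
-- general log adds noticeable overhead on busy servers, so enable it sparingly (illustrative):
--   SET GLOBAL general_log = 'ON';
--   SET GLOBAL log_output = 'TABLE';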
-- Step 8: Check global variables for quick wins
UNION ALL
SELECT 'Global variable not recommended' AS `Issue`,
       NULL AS `User`,
       NULL AS `Host`,
       NULL AS `DB`,
       NULL AS `Table`,
       NULL AS `Column`,
       CONCAT('Variable ', variable_name, ' is set to ',
              variable_value) AS `Info`,
       NULL AS `Sample`
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
-- These are just starting suggestions for large databases; there could be many more:
WHERE (variable_name = 'max_seeks_for_key' AND variable_value > 1000) -- Should be a low number for large tables that depend on indexes
   OR (variable_name = 'tx_isolation' AND variable_value != 'READ-UNCOMMITTED') -- Using READ-UNCOMMITTED allows the highest concurrency for selects
   OR (variable_name = 'max_allowed_packet' AND variable_value < 100000000) -- Allow large queries (100MB)
   OR (variable_name = 'binlog_format' AND variable_value != 'OFF') -- Only needed if there's an ETL job; binary logging will actually slow down a PITR recovery, so they recommend this be OFF if possible
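-- Note: on MySQL 8.0+ INFORMATION_SCHEMA.GLOBAL_VARIABLES is gone; swap in
-- performance_schema.global_variables above. A flagged variable can usually be adjusted at
-- runtime, e.g. (illustrative value, 100 MB):
--   SET GLOBAL max_allowed_packet = 104857600;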
-- Step 9: Check global status for issues
UNION ALL
SELECT 'Global status issue' AS `Issue`,
       NULL AS `User`,
       NULL AS `Host`,
       NULL AS `DB`,
       NULL AS `Table`,
       NULL AS `Column`,
       CONCAT('Status ', variable_name, ' is reporting ',
              variable_value) AS `Info`,
       NULL AS `Sample`
FROM INFORMATION_SCHEMA.GLOBAL_STATUS
-- These are just starting suggestions for large databases; there could be many more:
WHERE (variable_name = 'Innodb_buffer_pool_pages_dirty' AND variable_value > 1000) -- Large number of dirty pages means a lot of disk writes
   OR (variable_name = 'Innodb_buffer_pool_wait_free' AND variable_value > 10) -- Should be small, meaning there are enough free pages to avoid waits
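-- Note: as with step 8, MySQL 8.0+ exposes this data as performance_schema.global_status.
-- Sustained dirty-page or wait_free pressure usually points at an undersized buffer pool; on a
-- self-managed server it can be grown online (illustrative 8 GB value below), while on
-- RDS/Aurora the size is controlled by the instance class and parameter group instead:
--   SET GLOBAL innodb_buffer_pool_size = 8 * 1024 * 1024 * 1024;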