Last active
May 8, 2025 13:53
-
-
Save stephanGarland/cd6a5c667e8406cf9cfb661a12399ddd to your computer and use it in GitHub Desktop.
Designed for use with Ansible, but works on its own as well – calculate the tricky parts of InnoDB's buffer pool
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# LICENSE | |
# This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. | |
# If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
# Copyright 2024 Stephan Garland | |
""" | |
Calculates various parameters for the InnoDB buffer pool based on a simple input. | |
If system memory isn't provided ([`-m`, `--sys-memory-mb`]), the tool attempts to | |
get this information via `sysctl hw.memsize`. | |
The InnoDB engine is extremely strict about how its buffer pool can be sized. | |
Ref: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_buffer_pool_chunk_size | |
Other than size restrictions (which are unlikely to be violated in normal practice), there are | |
complicated relationships between the three main variables that must be met, plus some | |
best practices to be observed. Getting these three correct can be challenging, hence this calculator. | |
innodb_buffer_pool_chunk_size: | |
Description: This is the least likely to ever be used during normal operation, but it still must be observed. | |
It defines the size of chunks to add or remove to the buffer pool during online resizing operations. | |
Default: 134217728 bytes (128 MiB) | |
Minimum: 1048576 bytes (1 MiB) | |
Maximum: innodb_buffer_pool_size / innodb_buffer_pool_instances | |
Increment: 1048576 bytes (1 MiB) | |
Dynamic: False | |
Recommendation: num_chunks (innodb_buffer_pool_size / innodb_buffer_pool_chunk_size) <= 1000 | |
innodb_buffer_pool_instances: | |
Description: This is the number of instances the buffer pool is split into. Each instance gets its own | |
flush list, LRU, mutex, etc. This should generally be raised as high as reasonable for better throughput. | |
Default: innodb_buffer_pool_size < 1 GiB ? 1 : 8 | |
Minimum: 1 | |
Maximum: 64 | |
Increment: 1 | |
Dynamic: False | |
Recommendation: As high as possible while keeping each instance size | |
(innodb_buffer_pool_size / innodb_buffer_pool_instances) >= 1 GiB | |
innodb_buffer_pool_size: | |
Description: This is the size of the buffer pool itself. Unlike Postgres, this is the sole pool of memory | |
available to MySQL (Postgres uses a combination of internal and OS-level memory caches). As such, | |
the recommended conservative starting value for this on a dedicated instance is 75-80% of system memory, | |
but can go higher with caution. The total size will be approximately 110% of the specified | |
buffer pool's size due to overhead control structures, and enough must be left for other MySQL operations, | |
as well as the OS's own needs (and those of any other programs operating on the server). | |
The buffer pool must be equal to or a multiple of innodb_buffer_pool_chunk_size * innodb_buffer_pool_instances, | |
and all other requirements for other variables must also still be met. | |
Default: 134217728 bytes (128 MiB) | |
Mininum: 5242880 bytes (5 MiB) | |
Maximum: (2^64) - 1 bytes (16 EiB - 1 byte) | |
Increment: innodb_buffer_pool_chunk_size | |
Dynamic: true | |
Recommendation: As high as possible without being OOMkilled | |
""" | |
import argparse | |
import subprocess | |
from collections import namedtuple | |
def make_args() -> argparse.Namespace:
    """Build and parse the command-line options for the calculator.

    Returns:
        argparse.Namespace with chunk_size_bytes, instance_count, force,
        sys_memory_gb, sys_memory_mb, pool_size_percent_of_mem, and
        pool_size_bytes attributes.
    """
    # (short flag, long flag, add_argument keyword arguments)
    option_specs = (
        (
            "-c",
            "--chunk-size-bytes",
            {"help": "size of innodb buffer pool chunks in bytes", "type": int},
        ),
        (
            "-i",
            "--instance-count",
            {"help": "number of innodb buffer pool instances", "type": int},
        ),
        (
            "-f",
            "--force",
            {"help": "force exact values to be used as given", "action": "store_true"},
        ),
        (
            "-g",
            "--sys-memory-gb",
            {
                "default": 0.0,
                "help": "amount of system memory in GB as reported by ansible, sysctl, etc.",
                "type": float,
            },
        ),
        (
            "-m",
            "--sys-memory-mb",
            {
                "default": 0,
                "help": "amount of system memory in MB as reported by ansible, sysctl, etc.",
                "type": int,
            },
        ),
        (
            "-p",
            "--pool-size-percent-of-mem",
            {"help": "size of buffer pool as percent of system memory", "type": float},
        ),
        (
            "-s",
            "--pool-size-bytes",
            {"help": "size of innodb buffer pool in bytes", "type": int},
        ),
    )
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, kwargs in option_specs:
        parser.add_argument(short_flag, long_flag, **kwargs)
    return parser.parse_args()
def pool_is_multiple(pool_size: int, chunk_size: int, instances: int) -> bool:
    """Validates that the buffer pool is a multiple of its constituent parts."""
    granule = chunk_size * instances
    return not pool_size % granule
def instances_gte_one_gib(pool_size: int, instances: int) -> bool:
    """Validates that each instance is at least 1 GiB."""
    per_instance_bytes = pool_size / instances
    return per_instance_bytes >= 2**30
def instances_count_allowed(instances: int) -> bool:
    """Validates that the number of instances is within the allowable range."""
    # MySQL allows between 1 and 64 buffer pool instances, inclusive
    return 1 <= instances <= 64
def pool_size_allowed(pool_size: int) -> bool:
    """Validates that the pool size is within the allowable range.

    Per the MySQL documentation (see the module docstring): the minimum is
    5242880 bytes (5 MiB, inclusive) and the maximum is 2**64 - 1 bytes.
    The previous strict `<` on the lower bound wrongly rejected exactly 5 MiB.
    """
    return 5 * 2**20 <= pool_size < 2**64
def chunk_size_allowed(chunk_size: int, pool_size: int, instances: int) -> bool:
    """Validates that the chunk size is valid.

    Per the MySQL documentation (see the module docstring): the minimum is
    1048576 bytes (1 MiB, inclusive) and the maximum is
    pool_size / instances (inclusive). The previous strict `<` bounds
    rejected both limits — including MySQL's own default configuration
    (128 MiB chunk, 128 MiB pool, 1 instance). The upper bound is checked
    with integer arithmetic to avoid float-division rounding.
    """
    return 2**20 <= chunk_size and chunk_size * instances <= pool_size
def chunk_count_optimal(chunk_size: int, pool_size: int) -> bool:
    """Validates that the number of chunks is under the recommended limit."""
    # recommendation: pool_size / chunk_size should not exceed 1000 chunks
    num_chunks = pool_size / chunk_size
    return num_chunks <= 1000
def all_checks_true(
    pool_size_bytes: int, chunk_size_bytes: int, instance_count: int
) -> bool:
    """Validates that all other checks are true."""
    # evaluate every individual constraint; all must hold for a valid config
    checks = (
        pool_is_multiple(pool_size_bytes, chunk_size_bytes, instance_count),
        instances_gte_one_gib(pool_size_bytes, instance_count),
        instances_count_allowed(instance_count),
        pool_size_allowed(pool_size_bytes),
        chunk_size_allowed(chunk_size_bytes, pool_size_bytes, instance_count),
        chunk_count_optimal(chunk_size_bytes, pool_size_bytes),
    )
    return all(checks)
def get_buffer_pool_bytes(pct_of_mem: float, sys_mem_mb: int) -> int:
    """Return a buffer pool size in bytes, rounded down to a whole GiB.

    Args:
        pct_of_mem: Percentage of system memory to use. Values in (1, 100]
            are treated as percentages; values in (0, 1] as fractions
            (so 1.0 means 100%, matching the internal alignment call).
        sys_mem_mb: System memory in MB.

    Returns:
        Pool size in bytes, floored to a GiB boundary.

    Raises:
        SystemExit: If pct_of_mem is not in (0, 100].
    """
    # reject zero/negative input up front — a 0 here would silently produce
    # a 0-byte pool and cause confusing failures downstream
    if pct_of_mem <= 0:
        raise SystemExit("ERROR: percent of mem must be positive")
    if 1 < pct_of_mem <= 100:
        pct_of_mem /= 100
    elif pct_of_mem > 100:
        raise SystemExit("ERROR: percent of mem cannot exceed 100")
    buf_pool_raw = sys_mem_mb * pct_of_mem
    # drop the remainder below the nearest GiB (1024 MB), then convert MB -> bytes
    buf_pool_mod_gib = buf_pool_raw % 1024
    return int((buf_pool_raw - buf_pool_mod_gib) * 2**20)
if __name__ == "__main__":
    args = make_args()
    # Only flag a conflict when BOTH memory options were given and disagree.
    # (Previously the check also fired when only `-m` was passed, because
    # mb/1024 never equals the 0.0 GB default.)
    if (
        args.sys_memory_mb
        and args.sys_memory_gb
        and args.sys_memory_mb / 1024 != args.sys_memory_gb
    ):
        raise SystemExit(
            "ERROR: you passed values for both `--sys-memory-mb` and `--sys-memory-gb`, and they differed – use one value"
        )
    elif args.sys_memory_gb and not args.sys_memory_mb:
        args.sys_memory_mb = int(args.sys_memory_gb * 1024)
    _chunk_size_bytes = 134217728  # 128 MiB, MySQL's default chunk size
    # NOTE: `--force` is a store_true flag, so args.force is already a bool.
    # The previous str.lower() normalization raised AttributeError whenever
    # `-f` was actually passed, making --force unusable.
    if not args.sys_memory_mb:
        # fall back to sysctl (macOS); `hw.memsize` reports bytes
        sysctl_mem = subprocess.run(
            ["sysctl", "hw.memsize"], capture_output=True, encoding="utf-8"
        )
        if sysctl_mem.returncode:
            raise SystemExit(
                "ERROR: unable to determine system memory amount – please provide with `-m`"
            )
        args.sys_memory_mb = int(sysctl_mem.stdout.split(" ")[-1].rstrip("\n")) // 2**20
    if args.pool_size_percent_of_mem:
        _pool_size_bytes = get_buffer_pool_bytes(
            args.pool_size_percent_of_mem, args.sys_memory_mb
        )
    elif not all([args.pool_size_bytes, args.chunk_size_bytes]):
        raise SystemExit(
            "ERROR: must define pool size and chunk size, or a percentage of system memory"
        )
    elif not args.force:
        # align the requested size to GiB boundaries; get_buffer_pool_bytes
        # expects MB, so convert the byte count first (passing raw bytes
        # previously inflated the result by a factor of 2**20)
        _pool_size_bytes = get_buffer_pool_bytes(1.0, args.pool_size_bytes // 2**20)
    else:  # assume the user knows what they're doing
        _pool_size_bytes = args.pool_size_bytes
        _chunk_size_bytes = args.chunk_size_bytes
    _instance_count = args.instance_count
    if not args.force:
        # reasonable starting instance counts, scaled with pool size
        if _pool_size_bytes >= 8 * 2**30:
            _instance_count = 8
        elif _pool_size_bytes >= 2 * 2**30:
            _instance_count = 2
        else:
            _instance_count = 1
    elif not _instance_count:
        # with --force the search below compares _instance_count against 64;
        # a missing value used to raise TypeError (None > 64)
        raise SystemExit("ERROR: `--force` requires `--instance-count`")
    # calculate multiples of chunk size, up to a limit of half the buffer pool,
    # in 64 MiB increments
    chunk_size_mults = range(_chunk_size_bytes, int(_pool_size_bytes / 2), 64 * 2**20)
    Candidate = namedtuple(
        "Candidate",
        ["chunk_size_mib", "chunk_count", "instance_count", "instance_size_mib"],
    )
    candidates = set()
    # remember the starting values so the search can reset/bound itself
    chunk_size_bytes = _chunk_size_bytes
    pool_size_bytes = _pool_size_bytes
    # don't let this go on indefinitely: shrink the pool by at most 5%
    while _pool_size_bytes > pool_size_bytes * 0.95:
        for _ in range(64):
            if _instance_count > 64:
                break
            for _ in range(len(chunk_size_mults)):
                if all_checks_true(
                    _pool_size_bytes, _chunk_size_bytes, _instance_count
                ):
                    candidates.add(
                        Candidate(
                            _chunk_size_bytes // 2**20,
                            _pool_size_bytes // _chunk_size_bytes,
                            _instance_count,
                            (_pool_size_bytes // _instance_count) // 2**20,
                        )
                    )
                # first, try adding 1 MiB to the chunk size
                _chunk_size_bytes += 2**20
            # reset the chunk size once the inner loop is exhausted
            _chunk_size_bytes = chunk_size_bytes
            # try adding another instance
            _instance_count += 1
        # subtract 1 MiB from the pool size and search again
        _pool_size_bytes -= 2**20
    # fail cleanly instead of IndexError on cand[0] when nothing qualified
    if not candidates:
        raise SystemExit(
            "ERROR: no valid buffer pool configuration found – check your inputs"
        )
    # prefer highest number of buffer pool instances
    # NOTE(review): ties are broken by set iteration order, which is not
    # stable across runs — consider a secondary sort key if determinism matters
    cand = sorted(candidates, key=lambda x: x.instance_count, reverse=True)
    best = cand[0]
    innodb_prefix = "innodb_buffer_pool"
    _pool_size_bytes = best.instance_size_mib * best.instance_count * 2**20
    mysql_vars = []
    mysql_vars.append(
        f"{innodb_prefix}_chunk_size = {best.chunk_size_mib * 2**20} # {best.chunk_size_mib} MiB"
    )
    mysql_vars.append(f"{innodb_prefix}_instances = {best.instance_count}")
    mysql_vars.append(
        f"{innodb_prefix}_size = {_pool_size_bytes} # {_pool_size_bytes / 2**30} GiB"
    )
    print("Place the following in your `my.cnf` file\n")
    print("\n".join(mysql_vars))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment