Created
July 19, 2023 13:11
-
-
Save kgorskowski/013c9fc52b128bd9b1ca29c30cec1ea8 to your computer and use it in GitHub Desktop.
Kafka broker sizing calculator based on available storage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def calculate_retention_settings(storage_per_broker, brokers, partitions, replication, data_rate):
    """Print and return retention settings that fit a topic into the cluster's storage.

    Sizes are handled in binary units: one "GB" is 1024**3 bytes and one
    "MB" is 1024**2 bytes.

    Args:
        storage_per_broker: Available storage per broker in GB.
        brokers: Number of brokers in the cluster.
        partitions: Number of partitions of the topic.
        replication: Replication factor of the topic.
        data_rate: Expected data rate of the topic in MB per second.

    Returns:
        A dict with the computed values:
        ``total_storage_gb`` (storage across all brokers),
        ``usable_bytes`` (80% of the total minus the lifecycle overhead),
        ``retention_bytes`` (per-partition byte budget),
        ``overhead_gb`` (storage left once every replica holds its
        whole-GB budget) and ``retention_seconds`` (longest retention the
        usable storage can hold at ``data_rate``).

    Raises:
        ValueError: If any input is not positive, or if the storage is too
            small to leave any usable space after the safety margins.
    """
    inputs = {
        "storage_per_broker": storage_per_broker,
        "brokers": brokers,
        "partitions": partitions,
        "replication": replication,
        "data_rate": data_rate,
    }
    for name, value in inputs.items():
        # Zero would divide by zero further down; negatives are meaningless.
        if value <= 0:
            raise ValueError(f"{name} must be positive, got {value!r}")

    mib = 1024 * 1024
    gib = 1024 * mib

    # Data that can pile up between two lifecycle-management runs
    # (runs per default every 5 minutes).
    # NOTE(review): this margin is counted once, not once per replica —
    # confirm whether that is intended.
    lc_overhead = data_rate * mib * 60 * 5

    # Total storage across the cluster, in GB and in bytes.
    total_storage = storage_per_broker * brokers
    total_storage_bytes = total_storage * gib

    # Keep 20% of the disks free and subtract the lifecycle overhead.
    usable_bytes = total_storage_bytes * 0.8 - lc_overhead
    if usable_bytes <= 0:
        raise ValueError(
            "storage is too small: nothing is left after reserving 20% "
            "and the lifecycle overhead"
        )

    # Every partition is stored `replication` times, so the usable space is
    # shared by partitions * replication partition replicas.
    retention_bytes = usable_bytes / (partitions * replication)
    retention_gb = int(retention_bytes / gib)

    # Every incoming byte lands on disk `replication` times, so the cluster
    # fills up at data_rate * replication. This keeps the retention time
    # consistent with retention_bytes above.
    disk_bytes_per_second = data_rate * mib * replication
    retention_seconds = int(usable_bytes / disk_bytes_per_second)

    overhead_gb = total_storage - int(retention_bytes / gib * partitions * replication)

    # Print the calculated settings
    print(f"Available storage for the cluster: {total_storage} GB")
    print(f"recommended retention bytes settings for topic: {retention_bytes} bytes / {retention_gb} GB")
    print(f"This leaves {overhead_gb} GB overhead storage")
    print(f"Max. possible Retention time based on storage limit: {retention_seconds} seconds / {int(retention_seconds / 60)} minutes")

    return {
        "total_storage_gb": total_storage,
        "usable_bytes": usable_bytes,
        "retention_bytes": retention_bytes,
        "overhead_gb": overhead_gb,
        "retention_seconds": retention_seconds,
    }
# Example cluster the calculation is run for
storage_per_broker = 30  # storage available on each broker, in GB
brokers = 3  # brokers in the cluster
partitions = 6  # partitions of the topic
replication = 2  # replication level of the topic
data_rate = 15  # expected data rate, in MB per second

# Work out the retention settings for that cluster and print them
calculate_retention_settings(
    storage_per_broker=storage_per_broker,
    brokers=brokers,
    partitions=partitions,
    replication=replication,
    data_rate=data_rate,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment