Skip to content

Instantly share code, notes, and snippets.

@turtlemonvh
Created July 20, 2017 18:07
Show Gist options
  • Save turtlemonvh/27763481d273f6eaef7c1468a3da9194 to your computer and use it in GitHub Desktop.
Save turtlemonvh/27763481d273f6eaef7c1468a3da9194 to your computer and use it in GitHub Desktop.
Get the size of Kafka topics on disk
#!/usr/bin/python
import os
from collections import defaultdict
import subprocess
kafka_log_dir = "/data/kafka/logs/"
size_unit = pow(1024.0, 2)  # bytes per MiB; reported sizes are divided by this


def get_topic_sizes(log_dir=kafka_log_dir):
    """Return the total on-disk size, in bytes, of each topic under *log_dir*.

    Every immediate sub-directory of *log_dir* whose name contains a "-" is
    measured with ``du`` and its size is added to the topic named by the text
    before the last "-" (the trailing part is presumably the partition
    number -- confirm against your Kafka layout). Plain files and directories
    without a "-" in their name are ignored.

    Args:
        log_dir: Directory that holds the per-partition log directories.

    Returns:
        A dict mapping topic name to the summed size in bytes.

    Raises:
        OSError: If *log_dir* cannot be listed or ``du`` cannot be executed.
        subprocess.CalledProcessError: If ``du`` exits with a non-zero status.
    """
    topic_sizes = defaultdict(int)
    for entry in os.listdir(log_dir):
        fullpath = os.path.join(log_dir, entry)
        if not os.path.isdir(fullpath):
            continue
        # Split on the LAST dash so topic names that contain dashes stay intact.
        topic, dash, _suffix = entry.rpartition("-")
        if not dash:
            # Didn't have a "-" in it
            continue
        # "-s" makes du print a single summary line; without it du lists
        # nested directories first and the first field would be the size of
        # a sub-directory rather than the total.
        du_output = subprocess.check_output(["du", "-s", "-b", fullpath])
        topic_sizes[topic] += int(du_output.split()[0])
    return dict(topic_sizes)


def main():
    """Print one "<topic> <size>" line per topic, size expressed in size_unit."""
    # Sorted so that repeated runs produce output in a stable order.
    for topic, topic_size in sorted(get_topic_sizes().items()):
        # %-formatting inside print(...) behaves the same on Python 2 and 3.
        print("%s %s" % (topic, topic_size / size_unit))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment