Jacob Chapman chapmanjacobd

How I freed up one TiB of space

I didn't realize just how much space those 24-hour YouTube tracks take up. After hearing the same song on repeat, I decided to check:

$ pip install xklb
$ lb fsadd --audio ~/lb/audio.db /mnt/d/81_New_Music/ /mnt/d/82_Audiobooks/
$ lb listen ~/lb/audio.db /81_New_Music/ --duration +3hr -p a
╒═══════════╤═════════╤═══════════════════════════════════════════╤═══════════════════════════╤══════════════════════════════════════════╤════════╤════════════╕
│ path      │   count │ duration                                  │ avg_duration              │ cadence_adj_duration                     │ size   │ avg_size   │

╞═══════════╪═════════╪═══════════════════════════════════════════╪═══════════════════════════╪══════════════════════════════════════════╪════════╪════════════╡


	# Preview of decode_full_scan: only the construction of the ffprobe argument
	# list is visible here; how the command is run and parsed is not shown.
	def decode_full_scan(path):
	ffprobe_cmd = [
	# Ask ffprobe to print, per selected stream, the frame rate, the number of
	# frames actually read, and the duration.
	'ffprobe', '-show_entries', 'stream=r_frame_rate,nb_read_frames,duration',
	# Restrict to video streams; -count_frames makes ffprobe count frames,
	# which is what populates nb_read_frames.
	'-select_streams', 'v', '-count_frames',
	# Emit the result as JSON.
	'-of', 'json',
	'-threads', '20',
	# Log level 0, so ffprobe's own diagnostics are largely suppressed.
	'-v', '0',
	path,
	]

	javascript:(function(){

	var previousScroll;
	var scrollInterval;
	var nextPageTimeout;

	function scrollAndCheck() {
	previousScroll = window.scrollY;
	window.scrollBy(0, 3000);

	# Preview of rank_dataframe: ranks every numeric column of `df`.
	# The snippet is truncated, so what happens to `ranks` afterwards is not shown.
	def rank_dataframe(df, ascending_cols=None):
	# Default to an empty list: no column is ranked ascending unless named.
	if ascending_cols is None:
	ascending_cols = []

	# Only numeric columns take part in the ranking.
	numeric_cols = df.select_dtypes(include=["number"]).columns
	# Per column: ties share the lowest rank of their group ("min"), NaN values
	# receive the highest rank numbers ("bottom"), and the column is ranked
	# ascending only if its name appears in ascending_cols (descending otherwise).
	ranks = df[numeric_cols].apply(
	lambda x: x.rank(
	method="min", na_option="bottom", ascending=x.name in ascending_cols
	)
	)

	# Preview of rebin_folders: separates paths whose parent directory holds more
	# than max_files_per_folder of the given paths from the rest, then chunks the
	# former. The snippet is truncated; how the chunks are turned into new folder
	# names is not shown.
	def rebin_folders(paths, max_files_per_folder=16000):
	# Count how many of the given paths share each parent directory.
	parent_paths = Counter(Path(p).parent for p in paths)
	# Parents whose count exceeds the limit.
	rebin_parents = set(k for k,v in parent_paths.items() if v > max_files_per_folder)

	# Paths under an over-limit parent vs. all remaining paths.
	rebinned = set(p for p in paths if Path(p).parent in rebin_parents)
	untouched = set(paths) - rebinned

	rebinned_tuples = []
	# NOTE(review): `chunks` is defined elsewhere; presumably it yields groups of
	# at most max_files_per_folder items -- confirm. Paths from all over-limit
	# parents are pooled into one set before chunking.
	chunked = list(chunks(list(rebinned), max_files_per_folder))
	# Number of digits in len(chunked) + 1.
	min_len = len(str(len(chunked) + 1)) # start at 1


	# Preview of mpv_enrich: adds "time_partial_*" keys to each media dict based
	# on a file looked up in args.watch_later_directory. The snippet is truncated;
	# the remainder of the else-branch and the return statement are not shown.
	def mpv_enrich(args, media) -> List[Dict]:
	for m in media:
	# path_to_mpv_watchlater_md5 is defined elsewhere; its result is used as the
	# file name inside the watch-later directory.
	md5 = path_to_mpv_watchlater_md5(m["path"])
	metadata_path = Path(args.watch_later_directory, md5)
	if metadata_path.exists():
	# Store the file's st_ctime and st_mtime, truncated to whole seconds.
	m["time_partial_first"] = int(metadata_path.stat().st_ctime)
	m["time_partial_last"] = int(metadata_path.stat().st_mtime)
	else:
	# No such file for this path: store 0.
	m["time_partial_first"] = 0

	# Run `sleep` once per duration, two jobs at a time (-j2). GNU parallel kills any
	# single job that runs longer than 10s (--timeout 10), while the outer `timeout`
	# sends SIGHUP to parallel itself after 3s.
	timeout -s HUP 3s parallel --timeout 10 -j2 sleep {} ::: 10 12 15 10 8 15

	# Same experiment, but the durations are fed to parallel on stdin (one per line)
	# instead of via `:::`. The pipe must be a real `|`; an escaped `\|` would be
	# passed to `string split` as a literal argument and no pipeline would be created.
	string split ' ' 10 12 15 10 8 15 | timeout -s HUP 3s parallel --timeout 10 -j2 sleep {}

	def calculate_sparseness(stat) -> float:
	    """Return the ratio of allocated bytes to apparent size for a stat result.

	    Args:
	        stat: An object exposing ``st_size`` (apparent size in bytes) and
	            ``st_blocks`` (number of 512-byte blocks allocated), such as the
	            result of ``os.stat`` on platforms that provide ``st_blocks``.

	    Returns:
	        ``st_blocks * 512 / st_size`` as a float, or ``0.0`` when ``st_size``
	        is zero (avoids a division by zero for empty files).
	    """
	    if stat.st_size == 0:
	        return 0.0

	    # st_blocks is expressed in 512-byte units, independent of the
	    # filesystem's own block size.
	    blocks_allocated = stat.st_blocks * 512
	    return blocks_allocated / stat.st_size

	class DictHandler(logging.Handler):
	    """Logging handler that collects formatted messages in a caller-supplied dict.

	    Messages are grouped under the record's level name (e.g. ``"INFO"``); each
	    level maps to a list of formatted strings in emission order.
	    """

	    def __init__(self, log_dict):
	        """Keep a reference to ``log_dict``; it is mutated in place by ``emit``."""
	        super().__init__()
	        self.log_dict = log_dict

	    def emit(self, record):
	        """Format ``record`` and append the text to its level's list."""
	        message = self.format(record)
	        # Create the per-level list on first use, then append to it.
	        bucket = self.log_dict.setdefault(record.levelname, [])
	        bucket.append(message)

	def load_spacy_model(model=None):
	try:
	import spacy
	except ModuleNotFoundError:
	log.error("Install spaCy and sklearn to use:")
	log.error("pip install spacy sklearn")
	log.error("python -m spacy download en_core_web_sm")
	sys.exit(1)

	if model: