Skip to content

Instantly share code, notes, and snippets.

View chapmanjacobd's full-sized avatar
🥅
goal_net

Jacob Chapman chapmanjacobd

🥅
goal_net
View GitHub Profile
@chapmanjacobd
chapmanjacobd / gnu_parallel_global_timeout.fish
Created October 18, 2023 16:23
GNU Parallel global timeout
# Two timeouts are layered here: `parallel --timeout 10` limits each individual
# job, while the outer `timeout -s HUP 3s` limits the whole parallel run by
# sending SIGHUP to parallel after 3 seconds. Jobs run two at a time (-j2).
# NOTE(review): how parallel reacts to SIGHUP (kill running jobs vs. let them
# finish) depends on the GNU Parallel version — confirm against its man page.
timeout -s HUP 3s parallel --timeout 10 -j2 sleep {} ::: 10 12 15 10 8 15
# Same arguments, but supplied to parallel on stdin (one per line from
# `string split`) instead of on the command line after `:::`.
string split ' ' 10 12 15 10 8 15 | timeout -s HUP 3s parallel --timeout 10 -j2 sleep {}
@chapmanjacobd
chapmanjacobd / ffmpeg_check_file.py
Created September 28, 2023 01:20
how to correctly scan a video file to check for corruption
def decode_full_scan(path):
ffprobe_cmd = [
'ffprobe', '-show_entries', 'stream=r_frame_rate,nb_read_frames,duration',
'-select_streams', 'v', '-count_frames',
'-of', 'json',
'-threads', '20',
'-v', '0',
path,
]
javascript:(function(){
var previousScroll;
var scrollInterval;
var nextPageTimeout;
function scrollAndCheck() {
previousScroll = window.scrollY;
window.scrollBy(0, 3000);
def rank_dataframe(df, ascending_cols=None):
if ascending_cols is None:
ascending_cols = []
numeric_cols = df.select_dtypes(include=["number"]).columns
ranks = df[numeric_cols].apply(
lambda x: x.rank(
method="min", na_option="bottom", ascending=x.name in ascending_cols
)
)
def rebin_folders(paths, max_files_per_folder=16000):
parent_paths = Counter(Path(p).parent for p in paths)
rebin_parents = set(k for k,v in parent_paths.items() if v > max_files_per_folder)
rebinned = set(p for p in paths if Path(p).parent in rebin_parents)
untouched = set(paths) - rebinned
rebinned_tuples = []
chunked = list(chunks(list(rebinned), max_files_per_folder))
min_len = len(str(len(chunked) + 1)) # start at 1
def mpv_enrich(args, media) -> List[Dict]:
for m in media:
md5 = path_to_mpv_watchlater_md5(m["path"])
metadata_path = Path(args.watch_later_directory, md5)
if metadata_path.exists():
m["time_partial_first"] = int(metadata_path.stat().st_ctime)
m["time_partial_last"] = int(metadata_path.stat().st_mtime)
else:
m["time_partial_first"] = 0
def calculate_sparseness(stat) -> float:
    """Return the ratio of allocated bytes to apparent size for a stat result.

    Args:
        stat: An object exposing ``st_size`` and ``st_blocks`` attributes,
            such as the value returned by ``os.stat``.

    Returns:
        ``0`` when ``st_size`` is zero (avoids dividing by zero); otherwise
        ``st_blocks * 512 / st_size``. The result comes from true division,
        so it is a float rather than an int.
    """
    if stat.st_size == 0:
        return 0

    # st_blocks is counted in 512-byte units, so scale it to bytes first.
    blocks_allocated = stat.st_blocks * 512
    return blocks_allocated / stat.st_size
class DictHandler(logging.Handler):
    """Logging handler that collects formatted records in a dict.

    Each formatted message is appended to a list stored under the record's
    level name (for example ``"WARNING"``) in the dict supplied by the caller.
    """

    def __init__(self, log_dict):
        """Store *log_dict*, the mapping that ``emit`` appends entries to."""
        super().__init__()
        self.log_dict = log_dict

    def emit(self, record):
        """Format *record* and append it to the list for its level name.

        Failures while formatting or storing the entry are passed to
        ``handleError`` instead of propagating into the code that issued the
        logging call, mirroring the standard library handlers.
        """
        try:
            log_entry = self.format(record)
            self.log_dict.setdefault(record.levelname, []).append(log_entry)
        except RecursionError:
            # Re-raised untouched, as the stdlib StreamHandler does.
            raise
        except Exception:
            self.handleError(record)
def load_spacy_model(model=None):
try:
import spacy
except ModuleNotFoundError:
log.error("Install spaCy and sklearn to use:")
log.error("pip install spacy sklearn")
log.error("python -m spacy download en_core_web_sm")
sys.exit(1)
if model:
@chapmanjacobd
chapmanjacobd / freed_up_one_terabyte.md
Last active December 27, 2025 05:32
How I freed up one TiB of space

How I freed up one TiB of space

I didn't realize just how much space those 24 Hour YouTube tracks take up. After hearing the same song on repeat I decided to check:

$ pip install xklb
$ lb fsadd --audio ~/lb/audio.db /mnt/d/81_New_Music/ /mnt/d/82_Audiobooks/
$ lb listen ~/lb/audio.db /81_New_Music/ --duration +3hr -p a
╒═══════════╤═════════╤═══════════════════════════════════════════╤═══════════════════════════╤══════════════════════════════════════════╤════════╤════════════╕
│ path      │   count │ duration                                  │ avg_duration              │ cadence_adj_duration                     │ size   │ avg_size   │

╞═══════════╪═════════╪═══════════════════════════════════════════╪═══════════════════════════╪══════════════════════════════════════════╪════════╪════════════╡