Created
March 27, 2023 10:22
-
-
Save pbock/8571f417f7fd6f15fc89066d8eaa0026 to your computer and use it in GitHub Desktop.
Find all files ending in lots of null bytes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
This script recurses through the current user's home directory and prints the
paths to all files that end in 2 kB or more of null bytes.

The motivation for this is that Time Machine seems to have corrupted my music
files. Several kilobytes' worth of data have gone missing from the middle of
some files and been replaced with an equivalent number of null bytes at the
end.
"""

import os

# A directory is not descended into when its name contains any of these
# substrings (substring, not exact, match: '.virtualenv' also covers
# '.virtualenvs').
skipdirs = ['node_modules', '.virtualenv', '__pycache__', '.git', 'Library']
# A file is not examined when its name contains any of these substrings.
skipfiles = ['.sqlite', '.sock', '.db', '.DS_Store']

# How many bytes to check at the end of the file
byte_count = 2000

home_dir = os.path.expanduser('~')


def ends_in_null_bytes(filepath, count=byte_count):
    """Return True if the last *count* bytes of *filepath* are all null.

    Files that are shorter than *count* bytes, or that cannot be opened or
    read (missing, permission denied, not seekable, ...), yield False: this
    is a best-effort scan and an unreadable file is simply not a match.

    Raises:
        ValueError: if *count* is not positive. A zero-length tail would
            otherwise make every readable file look like a match.
    """
    if count <= 0:
        raise ValueError('count must be a positive number of bytes')
    try:
        with open(filepath, 'rb') as f:
            # Seeking before the start of a too-short file raises OSError,
            # which is how short files get rejected.
            f.seek(-count, os.SEEK_END)
            tail = f.read(count)
    except OSError:
        return False
    # Comparing against a zero-filled block also rejects short reads.
    return tail == bytes(count)


def find_null_padded_files(root, count=byte_count, skip_dirs=None,
                           skip_files=None):
    """Yield the path of every file under *root* ending in *count* null bytes.

    Args:
        root: directory at which the recursive walk starts.
        count: number of trailing bytes that must all be null.
        skip_dirs: substrings; a directory below *root* whose name contains
            one of them is pruned from the walk. Defaults to ``skipdirs``.
        skip_files: substrings; a file whose name contains one of them is
            not examined. Defaults to ``skipfiles``.
    """
    if skip_dirs is None:
        skip_dirs = skipdirs
    if skip_files is None:
        skip_files = skipfiles

    for dirpath, dirs, files in os.walk(root):
        # Prune in place so os.walk never descends into skipped trees;
        # filtering on dirpath afterwards would still traverse all of them.
        dirs[:] = [d for d in dirs if not any(s in d for s in skip_dirs)]
        for name in files:
            if any(s in name for s in skip_files):
                continue
            filepath = os.path.join(dirpath, name)
            # Only regular files: opening a FIFO would block indefinitely.
            if os.path.isfile(filepath) and ends_in_null_bytes(filepath,
                                                               count):
                yield filepath


def main():
    """Print every suspicious file found under the user's home directory."""
    for filepath in find_null_padded_files(home_dir):
        print(filepath)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment