Skip to content

Instantly share code, notes, and snippets.

@dsoprea
Created May 22, 2018 01:08
Show Gist options
  • Save dsoprea/697f72ffba7d7b3e13e6b7c31ec3788e to your computer and use it in GitHub Desktop.
Save dsoprea/697f72ffba7d7b3e13e6b7c31ec3788e to your computer and use it in GitHub Desktop.
Tool to recursively strip all extended characters from all non-hidden files
#!/usr/bin/env python3
import sys
import os
import argparse
import shutil
# Program description; passed to argparse.ArgumentParser in _get_args().
_DESCRIPTION = "Strip extended characters from all non-hidden files in a path."
def _get_args():
    """Parse the command line and return the resulting namespace.

    The namespace carries two attributes:

    * ``root_path`` -- required positional argument.
    * ``backup_path`` -- value of ``-bp`` / ``--backup-path``; ``None`` when
      the option is not given.
    """
    arg_parser = argparse.ArgumentParser(description=_DESCRIPTION)

    arg_parser.add_argument('root_path', help="Root path")
    arg_parser.add_argument(
        '-bp', '--backup-path',
        help="Path to copy originals to before modifying")

    return arg_parser.parse_args()
# Every byte value above 0x7f (i.e. outside 7-bit ASCII).
_EXTENDED_BYTES = bytes(range(0x80, 0x100))

# 256-entry translation table: ASCII bytes map to themselves, every extended
# byte maps to b'.'.
_REPLACEMENT_TABLE = bytes(range(0x80)) + b"." * 0x80


def _replace_extended_bytes(data):
    """Replace every byte above 0x7f in `data` with ``b'.'``.

    Returns a ``(cleaned, count)`` tuple where `cleaned` is the resulting
    bytes object (same length as `data`) and `count` is the number of bytes
    that were replaced.
    """
    # Deleting the extended bytes and comparing lengths counts them without a
    # Python-level loop.
    count = len(data) - len(data.translate(None, _EXTENDED_BYTES))
    if count == 0:
        return data, 0

    return data.translate(_REPLACEMENT_TABLE), count


def _main():
    """Walk the root path and rewrite files that contain extended bytes.

    For every non-hidden file below ``args.root_path`` (hidden folders are
    not descended into), each byte above 0x7f is replaced with ``'.'``. Files
    that contain no such bytes are left untouched. When ``--backup-path`` is
    given, the original file is first copied there under the same relative
    path.

    Raises whatever ``open``/``os.makedirs``/``shutil.copyfile`` raise on I/O
    failure; no error is swallowed.
    """
    args = _get_args()
    root_path = args.root_path

    for path, folders, files in os.walk(root_path):
        # Prune hidden folders in place so os.walk never descends into them
        # (or anything nested beneath them).
        folders[:] = [
            folder for folder in folders if not folder.startswith('.')]

        # relpath copes with a trailing separator on root_path, which plain
        # prefix slicing does not.
        rel_path = os.path.relpath(path, root_path)
        if rel_path == os.curdir:
            rel_path = ''

        for filename in files:
            if filename.startswith('.'):
                continue

            filepath = os.path.join(path, filename)

            with open(filepath, 'rb') as f:
                data = f.read()

            # Compute the new content before touching anything on disk, so a
            # failure here can never leave a truncated file behind.
            cleaned, fixes = _replace_extended_bytes(data)
            if fixes == 0:
                continue

            print("{} characters stripped. Updating [{}].".format(
                fixes, os.path.join(rel_path, filename)))

            if args.backup_path is not None:
                backup_path = os.path.join(args.backup_path, rel_path)
                backup_filepath = os.path.join(backup_path, filename)

                os.makedirs(backup_path, exist_ok=True)
                shutil.copyfile(filepath, backup_filepath)

            with open(filepath, 'wb') as f:
                f.write(cleaned)
# Run only when executed as a script, so that importing this module does not
# parse sys.argv or modify any files.
if __name__ == "__main__":
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment