Created
March 5, 2019 01:52
-
-
Save delphym/b0cb72d148bd6fb40f6c2f57632b164f to your computer and use it in GitHub Desktop.
Recursively find and replace text in files under a specific folder with preview of changed data in dry-run mode, and optional backup.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
r"""
Recursively find and replace text in files under a specific folder with preview of changed data in dry-run mode. | |
The preview is automatically shown in `--dry-run` mode. | |
To also show the preview for write mode use the verbose flag, i.e. `--verbose` or `-v`. | |
Parameters should preferably use single quotes, especially for the glob switch, i.e. `--glob='*.html'`.
If you need to search/replace special characters such as a literal '*', '?' etc., make sure you escape
them with a backslash, i.e. '\*stardust\?\*\*\*'.
============ | |
Example Usage | |
--------------- | |
**See what is going to change (dry run):** | |
> flip all dates from 2017-12-31 to 31-12-2017 | |
find_replace.py --verbose --dry-run --glob "*.py" --search-regex "\b(\d{4})-(\d{2})-(\d{2})\b" --replace-regex "\3-\2-\1" | |
version with short options: | |
find_replace.py -v -dr -g "*.py" -s "\b(\d{4})-(\d{2})-(\d{2})\b" -r "\3-\2-\1"
**Do actual replacement:** | |
find_replace.py --dir project/myfolder --search-regex "\b(\d{4})-(\d{2})-(\d{2})\b" --replace-regex "\3-\2-\1" | |
**Do actual replacement and create backup files:** | |
find_replace.py --dir project/myfolder --search-regex "\b(\d{4})-(\d{2})-(\d{2})\b" --replace-regex "\3-\2-\1" --create-backup | |
Output of `find_replace.py -h`: | |
DESCRIPTION: | |
Find and replace recursively from the given folder using regular expressions | |
optional arguments: | |
-h, --help show this help message and exit | |
--dir DIR, -d DIR folder to search in; by default current folder | |
--search-regex SEARCH_REGEX, -s SEARCH_REGEX | |
search regex | |
--replace-regex REPLACE_REGEX, -r REPLACE_REGEX | |
replacement regex | |
--glob GLOB, -g GLOB glob pattern, i.e. *.html | |
--dry-run, -dr don't replace anything just show what is going to be | |
done | |
--create-backup, -b Create backup files | |
--verbose, -v Show files which don't match the search regex | |
--print-parent-folder, -p | |
Show the parent info for debug | |
--list-non-matching, -n
List files that don't match the search regex
USAGE: | |
find-replace-in-files-regex.py -d [my_folder] -s <search_regex> -r <replace_regex> -g [glob_pattern] | |
""" | |
from __future__ import print_function | |
import os | |
import fnmatch | |
import sys | |
import shutil | |
import re | |
import argparse | |
class Colors:
    """Named terminal escape sequences used to colour the script's output.

    Each attribute is a ready-to-print string of the form ``ESC[<code>m``;
    print ``NoColor`` afterwards to end a coloured span.
    """

    # Reset / default foreground
    NoColor = "\033[0m"
    Default = "\033[39m"

    # Standard foreground colours (codes 30-37)
    Black = "\033[30m"
    Red = "\033[31m"
    Green = "\033[32m"
    Yellow = "\033[33m"
    Blue = "\033[34m"
    Magenta = "\033[35m"
    Cyan = "\033[36m"
    LightGray = "\033[37m"

    # Bright foreground colours (codes 90-97)
    DarkGray = "\033[90m"
    LightRed = "\033[91m"
    LightGreen = "\033[92m"
    LightYellow = "\033[93m"
    LightBlue = "\033[94m"
    LightMagenta = "\033[95m"
    LightCyan = "\033[96m"
    White = "\033[97m"
def find_replace(cfg):
    """Walk ``cfg.dir`` recursively and apply a regex substitution to matching files.

    ``cfg`` is the parsed command-line namespace. Attributes read here:

    * ``dir`` -- root folder of the walk.
    * ``glob`` -- ``fnmatch`` pattern applied to file names (not full paths).
    * ``search_regex`` / ``replace_regex`` -- pattern and replacement template
      passed to ``re``.
    * ``dry_run`` -- when true, nothing is written (no replacement, no backup);
      the BEFORE/AFTER preview is always printed.
    * ``create_backup`` -- when true, the file is copied to ``<name>.bak``
      right before it is overwritten (extra ``.bak`` suffixes are appended
      until the backup name is unused).
    * ``verbose`` -- also print the BEFORE/AFTER preview in write mode.
    * ``print_parent_folder`` -- print a debug line with the name of the
      directory one level above the folder that contains the file.
    * ``list_non_matching`` -- report files without any match.

    Raises ``re.error`` if ``cfg.search_regex`` is not a valid pattern.
    Errors while processing an individual file are reported and the walk
    continues with the next file.
    """
    search_pattern = re.compile(cfg.search_regex)

    if cfg.dry_run:
        print('THIS IS A DRY RUN -- NO FILES WILL BE CHANGED!')

    for path, _dirs, files in os.walk(os.path.abspath(cfg.dir)):
        for filename in fnmatch.filter(files, cfg.glob):
            full_path = os.path.join(path, filename)
            try:
                if cfg.print_parent_folder:
                    pardir = os.path.normpath(os.path.join(path, '..'))
                    pardir = os.path.split(pardir)[-1]
                    print('[%s]' % pardir)

                # Check the file type first so nothing (not even a backup)
                # is attempted on broken links or other non-regular entries.
                if not os.path.isfile(full_path):
                    print("{}Path {} is not a regular file! Skipping{}".format(Colors.Red, full_path, Colors.NoColor))
                    continue

                with open(full_path) as f:
                    old_text = f.read()

                all_matches = search_pattern.findall(old_text)
                if not all_matches:
                    if cfg.list_non_matching:
                        print('File {} does not contain search regex "{}"'.format(filename, cfg.search_regex))
                    continue

                print('{}Found {} match(es) in file {}{}'.format(Colors.LightMagenta, len(all_matches), full_path, Colors.NoColor))
                # Compute the new content before touching the file: an invalid
                # replacement template then fails without truncating anything.
                new_text = search_pattern.sub(cfg.replace_regex, old_text)

                if not cfg.dry_run:
                    # Back up only files that are really about to be rewritten;
                    # a dry run or a file without matches must not create
                    # .bak files.
                    if cfg.create_backup:
                        backup_path = full_path + '.bak'
                        while os.path.exists(backup_path):
                            backup_path += '.bak'
                        print('DBG: creating backup', backup_path)
                        shutil.copyfile(full_path, backup_path)

                    print('DBG: replacing in file', full_path)
                    with open(full_path, "w") as f:
                        f.write(new_text)

                if cfg.verbose or cfg.dry_run:
                    # Wrap each match (BEFORE) / each replacement (AFTER) in a
                    # colour code, then keep only the lines carrying that code.
                    colorized_old = search_pattern.sub(Colors.LightBlue + r"\g<0>" + Colors.NoColor, old_text)
                    colorized_old = '\n'.join('\t' + line.strip() for line in colorized_old.split('\n') if Colors.LightBlue in line)
                    colorized = search_pattern.sub(Colors.Green + cfg.replace_regex + Colors.NoColor, old_text)
                    colorized = '\n'.join('\t' + line.strip() for line in colorized.split('\n') if Colors.Green in line)
                    print("{}BEFORE:{}\n{}".format(Colors.White, Colors.NoColor, colorized_old))
                    print("{}AFTER :{}\n{}".format(Colors.Yellow, Colors.NoColor, colorized))
            except Exception as e:
                # Best effort: keep walking, but say what went wrong instead of
                # discarding the exception.
                print("{}ERROR processing file '{}': {}{}".format(Colors.Red, full_path, e, Colors.NoColor))
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand the resulting
    # namespace to find_replace().
    usage_text = (
        'USAGE:\n'
        '{0} -d [my_folder] -s <search_regex> -r <replace_regex> -g [glob_pattern]\n'
    ).format(os.path.basename(sys.argv[0]))

    parser = argparse.ArgumentParser(
        description='DESCRIPTION:\n'
                    'Find and replace recursively from the given folder using regular expressions',
        # Raw formatter keeps the line breaks of description/epilog as written.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_text)

    parser.add_argument('--dir', '-d',
                        help='folder to search in; by default current folder',
                        default='.')
    parser.add_argument('--search-regex', '-s',
                        help='search regex',
                        required=True)
    parser.add_argument('--replace-regex', '-r',
                        help='replacement regex',
                        required=True)
    parser.add_argument('--glob', '-g',
                        help='glob pattern, i.e. *.html',
                        default="*.*")
    parser.add_argument('--dry-run', '-dr',
                        action='store_true',
                        help="don't replace anything just show what is going to be done")
    parser.add_argument('--create-backup', '-b',
                        action='store_true',
                        help='Create backup files')
    # The help text used to describe --list-non-matching; the flag actually
    # enables the BEFORE/AFTER preview when files are really rewritten.
    parser.add_argument('--verbose', '-v',
                        action='store_true',
                        help="Also show the BEFORE/AFTER preview when actually replacing "
                             "(it is always shown in dry-run mode)")
    parser.add_argument('--print-parent-folder', '-p',
                        action='store_true',
                        help="Show the parent info for debug")
    parser.add_argument('--list-non-matching', '-n',
                        action='store_true',
                        help="List files that don't match the search regex")

    config = parser.parse_args()
    find_replace(config)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment