Skip to content

Instantly share code, notes, and snippets.

@UserUnknownFactor
Last active September 7, 2023 06:10
Show Gist options
  • Select an option

  • Save UserUnknownFactor/83d3e086adf40a6bfe410e9856b76ac5 to your computer and use it in GitHub Desktop.

Select an option

Save UserUnknownFactor/83d3e086adf40a6bfe410e9856b76ac5 to your computer and use it in GitHub Desktop.
Replacers for multiple files by regexp
# -*- coding:utf-8 -*-
# Python 3.9+ tool to replace strings using regular expression(s) in many files with specified encoding at once
import argparse, os, sys, traceback, re, csv
from glob import fnmatch
# Version shown in the usage banner.
VERSION_STRING = '1.0.3'

# Field separator of the replacements table. Tabs are invisible in most
# editors, so the arrow character is used instead; copy & paste it from here
# into the CSV file or an external table importer.
DELIMITER_CHAR = '→'

# Dialect used to read the replacements table: fields are split on
# DELIMITER_CHAR only and quote characters are ordinary data (QUOTE_NONE).
# quotechar is None because the reader ignores it under QUOTE_NONE, and newer
# Python versions reject a dialect that uses one character in two roles (the
# original set both quotechar and escapechar to U+FFFF).
# escapechar stays U+FFFF, a noncharacter that is not expected in real text.
csv.register_dialect(
    "replacements",
    delimiter=DELIMITER_CHAR,
    quotechar=None,
    doublequote=False,
    quoting=csv.QUOTE_NONE,
    escapechar='\uFFFF',
)

# Matches a run of one or more backslash escapes: simple escapes
# (\v \t \n \f \r \" \' \\), numeric escapes of one to three digits, and
# \x / \u / \U followed by two to eight hex digits. The single capturing group
# makes re.split() return the matched runs at odd indices.
ESCAPECHARS_RE = re.compile(r'((?:\\(?:v|t|n|f|r|\"|\'|\\)|\\(?:(?:[0-2][0-9]{1,2}|3[0-6][0-9]|37[0-7]|[0-9]{1,2}))|\\(?:[xuU](?:[0-9a-fA-F]{2,8})))+)')

# Default directory to search: the working directory at start-up.
DEFAULT_PATH = os.path.abspath(os.getcwd())
# Name of the replacements table; it is looked up relative to the working directory.
REPLACERS_FILE = "replace.csv"
# Encoding used to read the replacements table.
CSV_ENCODING = "utf-8-sig"
# Default comma-separated file masks, e.g. '*.csv,*.js' for several masks.
DEFAULT_MASKS = '*.csv'
# Default encoding used to read and write the processed files.
DEFAULT_FILE_ENCODING = 'utf-8-sig'
def string_unescape(s, encoding="utf-8"):
    """Expand backslash escape sequences contained in *s*.

    Only the runs matched by ESCAPECHARS_RE are decoded (with the
    ``unicode-escape`` codec); all other text, including non-ASCII
    characters, is passed through untouched.

    A run that the pattern accepts but the codec rejects (for example a
    truncated ``\\u12``) is left in the result verbatim instead of raising.

    Args:
        s: Text that may contain escape sequences such as ``\\n`` or ``\\x41``.
        encoding: Unused; kept so existing callers keep working.

    Returns:
        The text with every decodable escape run replaced by its value.
    """
    pieces = ESCAPECHARS_RE.split(s)
    # The pattern has exactly one capturing group, so split() alternates
    # plain text (even indices) with matched escape runs (odd indices).
    for idx in range(1, len(pieces), 2):
        try:
            # Matched runs are pure ASCII, so latin1 maps them 1:1 to bytes.
            pieces[idx] = pieces[idx].encode('latin1').decode('unicode-escape')
        except UnicodeDecodeError:
            # Malformed escape (e.g. too few hex digits): keep it literal.
            continue
    return ''.join(pieces)
def _apply_csv_replacements(text):
    """Apply every literal ``search→replacement`` row of REPLACERS_FILE to *text*.

    Returns:
        A tuple ``(new_text, patterns_hit, total_matches)`` where
        ``patterns_hit`` is the number of rows that matched at least once and
        ``total_matches`` is the number of occurrences replaced over all rows.
    """
    patterns_hit = 0
    total_matches = 0
    with open(REPLACERS_FILE, 'r', newline='', encoding=CSV_ENCODING) as table:
        for row in csv.reader(table, 'replacements'):
            # Blank lines yield [], lines without a delimiter a single field;
            # an empty search string would match between every character.
            if len(row) < 2 or not row[0]:
                continue
            search, replacement = row[0], row[1]
            hits = text.count(search)
            if hits:
                patterns_hit += 1
                total_matches += hits
                text = text.replace(search, replacement)
    return text, patterns_hit, total_matches


def replaceStringInFile(fileName, useCSV, oldString, newString, fileEnc, consoleEnc, ignoreCase=False, backupOld=False, work_dir=DEFAULT_PATH):
    """Replace text in one file, either from the replacements table or by regexp.

    When *useCSV* is true and REPLACERS_FILE exists, every row of the table is
    applied as a literal search/replace. Otherwise, if *oldString* is not
    empty, it is compiled as a regular expression (MULTILINE, optionally
    IGNORECASE) and its matches are substituted with the unescaped
    *newString*. The file is rewritten only when something matched.

    Args:
        fileName: Path of the file to process.
        useCSV: Use the replacements table instead of the regexp.
        oldString: Regular expression to search for (regexp mode).
        newString: Replacement template; escapes are expanded by string_unescape.
        fileEnc: Encoding used to read and write the file.
        consoleEnc: Passed through to string_unescape.
        ignoreCase: Match case-insensitively (regexp mode only).
        backupOld: Save the original content to ``fileName + '.backup'`` first.
        work_dir: Prefix removed from the path in progress messages.

    Returns:
        True if the file was modified, False otherwise (including when the
        file is missing or not writable).
    """
    if not (os.path.isfile(fileName) and os.access(fileName, os.W_OK)):
        if sys.stdout.isatty():
            print("File not writeable:" + fileName)
        return False

    # NOTE: text mode with default newline handling is kept from the original.
    with open(fileName, 'r', encoding=fileEnc) as f:
        text_old = f.read()
    text = text_old
    shown_name = fileName.replace(work_dir, '')
    # Number of table rows that matched (CSV mode) or of regexp matches.
    changes = 0

    if useCSV and os.path.isfile(REPLACERS_FILE):
        text, changes, total_matches = _apply_csv_replacements(text)
        if total_matches > 0 and sys.stdout.isatty():
            print(" Found %d pattern(s) and %d match(es) in" % (changes, total_matches), shown_name)
    elif oldString != '':
        flags = re.MULTILINE
        if ignoreCase:
            flags |= re.IGNORECASE
        pattern = re.compile(oldString, flags)
        replacement = string_unescape(newString, consoleEnc)
        # Substitute only when there is a match, so the replacement template
        # is not evaluated for files without any match (as before).
        if pattern.search(text):
            text, changes = pattern.subn(replacement, text)
            if sys.stdout.isatty():
                print(" Found %d match(es) in" % changes, shown_name)

    if changes > 0:
        if backupOld:
            # Write the backup before the original is overwritten.
            with open(fileName + '.backup', 'w', encoding=fileEnc) as f:
                f.write(text_old)
        with open(fileName, 'w', encoding=fileEnc) as f:
            f.write(text)
    return changes > 0
def main():
    """Command-line entry point.

    Parses the options, collects every file below the search path whose name
    matches one of the comma-separated masks, runs replaceStringInFile on each
    of them and prints a summary when stdout is a terminal.

    Exits with status 2 when the search path does not exist; prints the usage
    text and exits when called without enough arguments.
    """
    parser = argparse.ArgumentParser()
    # change defaults manually to fix them in place instead of typing from command line each time
    parser.add_argument("-p", help=f"File masks (default: {DEFAULT_MASKS})", default=DEFAULT_MASKS, metavar=('file_masks'))
    parser.add_argument("-d", help="Search path", default=DEFAULT_PATH, metavar=('files_start_path'))
    parser.add_argument("-e", help=f"Files encoding (default: {DEFAULT_FILE_ENCODING})", default=DEFAULT_FILE_ENCODING, metavar=('files_encoding'))
    parser.add_argument("-r", help=(f"Use {REPLACERS_FILE} (Format: text" + DELIMITER_CHAR +"replacer; utf-8)"), action="store_true")
    parser.add_argument("-i", help=("Ignore word case"), action="store_true")
    parser.add_argument("-b", help=("Backup old files with .backup extension"), action="store_true")
    regroup = parser.add_argument_group('patterns')
    regroup.add_argument("-o", help="RegExp to replace", metavar=('old_regexp'))
    regroup.add_argument("-n", help="Text replacer", metavar=('new_string'))
    app_args = parser.parse_args()

    if len(sys.argv) < 3 and not app_args.r:
        print('Replacer v' + VERSION_STRING)
        parser.print_help(sys.stderr)
        print('\nEscape special characters & | ( < > ^ with ^ and " with \\ in command line.')
        print('Capture group replacers are \\\\1, \\\\2 etc.')
        # ".\\outdir" prints the same text as before without relying on the
        # invalid escape sequence "\o".
        print('Example: '+os.path.basename(__file__)+' -e cp932 -p "*.txt,*.tmp" -d .\\outdir -o "te(st)[^^\\w]+\\"\\d{2,}\\"" -n "te\\\\1 "')
        sys.exit()

    oldString = app_args.o or ''
    newString = app_args.n or ''
    encStr = app_args.e
    # Tolerate blanks around the commas ("*.txt, *.tmp") and empty entries.
    masks = [mask.strip() for mask in (app_args.p or DEFAULT_MASKS).split(',') if mask.strip()]
    path = app_args.d or DEFAULT_PATH
    interactive = sys.stdout.isatty()

    if interactive:
        if not app_args.r:
            # Compiling here also reports an invalid pattern before any file is touched.
            print("\nRegExp Pattern : «" + re.compile(oldString, re.VERBOSE | re.MULTILINE).pattern + '»')
            print("String Replacer : «" + newString+'»')
        print("File Masks : " + ', '.join(masks))
        print("Encoding : " + encStr)
        print("Directory : " + path)

    if not os.path.exists(path):
        if interactive:
            print("Path don't exist: " + path)
        sys.exit(2)

    matchingFileList = [
        os.path.join(root, filen)
        for root, _dirs, files in os.walk(path)
        for filen in files
        if any(fnmatch.fnmatch(filen, mask) for mask in masks)
    ]
    if interactive:
        print('Found matching files : ' + str(len(matchingFileList)))

    # Never process this script or the replacements table. Compare whole file
    # names: a substring test on the full path would also skip unrelated files
    # such as "noreplace.csv" or anything below a similarly named directory.
    skipped_names = {
        os.path.normcase(os.path.basename(__file__)),
        os.path.normcase(os.path.basename(REPLACERS_FILE)),
    }
    fileCount = 0
    filesReplaced = 0
    for currentFile in matchingFileList:
        if os.path.normcase(os.path.basename(currentFile)) in skipped_names:
            continue
        fileCount += 1
        if replaceStringInFile(currentFile, app_args.r, oldString, newString, encStr, 'latin1', app_args.i, app_args.b, path):
            filesReplaced += 1

    if interactive:
        print("Files searched : " + str(fileCount))
        print("Files replaced : " + str(filesReplaced))


if __name__ == '__main__':
    main()
@UserUnknownFactor
Copy link
Copy Markdown
Author

Example replace.csv file:

example_text→example_replacer
example_text1→example_replacer1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment