UserUnknownFactor · September 7, 2023 06:10 · UserUnknownFactor · Sep 9, 2020
diff --git a/replacers.py b/replacers.py
 # -*- coding:utf-8 -*-
 # Python 3.9+ tool to replace strings using regular expression(s) in many files with specified encoding at once
 import argparse, os, sys, traceback, re, csv
 from glob import fnmatch

 VERSION_STRING = '1.0.3'

 # Tabs are invisible in most editors, so the replacement table uses '→' as its
 # field separator instead. Copy & paste the character from here into the CSV
 # file or into an external table importer.
 DELIMITER_CHAR = '→'
 # Quoting is disabled (QUOTE_NONE), so no quote character is required.
 # quotechar is left unset because Python 3.13+ rejects a dialect that uses the
 # same character as both quotechar and escapechar.
 csv.register_dialect("replacements", delimiter=DELIMITER_CHAR, quotechar=None, doublequote=False, quoting=csv.QUOTE_NONE, escapechar='\uFFFF')
 # Matches a run of one or more backslash escapes: \v \t \n \f \r \" \' \\,
 # a backslash followed by up to three digits, or \x / \u / \U followed by
 # two to eight hexadecimal digits. The whole run is captured as group 1.
 ESCAPECHARS_RE = re.compile(r'((?:\\(?:v|t|n|f|r|\"|\'|\\)|\\(?:(?:[0-2][0-9]{1,2}|3[0-6][0-9]|37[0-7]|[0-9]{1,2}))|\\(?:[xuU](?:[0-9a-fA-F]{2,8})))+)')

 # Defaults used when the matching command-line option is not given.
 DEFAULT_PATH = os.path.abspath(os.getcwd())
 REPLACERS_FILE = "replace.csv"
 CSV_ENCODING = "utf-8-sig"
 DEFAULT_MASKS = '*.csv' #'*.csv,*.js'
 DEFAULT_FILE_ENCODING = 'utf-8-sig'


 #allow escaped characters in a string
 def string_unescape(s, encoding="utf-8"):
    """Expand backslash escape sequences contained in ``s``.

    Every run of escapes matched by ``ESCAPECHARS_RE`` is decoded with the
    ``unicode-escape`` codec; all other text is returned untouched. The
    ``encoding`` argument is accepted but not used.
    """
    def _decode(match):
        # The matched run is plain ASCII escape text, so latin1 is lossless.
        return match.group(0).encode('latin1').decode('unicode-escape')

    return ESCAPECHARS_RE.sub(_decode, s)

 def replaceStringInFile(fileName, useCSV, oldString, newString, fileEnc, consoleEnc, ignoreCase=False, backupOld=False, work_dir=DEFAULT_PATH):
    """Apply replacements to a single file and rewrite it if anything changed.

    Two modes are supported:

    * Table mode (``useCSV`` true and ``REPLACERS_FILE`` exists): each row of
      the table is a literal ``search → replacement`` pair, applied in order.
    * Regex mode (otherwise, when ``oldString`` is non-empty): ``oldString`` is
      compiled with ``re.MULTILINE`` (plus ``re.IGNORECASE`` if ``ignoreCase``)
      and every match is replaced by the unescaped ``newString``.

    Progress messages are printed only when stdout is a terminal.

    Args:
        fileName: Path of the file to process.
        useCSV: Use the replacement table instead of the regex arguments.
        oldString: Regular expression to search for in regex mode.
        newString: Replacement template for regex mode.
        fileEnc: Encoding used to read and write the file (and its backup).
        consoleEnc: Passed through to ``string_unescape``.
        ignoreCase: Match case-insensitively in regex mode.
        backupOld: Write the original text to ``fileName + '.backup'`` before
            overwriting the file.
        work_dir: Prefix removed from ``fileName`` in progress messages.

    Returns:
        ``True`` if the file was modified, ``False`` otherwise (including when
        the file does not exist or is not writable).
    """
    if not (os.path.isfile(fileName) and os.access(fileName, os.W_OK)):
        if sys.stdout.isatty(): print("File not writeable:" + fileName)
        return False

    with open(fileName, 'r', encoding=fileEnc) as f:
        text_old = f.read()
    text = text_old
    # Table mode: number of rows that matched. Regex mode: number of matches.
    hits = 0

    if useCSV and os.path.isfile(REPLACERS_FILE):
        total_matches = 0
        with open(REPLACERS_FILE, 'r', newline='', encoding=CSV_ENCODING) as f:
            for row in csv.reader(f, 'replacements'):
                # A row without the delimiter has a single field, and an empty
                # search string would match between every character of the
                # file, so both kinds of rows are skipped.
                if len(row) < 2 or not row[0]:
                    continue
                orig, repl = row[0], row[1]
                found = text.count(orig)
                if found:
                    hits += 1
                    total_matches += found
                    text = text.replace(orig, repl)
        if total_matches > 0 and sys.stdout.isatty():
            print(" Found %d pattern(s) and %d match(es) in" % (hits, total_matches), fileName.replace(work_dir, ''))
    elif oldString != '':
        flags = re.MULTILINE
        if ignoreCase:
            flags |= re.IGNORECASE
        pattern = re.compile(oldString, flags)
        replacement = string_unescape(newString, consoleEnc)
        # Only substitute when something matches, so a file without matches
        # never touches the replacement template.
        if pattern.search(text):
            text, hits = pattern.subn(replacement, text)
            if sys.stdout.isatty(): print(" Found %d match(es) in" % hits, fileName.replace(work_dir, ''))

    if hits > 0:
        if backupOld:
            with open(fileName + '.backup', 'w', encoding=fileEnc) as f:
                f.write(text_old)
        with open(fileName, 'w', encoding=fileEnc) as f:
            f.write(text)
    return hits > 0

 def main():
    """Command-line entry point.

    Parses the options, collects every file below the search path whose name
    matches one of the file masks, runs ``replaceStringInFile`` on each of
    them and prints a summary. Status output is printed only when stdout is a
    terminal. Exits with status 2 if the pattern is not a valid regular
    expression or the search path does not exist.
    """
    parser = argparse.ArgumentParser()
    # change defaults manually to fix them in place instead of typing from command line each time
    parser.add_argument("-p", help=f"File masks (default: {DEFAULT_MASKS})", default=DEFAULT_MASKS, metavar=('file_masks'))
    parser.add_argument("-d", help="Search path", default=DEFAULT_PATH, metavar=('files_start_path'))
    parser.add_argument("-e", help=f"Files encoding (default: {DEFAULT_FILE_ENCODING})", default=DEFAULT_FILE_ENCODING, metavar=('files_encoding'))
    parser.add_argument("-r", help=(f"Use {REPLACERS_FILE} (Format: text" + DELIMITER_CHAR +"replacer; utf-8)"), action="store_true")
    parser.add_argument("-i", help=("Ignore word case"), action="store_true")
    parser.add_argument("-b", help=("Backup old files with .backup extension"), action="store_true")
    regroup = parser.add_argument_group('patterns')
    regroup.add_argument("-o", help="RegExp to replace", metavar=('old_regexp'))
    regroup.add_argument("-n", help="Text replacer", metavar=('new_string'))
    app_args = parser.parse_args()

    # Without a replacement table (-r) or a pattern (-o) there is nothing to
    # do, so show the usage text instead of scanning files for no effect.
    if not app_args.r and not app_args.o:
        print('Replacer v' + VERSION_STRING)
        parser.print_help(sys.stderr)
        print('\nEscape special characters & | ( < > ^ with ^ and " with \\ in command line.')
        print('Capture group replacers are \\\\1, \\\\2 etc.')
        print('Example: '+os.path.basename(__file__)+' -e cp932 -p "*.txt,*.tmp" -d .\\outdir -o "te(st)[^^\\w]+\\"\\d{2,}\\"" -n "te\\\\1 "')
        sys.exit()

    oldString = app_args.o or ''
    newString = app_args.n or ''
    encStr = app_args.e
    # Strip blanks around each mask so that "*.txt, *.tmp" works as expected.
    masks = [mask.strip() for mask in (app_args.p or DEFAULT_MASKS).split(',') if mask.strip()]
    path = app_args.d or DEFAULT_PATH

    # Reject an invalid pattern up front, using the same flags the replacement
    # itself will use, rather than failing with a traceback on the first file.
    if oldString and not app_args.r:
        flags = re.MULTILINE | (re.IGNORECASE if app_args.i else 0)
        try:
            re.compile(oldString, flags)
        except re.error as err:
            print("Invalid regular expression: %s" % err, file=sys.stderr)
            sys.exit(2)

    if sys.stdout.isatty():
        if not app_args.r:
            print("\nRegExp Pattern       : «" + oldString + '»')
            print("String Replacer      : «" + newString+'»')
        print("File Masks           : " + ', '.join(masks))
        print("Encoding             : " + encStr)
        print("Directory            : " + path)

    if not os.path.exists(path):
        if sys.stdout.isatty(): print("Path don't exist: " + path)
        sys.exit(2)

    matchingFileList = []
    for root, _dirs, files in os.walk(path):
        for filen in files:
            if any(fnmatch.fnmatch(filen, mask) for mask in masks):
                matchingFileList.append(os.path.join(root, filen))

    if sys.stdout.isatty(): print('Found matching files : ' + str(len(matchingFileList)))

    # Never rewrite this script or the replacement table. Names are compared
    # exactly so that unrelated files merely containing them are not skipped.
    skipped_names = {os.path.basename(__file__), REPLACERS_FILE}
    fileCount = 0
    filesReplaced = 0
    for currentFile in matchingFileList:
        if os.path.basename(currentFile) in skipped_names: continue
        fileCount += 1
        if replaceStringInFile(currentFile, app_args.r, oldString, newString, encStr, 'latin1', app_args.i, app_args.b, path):
            filesReplaced += 1

    if sys.stdout.isatty():
        print("Files searched       : " + str(fileCount))
        print("Files replaced       : " + str(filesReplaced))

 if __name__ == "__main__":
     # Run the tool only when executed as a script, not when imported.
     main()
	# -*- coding:utf-8 -*-
	# Python 3.9+ tool to replace strings using regular expression(s) in many files with specified encoding at once
	import argparse, os, sys, traceback, re, csv
	from glob import fnmatch

	VERSION_STRING = '1.0.3'

	# Tabs are invisible in most editors, so the replacement table uses '→' as its
	# field separator instead. Copy & paste the character from here into the CSV
	# file or into an external table importer.
	DELIMITER_CHAR = '→'
	# Quoting is disabled (QUOTE_NONE), so no quote character is required.
	# quotechar is left unset because Python 3.13+ rejects a dialect that uses the
	# same character as both quotechar and escapechar.
	csv.register_dialect("replacements", delimiter=DELIMITER_CHAR, quotechar=None, doublequote=False, quoting=csv.QUOTE_NONE, escapechar='\uFFFF')
	# Matches a run of one or more backslash escapes: \v \t \n \f \r \" \' \\,
	# a backslash followed by up to three digits, or \x / \u / \U followed by
	# two to eight hexadecimal digits. The whole run is captured as group 1.
	ESCAPECHARS_RE = re.compile(r'((?:\\(?:v|t|n|f|r|\"|\'|\\)|\\(?:(?:[0-2][0-9]{1,2}|3[0-6][0-9]|37[0-7]|[0-9]{1,2}))|\\(?:[xuU](?:[0-9a-fA-F]{2,8})))+)')

	# Defaults used when the matching command-line option is not given.
	DEFAULT_PATH = os.path.abspath(os.getcwd())
	REPLACERS_FILE = "replace.csv"
	CSV_ENCODING = "utf-8-sig"
	DEFAULT_MASKS = '*.csv' #'*.csv,*.js'
	DEFAULT_FILE_ENCODING = 'utf-8-sig'


	#allow escaped characters in a string
	def string_unescape(s, encoding="utf-8"):
	    """Expand backslash escape sequences contained in ``s``.

	    Every run of escapes matched by ``ESCAPECHARS_RE`` is decoded with the
	    ``unicode-escape`` codec; all other text is returned untouched. The
	    ``encoding`` argument is accepted but not used.
	    """
	    sarray = ESCAPECHARS_RE.split(s)
	    for i, si in enumerate(sarray):
	        if ESCAPECHARS_RE.search(si):
	            # The matched run is plain ASCII escape text, so latin1 is lossless.
	            sarray[i] = si.encode('latin1').decode('unicode-escape')
	    return ''.join(sarray)

	def replaceStringInFile(fileName, useCSV, oldString, newString, fileEnc, consoleEnc, ignoreCase=False, backupOld=False, work_dir=DEFAULT_PATH):
	    """Apply replacements to a single file and rewrite it if anything changed.

	    Two modes are supported:

	    * Table mode (``useCSV`` true and ``REPLACERS_FILE`` exists): each row of
	      the table is a literal ``search → replacement`` pair, applied in order.
	    * Regex mode (otherwise, when ``oldString`` is non-empty): ``oldString`` is
	      compiled with ``re.MULTILINE`` (plus ``re.IGNORECASE`` if ``ignoreCase``)
	      and every match is replaced by the unescaped ``newString``.

	    Progress messages are printed only when stdout is a terminal.

	    Args:
	        fileName: Path of the file to process.
	        useCSV: Use the replacement table instead of the regex arguments.
	        oldString: Regular expression to search for in regex mode.
	        newString: Replacement template for regex mode.
	        fileEnc: Encoding used to read and write the file (and its backup).
	        consoleEnc: Passed through to ``string_unescape``.
	        ignoreCase: Match case-insensitively in regex mode.
	        backupOld: Write the original text to ``fileName + '.backup'`` before
	            overwriting the file.
	        work_dir: Prefix removed from ``fileName`` in progress messages.

	    Returns:
	        ``True`` if the file was modified, ``False`` otherwise (including when
	        the file does not exist or is not writable).
	    """
	    if not (os.path.isfile(fileName) and os.access(fileName, os.W_OK)):
	        if sys.stdout.isatty(): print("File not writeable:" + fileName)
	        return False

	    with open(fileName, 'r', encoding=fileEnc) as f:
	        text_old = f.read()
	    text = text_old
	    # Table mode: number of rows that matched. Regex mode: number of matches.
	    hits = 0

	    if useCSV and os.path.isfile(REPLACERS_FILE):
	        total_matches = 0
	        with open(REPLACERS_FILE, 'r', newline='', encoding=CSV_ENCODING) as f:
	            for row in csv.reader(f, 'replacements'):
	                # A row without the delimiter has a single field, and an empty
	                # search string would match between every character of the
	                # file, so both kinds of rows are skipped.
	                if len(row) < 2 or not row[0]:
	                    continue
	                orig, repl = row[0], row[1]
	                found = text.count(orig)
	                if found:
	                    hits += 1
	                    total_matches += found
	                    text = text.replace(orig, repl)
	        if total_matches > 0 and sys.stdout.isatty():
	            print(" Found %d pattern(s) and %d match(es) in" % (hits, total_matches), fileName.replace(work_dir, ''))
	    elif oldString != '':
	        flags = re.MULTILINE
	        if ignoreCase:
	            flags |= re.IGNORECASE
	        pattern = re.compile(oldString, flags)
	        replacement = string_unescape(newString, consoleEnc)
	        # Only substitute when something matches, so a file without matches
	        # never touches the replacement template.
	        if pattern.search(text):
	            text, hits = pattern.subn(replacement, text)
	            if sys.stdout.isatty(): print(" Found %d match(es) in" % hits, fileName.replace(work_dir, ''))

	    if hits > 0:
	        if backupOld:
	            with open(fileName + '.backup', 'w', encoding=fileEnc) as f:
	                f.write(text_old)
	        with open(fileName, 'w', encoding=fileEnc) as f:
	            f.write(text)
	    return hits > 0

	def main():
	    """Command-line entry point.

	    Parses the options, collects every file below the search path whose name
	    matches one of the file masks, runs ``replaceStringInFile`` on each of
	    them and prints a summary. Status output is printed only when stdout is a
	    terminal. Exits with status 2 if the pattern is not a valid regular
	    expression or the search path does not exist.
	    """
	    parser = argparse.ArgumentParser()
	    # change defaults manually to fix them in place instead of typing from command line each time
	    parser.add_argument("-p", help=f"File masks (default: {DEFAULT_MASKS})", default=DEFAULT_MASKS, metavar=('file_masks'))
	    parser.add_argument("-d", help="Search path", default=DEFAULT_PATH, metavar=('files_start_path'))
	    parser.add_argument("-e", help=f"Files encoding (default: {DEFAULT_FILE_ENCODING})", default=DEFAULT_FILE_ENCODING, metavar=('files_encoding'))
	    parser.add_argument("-r", help=(f"Use {REPLACERS_FILE} (Format: text" + DELIMITER_CHAR +"replacer; utf-8)"), action="store_true")
	    parser.add_argument("-i", help=("Ignore word case"), action="store_true")
	    parser.add_argument("-b", help=("Backup old files with .backup extension"), action="store_true")
	    regroup = parser.add_argument_group('patterns')
	    regroup.add_argument("-o", help="RegExp to replace", metavar=('old_regexp'))
	    regroup.add_argument("-n", help="Text replacer", metavar=('new_string'))
	    app_args = parser.parse_args()

	    # Without a replacement table (-r) or a pattern (-o) there is nothing to
	    # do, so show the usage text instead of scanning files for no effect.
	    if not app_args.r and not app_args.o:
	        print('Replacer v' + VERSION_STRING)
	        parser.print_help(sys.stderr)
	        print('\nEscape special characters & | ( < > ^ with ^ and " with \\ in command line.')
	        print('Capture group replacers are \\\\1, \\\\2 etc.')
	        print('Example: '+os.path.basename(__file__)+' -e cp932 -p "*.txt,*.tmp" -d .\\outdir -o "te(st)[^^\\w]+\\"\\d{2,}\\"" -n "te\\\\1 "')
	        sys.exit()

	    oldString = app_args.o or ''
	    newString = app_args.n or ''
	    encStr = app_args.e
	    # Strip blanks around each mask so that "*.txt, *.tmp" works as expected.
	    masks = [mask.strip() for mask in (app_args.p or DEFAULT_MASKS).split(',') if mask.strip()]
	    path = app_args.d or DEFAULT_PATH

	    # Reject an invalid pattern up front, using the same flags the replacement
	    # itself will use, rather than failing with a traceback on the first file.
	    if oldString and not app_args.r:
	        flags = re.MULTILINE | (re.IGNORECASE if app_args.i else 0)
	        try:
	            re.compile(oldString, flags)
	        except re.error as err:
	            print("Invalid regular expression: %s" % err, file=sys.stderr)
	            sys.exit(2)

	    if sys.stdout.isatty():
	        if not app_args.r:
	            print("\nRegExp Pattern       : «" + oldString + '»')
	            print("String Replacer      : «" + newString+'»')
	        print("File Masks           : " + ', '.join(masks))
	        print("Encoding             : " + encStr)
	        print("Directory            : " + path)

	    if not os.path.exists(path):
	        if sys.stdout.isatty(): print("Path don't exist: " + path)
	        sys.exit(2)

	    matchingFileList = []
	    for root, _dirs, files in os.walk(path):
	        for filen in files:
	            if any(fnmatch.fnmatch(filen, mask) for mask in masks):
	                matchingFileList.append(os.path.join(root, filen))

	    if sys.stdout.isatty(): print('Found matching files : ' + str(len(matchingFileList)))

	    # Never rewrite this script or the replacement table. Names are compared
	    # exactly so that unrelated files merely containing them are not skipped.
	    skipped_names = {os.path.basename(__file__), REPLACERS_FILE}
	    fileCount = 0
	    filesReplaced = 0
	    for currentFile in matchingFileList:
	        if os.path.basename(currentFile) in skipped_names: continue
	        fileCount += 1
	        if replaceStringInFile(currentFile, app_args.r, oldString, newString, encStr, 'latin1', app_args.i, app_args.b, path):
	            filesReplaced += 1

	    if sys.stdout.isatty():
	        print("Files searched       : " + str(fileCount))
	        print("Files replaced       : " + str(filesReplaced))

	if __name__ == '__main__':
	    # Run the tool only when executed as a script, not when imported.
	    main()
No results found