Last active
September 7, 2023 06:10
-
-
Save UserUnknownFactor/83d3e086adf40a6bfe410e9856b76ac5 to your computer and use it in GitHub Desktop.
Replacers for multiple files by regexp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding:utf-8 -*- | |
| # Python 3.9+ tool to replace strings using regular expression(s) in many files with specified encoding at once | |
| import argparse, os, sys, traceback, re, csv | |
| from glob import fnmatch | |
VERSION_STRING = '1.0.3'

# Tabs are invisible in most editors, so '→' is used as the field separator.
# Copy & paste it from here into the CSV and any external table importer.
DELIMITER_CHAR = '→'

# Dialect of the replacements table: fields are split on DELIMITER_CHAR only
# and quoting is disabled.  quotechar is left unset because a reader with
# QUOTE_NONE never consults it, and newer Python releases reject a dialect
# that uses the same character as both quotechar and escapechar.
csv.register_dialect(
    "replacements",
    delimiter=DELIMITER_CHAR,
    quotechar=None,
    doublequote=False,
    quoting=csv.QUOTE_NONE,
    escapechar='\uFFFF',
)

# One or more consecutive backslash escapes: simple (\n, \t, ...), numeric
# (\1 ... \377) and hexadecimal.  Hexadecimal escapes must carry exactly the
# number of digits the 'unicode-escape' codec requires (\xNN, \uNNNN,
# \UNNNNNNNN up to 0010FFFF); shorter forms are left alone instead of being
# handed to the codec, which would fail on them.
ESCAPECHARS_RE = re.compile(
    r'((?:\\(?:v|t|n|f|r|\"|\'|\\)'
    r'|\\(?:(?:[0-2][0-9]{1,2}|3[0-6][0-9]|37[0-7]|[0-9]{1,2}))'
    r'|\\(?:x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U00(?:0[0-9a-fA-F]|10)[0-9a-fA-F]{4}))+)'
)

DEFAULT_PATH = os.path.abspath(os.getcwd())
REPLACERS_FILE = "replace.csv"
CSV_ENCODING = "utf-8-sig"
DEFAULT_MASKS = '*.csv'  # '*.csv,*.js'
DEFAULT_FILE_ENCODING = 'utf-8-sig'
# allow escaped characters in a string
def string_unescape(s, encoding="utf-8"):
    """Expand backslash escape sequences contained in *s*.

    The string is split on ESCAPECHARS_RE; every piece that is a run of
    escape sequences is decoded with the 'unicode-escape' codec, all other
    pieces are kept verbatim.

    Args:
        s: Text that may contain escapes such as ``\\n`` or ``\\u00e9``.
        encoding: Not used by the function; kept so existing callers that
            pass it keep working.

    Returns:
        The text with all well-formed escape sequences expanded.
    """
    pieces = ESCAPECHARS_RE.split(s)
    for idx, piece in enumerate(pieces):
        if not ESCAPECHARS_RE.search(piece):
            continue
        try:
            pieces[idx] = piece.encode('latin1').decode('unicode-escape')
        except UnicodeDecodeError:
            # The run looked like an escape but the codec rejects it (for
            # example a truncated \u sequence): keep the text as typed
            # rather than aborting the whole replacement run.
            pass
    return ''.join(pieces)
def replaceStringInFile(fileName, useCSV, oldString, newString, fileEnc, consoleEnc, ignoreCase=False, backupOld=False, work_dir=DEFAULT_PATH):
    """Apply replacements to one file and rewrite it if anything matched.

    Two modes exist:

    * CSV mode (``useCSV`` is true and REPLACERS_FILE exists): every row of
      the replacements table is applied as a literal, case-sensitive
      ``search -> replacement`` pair, in file order.
    * RegExp mode (otherwise, when ``oldString`` is not empty): ``oldString``
      is compiled with re.MULTILINE (plus re.IGNORECASE when ``ignoreCase``)
      and every match is substituted with the unescaped ``newString``.

    Args:
        fileName: Path of the file to process.
        useCSV: Prefer the replacements table over ``oldString``.
        oldString: Regular expression used in RegExp mode.
        newString: Replacement template used in RegExp mode.
        fileEnc: Encoding used to read and write the file (and its backup).
        consoleEnc: Passed through to string_unescape().
        ignoreCase: Match case-insensitively in RegExp mode.
        backupOld: Save the original content as ``fileName + '.backup'``
            before overwriting.
        work_dir: Prefix removed from ``fileName`` in progress messages.

    Returns:
        True when at least one replacement was found and the file was
        rewritten, False otherwise (including when the file is not a
        writeable regular file).  I/O and decoding errors are not caught
        here and propagate to the caller.
    """
    if not (os.path.isfile(fileName) and os.access(fileName, os.W_OK)):
        if sys.stdout.isatty():
            print("File not writeable:" + fileName)
        return False

    with open(fileName, 'r', encoding=fileEnc) as f:
        text_old = f.read()
    text = text_old
    shown_name = fileName.replace(work_dir, '')
    # Number of table rows that matched (CSV mode) or of regexp matches.
    found = 0

    if useCSV and os.path.isfile(REPLACERS_FILE):
        total_matches = 0
        with open(REPLACERS_FILE, 'r', newline='', encoding=CSV_ENCODING) as f:
            for row in csv.reader(f, 'replacements'):
                # Skip blank lines, rows without a delimiter, and rows with
                # an empty search text (which would match between every
                # character of the file).
                if len(row) < 2 or not row[0]:
                    continue
                orig, repl = row[0], row[1]
                hits = text.count(orig)
                if hits:
                    found += 1
                    total_matches += hits
                    text = text.replace(orig, repl)
        if found and sys.stdout.isatty():
            print(" Found %d pattern(s) and %d match(es) in" % (found, total_matches), shown_name)
    elif oldString != '':
        flags = re.MULTILINE
        if ignoreCase:
            flags |= re.IGNORECASE
        pattern = re.compile(oldString, flags)
        replacement = string_unescape(newString, consoleEnc)
        # subn() substitutes and counts the matches in a single pass.
        text, found = pattern.subn(replacement, text)
        if found and sys.stdout.isatty():
            print(" Found %d match(es) in" % found, shown_name)

    if found > 0:
        if backupOld:
            with open(fileName + '.backup', 'w', encoding=fileEnc) as f:
                f.write(text_old)
        with open(fileName, 'w', encoding=fileEnc) as f:
            f.write(text)
    return found > 0
def main():
    """Command-line entry point.

    Parses the options, collects every file below the start directory whose
    name matches one of the comma-separated masks, runs
    replaceStringInFile() on each of them (skipping this script and the
    replacements table) and prints a summary when stdout is a terminal.

    Exits with status 0 after printing the usage text, and with status 2
    when the regular expression is invalid or the start path does not exist.
    """
    parser = argparse.ArgumentParser()
    # change defaults manually to fix them in place instead of typing from command line each time
    parser.add_argument("-p", help=f"File masks (default: {DEFAULT_MASKS})", default=DEFAULT_MASKS, metavar='file_masks')
    parser.add_argument("-d", help="Search path", default=DEFAULT_PATH, metavar='files_start_path')
    parser.add_argument("-e", help=f"Files encoding (default: {DEFAULT_FILE_ENCODING})", default=DEFAULT_FILE_ENCODING, metavar='files_encoding')
    parser.add_argument("-r", help=(f"Use {REPLACERS_FILE} (Format: text" + DELIMITER_CHAR + "replacer; utf-8)"), action="store_true")
    parser.add_argument("-i", help="Ignore word case", action="store_true")
    parser.add_argument("-b", help="Backup old files with .backup extension", action="store_true")
    regroup = parser.add_argument_group('patterns')
    regroup.add_argument("-o", help="RegExp to replace", metavar='old_regexp')
    regroup.add_argument("-n", help="Text replacer", metavar='new_string')
    app_args = parser.parse_args()

    if len(sys.argv) < 3 and not app_args.r:
        print('Replacer v' + VERSION_STRING)
        parser.print_help(sys.stderr)
        print('\nEscape special characters & | ( < > ^ with ^ and " with \\ in command line.')
        print('Capture group replacers are \\\\1, \\\\2 etc.')
        print('Example: ' + os.path.basename(__file__) + ' -e cp932 -p "*.txt,*.tmp" -d .\\outdir -o "te(st)[^^\\w]+\\"\\d{2,}\\"" -n "te\\\\1 "')
        sys.exit()

    oldString = app_args.o or ''
    newString = app_args.n or ''
    encStr = app_args.e
    # Tolerate spaces around the commas ("*.txt, *.tmp") and empty entries.
    masks = [mask.strip() for mask in (app_args.p or DEFAULT_MASKS).split(',') if mask.strip()]
    path = app_args.d or DEFAULT_PATH

    if oldString and not app_args.r:
        # Validate the pattern once, with the flags the replacement really
        # uses, so a typo gives a short message instead of a traceback.
        try:
            re.compile(oldString, re.MULTILINE)
        except re.error as err:
            print("Invalid regular expression: %s" % err, file=sys.stderr)
            sys.exit(2)

    if sys.stdout.isatty():
        if not app_args.r:
            print("\nRegExp Pattern : «" + oldString + '»')
            print("String Replacer : «" + newString + '»')
        print("File Masks : " + ', '.join(masks))
        print("Encoding : " + encStr)
        print("Directory : " + path)

    if not os.path.exists(path):
        if sys.stdout.isatty():
            print("Path don't exist: " + path)
        sys.exit(2)

    matchingFileList = []
    for root, _dirs, files in os.walk(path):
        for filen in files:
            if any(fnmatch.fnmatch(filen, mask) for mask in masks):
                matchingFileList.append(os.path.join(root, filen))
    if sys.stdout.isatty():
        print('Found matching files : ' + str(len(matchingFileList)))

    # Never rewrite this script or the replacements table.  Compare whole
    # file names so that unrelated files whose path merely contains one of
    # these names are still processed.
    skipped_names = {os.path.basename(__file__), REPLACERS_FILE}
    fileCount = 0
    filesReplaced = 0
    for currentFile in matchingFileList:
        if os.path.basename(currentFile) in skipped_names:
            continue
        fileCount += 1
        try:
            fileReplaced = replaceStringInFile(currentFile, app_args.r, oldString, newString, encStr, 'latin1', app_args.i, app_args.b, path)
        except (OSError, UnicodeError) as err:
            # One unreadable or wrongly encoded file must not abort the run
            # after other files have already been rewritten.
            print("Failed to process %s: %s" % (currentFile, err), file=sys.stderr)
            continue
        if fileReplaced:
            filesReplaced += 1

    if sys.stdout.isatty():
        print("Files searched : " + str(fileCount))
        print("Files replaced : " + str(filesReplaced))


if __name__ == '__main__':
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example
replace.csv file: