Last active
August 29, 2015 14:19
-
-
Save polymorphm/b364b8a0d1d27b84d754 to your computer and use it in GitHub Desktop.
Find some text words in files and replace them with other words
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- mode: python; coding: utf-8 -*- | |
assert str is not bytes | |
import sys | |
import os, os.path | |
import csv | |
import itertools | |
import random | |
def replace_middle_iter_create():
    """Yield an endless stream of unique placeholder strings.

    Each placeholder has the form ``**d*d*d*d*d*d*d*d*d*d*N**``: ten random
    decimal digits joined by ``*``, followed by a running counter ``N`` that
    starts at 0 and grows by one per yielded value. The counter keeps the
    placeholders of one iterator distinct from each other; the random digits
    make an accidental match with ordinary text unlikely.
    """
    serial = 0
    while True:
        digits = [str(random.randrange(10)) for _ in range(10)]
        yield '**{}*{}**'.format('*'.join(digits), serial)
        serial += 1
def out_name_iter_create():
    """Yield output file names without end: out-0.txt, out-1.txt, out-2.txt, ..."""
    yield from map('out-{}.txt'.format, itertools.count())
def _collect_txt_paths(source_dir):
    """Return the paths of every ``*.txt`` file found anywhere under *source_dir*."""
    source_path_list = []
    for dir_path, _dir_names, file_names in os.walk(source_dir):
        source_path_list.extend(
            os.path.join(dir_path, file_name)
            for file_name in file_names
            if file_name.endswith('.txt')
        )
    return source_path_list


def _load_replace_list(replace_csv_path, replace_middle_iter):
    """Read ``(source, placeholder, target)`` rules from a two-column CSV file.

    Rows that do not have exactly two columns are skipped. Each accepted row
    consumes one placeholder from *replace_middle_iter*.
    """
    replace_list = []
    # The default newline translation is kept on purpose: source files are
    # read the same way, so multi-line patterns compare against '\n' on both
    # sides.
    with open(replace_csv_path, encoding='utf-8', errors='replace') as replace_csv_fd:
        for replace_row in csv.reader(replace_csv_fd):
            if len(replace_row) != 2:
                continue
            replace_source, replace_target = replace_row
            replace_list.append((
                replace_source,
                next(replace_middle_iter),
                replace_target,
            ))
    return replace_list


def _apply_replacements(data, replace_list):
    """Return *data* with every rule in *replace_list* applied in two passes.

    Pass one swaps the FIRST occurrence of each source string for its
    placeholder; pass two swaps every placeholder for its target. Going
    through placeholders means text inserted as a target is never re-scanned
    as the source of another rule.

    NOTE: an empty source string matches at position 0, so its target ends
    up prepended to the text.
    """
    for replace_source, replace_middle, _replace_target in replace_list:
        data = data.replace(replace_source, replace_middle, 1)
    for _replace_source, replace_middle, replace_target in replace_list:
        data = data.replace(replace_middle, replace_target)
    return data


def main():
    """Command-line entry point: ``SOURCE_DIR REPLACE_CSV OUT_DIR``.

    Collects every ``*.txt`` file under SOURCE_DIR, loads the replacement
    rules from REPLACE_CSV (two columns: source text, target text), creates
    OUT_DIR and writes one ``out-N.txt`` file per source file whose content
    was changed by the rules. Unchanged files produce no output.

    Raises:
        SystemExit: with status 2 when the argument count is wrong.
        FileExistsError: from ``os.mkdir`` when OUT_DIR already exists, so
            an earlier run's results are never overwritten.
    """
    if len(sys.argv) != 4:
        prog = os.path.basename(sys.argv[0]) if sys.argv else 'replace'
        print(
            'usage: {} SOURCE_DIR REPLACE_CSV OUT_DIR'.format(prog),
            file=sys.stderr,
        )
        # sys.exit rather than the site-provided exit(): the latter is meant
        # for interactive sessions and is missing under ``python -S``.
        sys.exit(2)

    source_dir, replace_csv_path, out_dir = sys.argv[1:]

    out_name_iter = out_name_iter_create()
    # Paths are gathered before OUT_DIR is created, so an OUT_DIR located
    # inside SOURCE_DIR is never walked.
    source_path_list = _collect_txt_paths(source_dir)
    replace_list = _load_replace_list(
        replace_csv_path,
        replace_middle_iter_create(),
    )

    os.mkdir(out_dir)

    for source_path in source_path_list:
        with open(source_path, encoding='utf-8', errors='replace') as source_fd:
            orig_data = source_fd.read()

        data = _apply_replacements(orig_data, replace_list)
        if data == orig_data:
            continue

        out_path = os.path.join(out_dir, next(out_name_iter))
        with open(out_path, mode='w', encoding='utf-8', newline='\n') as out_fd:
            out_fd.write('{}\n'.format(data))
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment