Skip to content

Instantly share code, notes, and snippets.

@dalanmiller
Created October 18, 2014 07:43
Show Gist options
  • Save dalanmiller/6c277983c162422d7da7 to your computer and use it in GitHub Desktop.
Save dalanmiller/6c277983c162422d7da7 to your computer and use it in GitHub Desktop.
import os
import re
import string
import random
# Folder that is walked recursively for ``.txt`` files to scramble.
path = "path_to_folder_just_about_set_of_text_files"

# A token counts as a word only when it consists entirely of word characters.
# ``\Z`` (not ``$``) so a trailing newline can never sneak into a match.
word = re.compile(r"\w+\Z", re.UNICODE)

# Runs of non-whitespace; everything between them (spaces, tabs, newlines)
# is copied to the output untouched.
TOKEN_RE = re.compile(r"\S+")


def scramble_word(token):
    """Return *token* with its inner characters shuffled.

    The first and last characters stay in place. The token is returned
    unchanged when it has three characters or fewer, when it contains any
    non-word character (e.g. ``"hello,"`` or ``"don't"``), or when it ends
    in a punctuation character.
    """
    if len(token) <= 3 or not word.match(token):
        return token
    # "_" is the only punctuation character that ``\w`` admits.
    if token[-1] in string.punctuation:
        return token
    inner = list(token[1:-1])
    random.shuffle(inner)
    return token[0] + "".join(inner) + token[-1]


def scramble_text(text):
    """Return *text* with every whitespace-separated word scrambled.

    All whitespace, including line endings, is preserved exactly, so the
    last word of a line keeps its real final letter instead of having the
    newline treated as part of the word.
    """
    return TOKEN_RE.sub(lambda found: scramble_word(found.group(0)), text)


def scramble_folder(folder):
    """Write a ``<name>-innertrans.txt`` copy of each ``.txt`` file under *folder*.

    Files whose name contains ``innertrans`` are skipped so that output from
    an earlier run is not scrambled again. As a side effect the source file
    is normalised to BOM-less UTF-8 (undecodable bytes dropped); it is only
    rewritten when that normalisation actually changes its bytes.
    """
    for dirpath, _dirnames, filenames in os.walk(folder):
        for filename in filenames:
            if not filename.endswith(".txt") or "innertrans" in filename:
                continue
            source = os.path.join(dirpath, filename)
            if not os.path.isfile(source):
                continue

            # Work on decoded text so that shuffling moves whole characters
            # rather than the individual bytes of a multi-byte sequence.
            with open(source, "rb") as handle:
                raw = handle.read()
            text = raw.decode("utf-8-sig", "ignore")
            cleaned = text.encode("utf-8")
            if cleaned != raw:
                with open(source, "wb") as handle:
                    handle.write(cleaned)

            out = scramble_text(text)
            # One pre-formatted argument keeps this statement meaning the
            # same thing under both Python 2 and Python 3.
            print("%d %s" % (len(out), out[:25]))

            # splitext keeps dots inside the stem ("my.notes.txt" -> "my.notes").
            stem, extension = os.path.splitext(filename)
            new_filename = os.path.join(dirpath, stem + "-innertrans" + extension)
            print(new_filename)
            with open(new_filename, "wb") as handle:
                handle.write(out.encode("utf-8"))


scramble_folder(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment