Skip to content

Instantly share code, notes, and snippets.

@rossgoodwin
Created October 8, 2016 18:00
Show Gist options
  • Select an option

  • Save rossgoodwin/5638dfa0f25575dc4b95b3b4a778ae25 to your computer and use it in GitHub Desktop.

Select an option

Save rossgoodwin/5638dfa0f25575dc4b95b3b4a778ae25 to your computer and use it in GitHub Desktop.
import re
# Load the whole source text and split it into raw lines.
with open('faulkner.txt', 'r') as infile:
    complete_lines = infile.read().split('\n')

# Trim surrounding whitespace, then drop blank lines and lines made up only
# of digits (presumably page numbers -- confirm against the input file).
# List comprehensions are used instead of map/filter so that these are real
# lists on both Python 2 and Python 3: `lines` is measured with len() here
# and iterated more than once further down the script.
stripped_lines = [raw.strip() for raw in complete_lines]
lines = [line for line in stripped_lines if line and not line.isdigit()]

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(len(lines))
def is_proper_name(prior_word, word):
    """Guess whether ``word`` is a proper name from casing and position.

    A word counts as a proper name when its first character is uppercase,
    the remainder is lowercase, and the preceding token does not end with
    sentence-final punctuation (``.``, ``?`` or ``!``) -- a capitalised
    word right after such a token is treated as an ordinary sentence start.

    Args:
        prior_word: The token immediately before ``word``.
        word: The token being classified.

    Returns:
        True if ``word`` looks like a proper name, otherwise False.
        An empty ``word`` is never a name; an empty ``prior_word`` is
        treated as not ending a sentence.
    """
    # Slicing (word[:1]) instead of indexing (word[0]) keeps empty strings
    # from raising IndexError. A one-character word such as "I" is rejected
    # because ''.islower() is False.
    if not (word[:1].isupper() and word[1:].islower()):
        return False
    # endswith() accepts a tuple of suffixes and is safe on an empty string.
    return not prior_word.endswith(('.', '?', '!'))
def get_proper_names(text):
    """Return the tokens of ``text`` that look like proper names.

    The text is split on whitespace only, so punctuation attached to a
    token stays part of the returned name. Every token except the first is
    judged together with the token before it via ``is_proper_name``; the
    first token has no predecessor and is never reported.

    Args:
        text: A string to scan.

    Returns:
        A list of matching tokens in order of appearance, duplicates kept.
    """
    words = text.split()
    # Pair each token with its predecessor: (w0, w1), (w1, w2), ...
    return [
        current
        for previous, current in zip(words, words[1:])
        if is_proper_name(previous, current)
    ]
# The text that will be rewritten: the cleaned lines joined back together.
new_text = '\n'.join(lines)

# Gather every candidate name line by line (duplicates included) ...
all_names = []
for l in lines:
    names = get_proper_names(l)
    all_names += names

# ... then collapse to distinct names; set ordering is arbitrary.
unique_names = list(set(all_names))
def make_reg_exp(name):
    """Build a regex pattern matching ``name`` as a whole word.

    The name is escaped so regex metacharacters in it match literally, and
    it is wrapped in ``\\b`` word-boundary anchors on both sides.

    Args:
        name: The literal text to match.

    Returns:
        The pattern as a string, suitable for ``re.sub`` / ``re.search``.
    """
    boundary = r'\b'
    literal = re.escape(name)
    return boundary + literal + boundary
# Rewrite the text one name at a time: each whole-word occurrence of a
# detected name is replaced with the literal 'John'. Replacements are
# applied sequentially, so each pass sees the result of the previous ones.
for name in unique_names:
    new_text = re.sub(make_reg_exp(name), 'John', new_text)

# A parenthesised single-argument print behaves the same on Python 2 and 3.
print(new_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment