rossgoodwin · October 8, 2016 18:00
diff --git a/propername.py b/propername.py
 import re

 # Load the source text and break it into raw lines.
 with open('faulkner.txt', 'r') as infile:
 	raw_text = infile.read()
 complete_lines = raw_text.split('\n')

 # Trim surrounding whitespace, then discard empty lines and lines that
 # consist only of digits (presumably page numbers -- confirm against the
 # input file).
 stripped_lines = [raw_line.strip() for raw_line in complete_lines]
 lines = [entry for entry in stripped_lines if entry and not entry.isdigit()]

 # Report how many lines survived the filtering.
 print(len(lines))

 def is_proper_name(prior_word, word):
 	"""Guess whether `word` is a proper name from its casing and position.

 	A word qualifies when its first character is uppercase and the rest is
 	lowercase, unless `prior_word` ends with '.', '?' or '!' -- in that
 	case the capital letter is attributed to the start of a sentence.

 	Args:
 		prior_word: The token immediately before `word`.
 		word: The token being classified.

 	Returns:
 		True if `word` looks like a proper name, otherwise False. An empty
 		`word` is never a proper name, and an empty `prior_word` is treated
 		as not ending a sentence.
 	"""
 	# Slicing instead of indexing keeps an empty string from raising
 	# IndexError.
 	capitalized = word[:1].isupper() and word[1:].islower()
 	# str.endswith accepts a tuple of suffixes and is False for ''.
 	follows_sentence_end = prior_word.endswith(('.', '?', '!'))
 	return capitalized and not follows_sentence_end

 def get_proper_names(text):
 	"""Collect the tokens of `text` that look like proper names.

 	`text` is split on whitespace and every token after the first is
 	checked with is_proper_name() against the token that precedes it. The
 	first token has no predecessor and is never reported.

 	Args:
 		text: A line of text.

 	Returns:
 		A list of the matching tokens in order of appearance (duplicates
 		kept), with trailing punctuation removed.
 	"""
 	# A token such as 'John,' passes the casing test, but a pattern built as
 	# r'\b' + name + r'\b' cannot match it when whitespace follows the comma,
 	# so trailing punctuation is dropped before the name is reported.
 	trailing_punctuation = '.,;:!?"\')]}-'
 	tokens = text.split()
 	return [
 		word.rstrip(trailing_punctuation)
 		for prior_word, word in zip(tokens, tokens[1:])
 		if is_proper_name(prior_word, word)
 	]

 # Gather the candidate names found on every line, in line order.
 all_names = [
 	found
 	for line in lines
 	for found in get_proper_names(line)
 ]

 # Drop duplicates; the resulting order is whatever the set yields.
 unique_names = list(set(all_names))

 # Re-assemble the filtered lines into one newline-separated string.
 new_text = '\n'.join(lines)

 def make_reg_exp(name):
 	"""Return a regex pattern for `name`, escaped and wrapped in word-boundary anchors."""
 	boundary = r'\b'
 	return ''.join([boundary, re.escape(name), boundary])

 # Replace every whole-word occurrence of each collected name with 'John'.
 for name in unique_names:
 	name_pattern = re.compile(make_reg_exp(name))
 	new_text = name_pattern.sub('John', new_text)

 # Emit the rewritten text.
 print(new_text)
	import re

	# Load the source text and break it into raw lines. The body of the
	# `with` statement must be indented under it.
	with open('faulkner.txt', 'r') as infile:
		complete_lines = infile.read().split('\n')

	# Trim surrounding whitespace, then discard empty lines and lines that
	# consist only of digits (presumably page numbers -- confirm against the
	# input file).
	stripped_lines = [raw_line.strip() for raw_line in complete_lines]
	lines = [entry for entry in stripped_lines if entry and not entry.isdigit()]

	# Report how many lines survived the filtering.
	print(len(lines))

	def is_proper_name(prior_word, word):
		"""Guess whether `word` is a proper name from its casing and position.

		A word qualifies when its first character is uppercase and the rest
		is lowercase, unless `prior_word` ends with '.', '?' or '!' -- in
		that case the capital letter is attributed to the start of a
		sentence.

		Args:
			prior_word: The token immediately before `word`.
			word: The token being classified.

		Returns:
			True if `word` looks like a proper name, otherwise False. An
			empty `word` is never a proper name, and an empty `prior_word`
			is treated as not ending a sentence.
		"""
		# Slicing instead of indexing keeps an empty string from raising
		# IndexError.
		capitalized = word[:1].isupper() and word[1:].islower()
		# str.endswith accepts a tuple of suffixes and is False for ''.
		follows_sentence_end = prior_word.endswith(('.', '?', '!'))
		return capitalized and not follows_sentence_end

	def get_proper_names(text):
		"""Collect the tokens of `text` that look like proper names.

		`text` is split on whitespace and every token after the first is
		checked with is_proper_name() against the token that precedes it.
		The first token has no predecessor and is never reported.

		Args:
			text: A line of text.

		Returns:
			A list of the matching tokens in order of appearance
			(duplicates kept), with trailing punctuation removed.
		"""
		# A token such as 'John,' passes the casing test, but a pattern
		# built as r'\b' + name + r'\b' cannot match it when whitespace
		# follows the comma, so trailing punctuation is dropped before the
		# name is reported.
		trailing_punctuation = '.,;:!?"\')]}-'
		tokens = text.split()
		return [
			word.rstrip(trailing_punctuation)
			for prior_word, word in zip(tokens, tokens[1:])
			if is_proper_name(prior_word, word)
		]

	# Gather the candidate names found on every line, in line order. The
	# loop body must be indented under the `for` statement.
	all_names = list()
	for l in lines:
		names = get_proper_names(l)
		all_names.extend(names)

	# Drop duplicates; the resulting order is whatever the set yields.
	unique_names = list(set(all_names))

	# Re-assemble the filtered lines into one newline-separated string.
	new_text = '\n'.join(lines)

	def make_reg_exp(name):
		"""Return a regex pattern for `name`, escaped and wrapped in word-boundary anchors.

		Args:
			name: The literal text to match.

		Returns:
			The pattern string; regex metacharacters in `name` are escaped
			so they match literally.
		"""
		return r'\b' + re.escape(name) + r'\b'

	# Replace every whole-word occurrence of each collected name with
	# 'John'. The substitution must be indented under the `for` statement.
	for name in unique_names:
		new_text = re.sub(make_reg_exp(name), 'John', new_text)

	# Emit the rewritten text.
	print(new_text)
No results found