Skip to content

Instantly share code, notes, and snippets.

@rcackermanCC
Created June 22, 2016 20:13
Show Gist options
  • Save rcackermanCC/e4e3d05a2640087f9893a41fbcf938e2 to your computer and use it in GitHub Desktop.
Save rcackermanCC/e4e3d05a2640087f9893a41fbcf938e2 to your computer and use it in GitHub Desktop.
Clean weekday names
def clean_words(text):
    """Expand abbreviated or misspelled weekday names in *text*.

    Each rule fuzzily matches one lowercase word that looks like a weekday
    (for example ``m``, ``mon``, ``tues``, ``thurs``, ``sat``) and rewrites it
    to the full lowercase name. The separator that followed the word, if any,
    is captured by the trailing group and re-emitted unchanged.

    Matching is case-sensitive; only lowercase input is recognised.

    Args:
        text: The string to normalise.

    Returns:
        A copy of ``text`` with recognised weekday tokens replaced by their
        full names. Text that matches no rule is returned unchanged.
    """
    # Imported locally: no module-level ``import re`` is visible in this file.
    import re

    # An ordered tuple (not a dict) keeps the substitution order deterministic
    # on every Python version.
    #
    # Every pattern is anchored with a leading ``\b`` and uses ``[a-z]*?``
    # rather than ``.*?`` so that a match can never start in the middle of a
    # word ("9am") or run across spaces/punctuation into a following word
    # ("such a nice day").
    #
    # The last capture group of each pattern is the trailing separator; the
    # replacement's back-reference number must point at that group.
    rules = (
        (r'\b(su[a-z]*?(n|y))(\W|\s|\b)', r'sunday\3'),
        (r'\b((mo[a-z]*?(n|y)(?:s)*)|m)(\W|\s|\b)', r'monday\4'),
        (r'\b(t(u|y)[a-z]*?(e|s|y)(?:s)*|t)(\W|\s|\b)', r'tuesday\4'),
        (r'\b(w|w[a-z]*?d([a-z]*?|s|y|t)(?:s)*)(\W|\s|\b)', r'wednesday\3'),
        (r'\b(th|th[a-z]*?(r|s|y|t)(?:s)*)(\W|\s|\b)', r'thursday\3'),
        (r'\b(f|fr[a-z]*?(i|y|t)(?:s)*)(\W|\s|\b)', r'friday\3'),
        (r'\b(sa[a-z]*?(y|t))(\W|\s|\b)', r'saturday\3'),
    )

    # ``re.sub`` caches compiled patterns internally, so repeated calls do not
    # recompile them.
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment