Created
December 15, 2013 23:46
-
-
Save jgosmann/7979969 to your computer and use it in GitHub Desktop.
Generate a regular expression matching all numbers of the following form: - Any number of digits written out as English words (case insensitive), each followed by at least one whitespace character. Any number of switch typos (e.g. eihgt instead of eight) are allowed, but no character may move more than one position.
- followed by any number of digits.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
def gen_switch_typo_variants(string):
    """Return every spelling of *string* reachable by adjacent-letter swaps.

    Each character may take part in at most one swap, so no character ends
    up more than one position away from where it started.  The unmodified
    string is always the first entry of the result.

    Swapping two identical neighbours (e.g. the ``ee`` in ``three``) yields
    a spelling that was already produced; such repeats are dropped so each
    variant appears exactly once, in first-seen order.

    Args:
        string: The word to generate variants for.

    Returns:
        A list of distinct variant strings.  Strings shorter than two
        characters have nothing to swap and come back as ``[string]``.
    """
    if len(string) < 2:
        return [string]

    # Either keep the first character in place and vary the remainder ...
    keep_first = [
        string[0] + tail for tail in gen_switch_typo_variants(string[1:])]
    # ... or swap the first two characters and vary what follows them.
    swapped_head = string[1] + string[0]
    swap_first = [
        swapped_head + tail for tail in gen_switch_typo_variants(string[2:])]

    # Order-preserving de-duplication (a plain set would scramble the order).
    seen = set()
    variants = []
    for variant in keep_first + swap_first:
        if variant not in seen:
            seen.add(variant)
            variants.append(variant)
    return variants
def make_case_insensitive(string):
    """Return a regex fragment matching *string* regardless of letter case.

    Every character ``c`` is replaced by the character class
    ``[c<c.upper()>]``, e.g. ``'one'`` becomes ``'[oO][nN][eE]'``.
    Characters are inserted into the class verbatim (no regex escaping).

    Args:
        string: The text to convert; the empty string yields ``''``.

    Returns:
        The concatenated character classes as a single string.
    """
    # str.join handles the empty string and needs no ``reduce`` (which is
    # not a builtin on Python 3).
    return ''.join('[{}{}]'.format(c, c.upper()) for c in string)
# English names of the ten decimal digits; the script expands each of these
# into its typo variants, in this order.
number_words = 'one two three four five six seven eight nine zero'.split()
if __name__ == '__main__':
    # Expand every digit word into its swap-typo spellings and flatten the
    # per-word lists into one list (word order, then variant order).
    all_lower_number_writings = [
        variant
        for word in number_words
        for variant in gen_switch_typo_variants(word)]

    # Turn each lower-case spelling into a case-insensitive regex fragment.
    all_lower_number_writings_case_insensitive = [
        make_case_insensitive(w) for w in all_lower_number_writings]

    # Anchored pattern: zero or more written-out digits, each followed by
    # at least one whitespace character, then zero or more literal digits.
    regex = r'^(({writings})\s+)*\d*$'.format(
        writings='|'.join(all_lower_number_writings_case_insensitive))

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(regex)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment