Skip to content

Instantly share code, notes, and snippets.

@ptrourke
Created November 6, 2015 18:01
Show Gist options
  • Save ptrourke/764588ef4bbd06a7641c to your computer and use it in GitHub Desktop.
Save ptrourke/764588ef4bbd06a7641c to your computer and use it in GitHub Desktop.
Simple script for running tests of regular expression patterns against a list of input strings.
__author__ = 'ptrourke'
import re
'''
A simple script for running tests of regular expressions (regexes) against a list of input strings.
To use, change the values of test_strings, test_patterns, and group_indices .
'''
# The strings to be used as test vectors to test the regexes.
# The first two contain several underscores; the last contains none, so
# patterns that require underscores cannot match it.
test_strings = [
'march_of_the_penguins',
'abcdefg_hijklmnop_QRSTUVW_xyz_ABC',
"this-shouldn't=match-at-all"
]
# The regex patterns to test against the test vectors.
# Each pattern captures two groups separated by an underscore and requires a
# further underscore after the second group; they differ only in how the
# groups are quantified.
test_patterns = [
# Group 1 greedy, group 2 greedy.
'^(.*)_(.*)_.*$',
# Group 1 lazy, group 2 greedy.
'^(.*?)_(.*)_.*$',
# Group 1 greedy, group 2 lazy.
'^(.*)_(.*?)_.*$',
# Group 1 lazy, group 2 lazy.
'^(.*?)_(.*?)_.*$',
# Negated character class: neither group may contain an underscore.
'^([^_]*)_([^_]*)_.*$',
]
# The match groups to display (use `[0]` to show the full string).
# Each entry is passed to the match object's group() method.
group_indices = [1,2]
def test_regex(regex, item, group_indices):
m = re.match(regex, item)
result_list = []
for index in group_indices:
result_list.append(m.group(index))
return str(result_list)
def test_regexes(test_strings, test_patterns, group_indices):
test_string_count = len(test_strings)
pattern_count = len(test_patterns)
group_count = len(group_indices)
print 'Testing %d test strings against %d regexes\n' % (test_string_count, pattern_count )
pattern_length = len(max(test_patterns, key=len)) + 4
for item in test_strings:
print "Test string: %s\n" % item
item_length = group_count * (len(item) + 5)
group_listing_string = ', '.join(['Group %d' % group_id for group_id in group_indices])
print '%s %s %s' % ('# '.rjust(3), 'Regex '.ljust(pattern_length), ('Matches: %s' % group_listing_string ).ljust(item_length))
for index, test_pattern in list(enumerate(test_patterns)):
try:
regex_test_result = test_regex(test_pattern, item, group_indices)
except AttributeError, e:
regex_test_result = ''
print '%s %s: %s' % (('%d.' % (index + 1)).rjust(3), ('\"%s\"' % test_pattern).ljust(pattern_length), regex_test_result.ljust(item_length))
print " \n"
# Run the comparison using the module-level test data. This call sits at
# module top level, so it executes on import as well as when run as a script.
test_regexes(test_strings, test_patterns, group_indices)
@ptrourke
Copy link
Author

ptrourke commented Nov 6, 2015

Example output:

Testing 3 test strings against 5 regexes

Test string: march_of_the_penguins

 #  Regex                    Matches: Group 1, Group 2                           
 1. "^(.*)_(.*)_.*$"        : ['march_of', 'the']                                 
 2. "^(.*?)_(.*)_.*$"       : ['march', 'of_the']                                 
 3. "^(.*)_(.*?)_.*$"       : ['march_of', 'the']                                 
 4. "^(.*?)_(.*?)_.*$"      : ['march', 'of']                                     
 5. "^([^_]*)_([^_]*)_.*$"  : ['march', 'of']                                     


Test string: abcdefg_hijklmnop_QRSTUVW_xyz_ABC

 #  Regex                    Matches: Group 1, Group 2                                                   
 1. "^(.*)_(.*)_.*$"        : ['abcdefg_hijklmnop_QRSTUVW', 'xyz']                                        
 2. "^(.*?)_(.*)_.*$"       : ['abcdefg', 'hijklmnop_QRSTUVW_xyz']                                        
 3. "^(.*)_(.*?)_.*$"       : ['abcdefg_hijklmnop_QRSTUVW', 'xyz']                                        
 4. "^(.*?)_(.*?)_.*$"      : ['abcdefg', 'hijklmnop']                                                    
 5. "^([^_]*)_([^_]*)_.*$"  : ['abcdefg', 'hijklmnop']                                                    


Test string: this-shouldn't=match-at-all

 #  Regex                    Matches: Group 1, Group 2                                       
 1. "^(.*)_(.*)_.*$"        :                                                                 
 2. "^(.*?)_(.*)_.*$"       :                                                                 
 3. "^(.*)_(.*?)_.*$"       :                                                                 
 4. "^(.*?)_(.*?)_.*$"      :                                                                 
 5. "^([^_]*)_([^_]*)_.*$"  :                                                                 

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment