Created
December 12, 2009 10:14
-
-
Save erikbgithub/254836 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
# Tokenizer: a token is any run of characters that contains neither a space
# nor a pipe, so both characters act as field separators.
sep = re.compile(r"[^ |]+")

# A possible string that might be delivered by one of my colleagues over HTTP.
test_text = "q1w2e3r4t5z|11°11.111'N 22°22.222'W"
def parse_all(text, dispatcher):
    """
    Parse *text* with a dispatch table and return the recognised fields.

    The text is split into tokens with the module-level ``sep`` pattern and
    every function in *dispatcher* is tried on every token. All grammar
    knowledge lives in the dispatch table, so this function itself is
    independent of what is being parsed.

    dispatcher maps a field name to a function that takes a token and
    returns the parsed value, or None when the token does not match
    (functions built by generate_parser() behave this way).

    Returns a dict mapping each field name that matched at least one token
    to its parsed value. When several tokens match the same name, the value
    from the last matching token wins.
    """
    parsed = {}
    for token in sep.findall(text):
        for name, parser in dispatcher.items():
            # Call each parser once per token and reuse the result for both
            # the "did it match" check and the stored value.
            value = parser(token)
            if value is not None:
                parsed[name] = value
    return parsed
def generate_parser(regex):
    """
    Build a parse function for the regular expression *regex*.

    The returned function takes any string and returns the first part of it
    that matches *regex*, or None when nothing matches. A successful match
    of zero length is returned as the empty string, not as None.

    These parse functions work well in a dispatch table for parse_all.
    """
    pattern = re.compile(regex)

    def parse(text):
        # Search only once, and test the match object rather than the matched
        # text: an empty string is falsy and would be mistaken for "no match".
        found = pattern.search(text)
        if found is None:
            return None
        return found.group()

    return parse
# Dispatch table for parse_all: field name -> parse function.
# The dot in the coordinate patterns is escaped so that it only matches a
# literal "." and not any character.
d_table = {
    # 1-2 digits, "°", two digits, ".", three digits, "'" and one of N/S
    'lat': generate_parser(r"\d{1,2}°\d{2}\.\d{3}'[NS]"),
    # same layout, ending in one of W/E
    'lon': generate_parser(r"\d{1,2}°\d{2}\.\d{3}'[WE]"),
    # an ASCII letter followed by at least six word characters
    'id': generate_parser(r"[a-zA-Z]\w{6,}"),
}

# Single-argument call form prints the same on Python 2 and Python 3.
print(parse_all(test_text, d_table))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment