Created
May 27, 2014 23:56
-
-
Save rectangletangle/9251b9c524aabf252774 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# This is the literal code as copied from the file; for our purposes it's
# equivalent to the code below. It is kept inside a bare string literal so that
# it is never executed (it refers to a `settings` object that isn't defined
# here).
"""
def extract_hashtag_values(text, pattern=settings.HASHTAG_PATTERN):
    for hashtag_match in re.finditer(pattern, text):
        yield hashtag_match.group(1)
"""
# This utilizes functions, list comprehensions, and regexes to do something useful and reusable.
def extract_hashtags(text):
    """Return the hashtag values found in *text*, without the leading ``#``.

    A ``#`` only starts a hashtag when it is the first character of the text
    or directly follows a whitespace character. The tag itself must contain a
    run of 2 to 60 letters, digits or hyphens, optionally surrounded by
    further word characters.

    Args:
        text: The string to search.

    Returns:
        A list of the matched tag strings in order of appearance; empty when
        nothing matches.
    """
    # This is a pretty complicated regex. Usually, they're simpler than this. The complexity is necessary in order to
    # catch a lot of "edge cases." So this isn't a great example in that regard.
    # http://en.wikipedia.org/wiki/Edge_case
    #
    # The pattern is a raw string (r'...') so that backslash sequences such as `\s` and `\w` reach the regex engine
    # untouched instead of being treated as (invalid) string escapes by Python.
    pattern = r'(?:(?<=\s)|^)#(\w*[A-Za-z\d\-]{2,60}\w*)'

    # Group 1 is the tag text after the `#`.
    return [hashtag_match.group(1) for hashtag_match in re.finditer(pattern, text)]
def extract_phone_numbers(text):
    """Return every phone number in *text* written as ``(ddd)-ddd-dddd``.

    Args:
        text: The string to search.

    Returns:
        A list of the matched phone-number strings, parentheses and hyphens
        included, in order of appearance; empty when nothing matches.
    """
    # This uses a simpler regex to do an overall similar task. `\(` matches `(`, and `\d` matches a digit.
    # A raw string (r'...') keeps the backslashes intact for the regex engine.
    pattern = r'\(\d\d\d\)-\d\d\d-\d\d\d\d'

    # `finditer` makes an iterator (something we can loop through) of matches to our pattern.
    # `group(0)` is the entire matched text.
    return [phone_match.group(0) for phone_match in re.finditer(pattern, text)]
if __name__ == '__main__':
    # Here we call the function; this runs the code defined above twice.
    print(extract_hashtags('some #hashtagged #text'))  # ['hashtagged', 'text']
    print(extract_hashtags('#foo bar #baz #1-2-3'))  # ['foo', 'baz', '1-2-3']

    # Notice the number in the middle isn't an *exact* pattern match.
    print(extract_phone_numbers('(123)-456-7890 342-234-3245 (098)-765-4321'))  # ['(123)-456-7890', '(098)-765-4321']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment