Created
August 29, 2018 03:21
-
-
Save yonglam/d50c05a264fdd738960cf124fb709958 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------
# phone number regex
# -------------------
def isPhoneNumber(text):
    """Return True if ``text`` has the exact shape ``ddd-ddd-dddd``.

    The check is purely positional: the value must be a string of exactly
    12 characters, with hyphens at indexes 3 and 7 and decimal digits
    everywhere else.  Anything that is not a string (``None``, numbers, ...)
    is rejected with ``False`` instead of raising.
    """
    if not isinstance(text, str) or len(text) != 12:
        return False  # not a string, or not phone-number sized
    # Both separators must be hyphens ...
    if text[3] != '-' or text[7] != '-':
        return False
    # ... and each of the three digit groups must be made of decimal digits.
    return all(
        text[start:stop].isdecimal()
        for start, stop in ((0, 3), (4, 7), (8, 12))
    )
# Sanity check: a well-formed number should print True.
print(isPhoneNumber('415-555-1234'))

# Slide a 12-character window over the message and report every window
# that isPhoneNumber() accepts.
message = "Call me at 415-444-1011 or at 903-772-3878"
foundNumber = False
for i in range(len(message)):
    chunk = message[i:i + 12]  # windows near the end are shorter and never match
    if isPhoneNumber(chunk):
        print('Phone Number Found: ' + chunk)
        foundNumber = True
if not foundNumber:
    print('could not find a phone number')
# --------------------------------------------------------
import re

# re.compile() requires a pattern argument (calling it with none raises
# TypeError), so compile the plain phone-number pattern here.
phoneNumRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
message = "Call me at 415-444-1011 or at 903-772-3878"

# ---------------------------------------
#  Regex Basics
# ---------------------------------------
# Build a regular expression object, stored in phoneNumberRegex.
phoneNumberRegex = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')

# A regex object's search() method returns a match object for the first match.
mo = phoneNumberRegex.search(message)
print(mo.group())

# findall() returns a list of all matches.
print(phoneNumberRegex.findall(message))

# Groups: parentheses mark a group.
phonReg = re.compile(r'(\d\d\d)-(\d\d\d)-(\d\d\d\d)')
mo = phonReg.search(message)
print(mo.group())   # the whole match
print(mo.group(1))  # first parenthesised group
print(mo.group(2))  # second parenthesised group
print(mo.group(3))  # third parenthesised group
# Literal parentheses: escape them with a backslash so they are matched as
# characters instead of marking a group.
message = 'my phone number is (813)-255-8812)'
phoneReg = re.compile(r'\(\d\d\d\)-\d\d\d-\d\d\d\d')
mo = phoneReg.search(message)
print(mo.group())
# ---------------------------------------
#  Regex Logic
# ---------------------------------------
# | (pipe) - match any one of the alternatives inside the group
batRegex = re.compile(r'Bat(man|mobile|copter|bat)')
mo = batRegex.search('Batmobile lost a wheel')
print(mo.group())

# ? - the preceding group appears 0 or 1 times only
batRegex = re.compile(r'Bat(wo)?man')
mo = batRegex.search('The adventures of Batman')
moo = batRegex.search('Batwoman loves batman')
print(mo.group())
print(moo.group())

# Looks for an area code, but still matches when there is none.
# Raw string: '\d' inside a normal string literal is an invalid escape sequence.
phoneRegex = re.compile(r'(\d\d\d-)?\d\d\d-\d\d\d\d')
mo = phoneRegex.search("My phone number is 555-8821")
print(mo.group())

# * - 0 or more times
batRegex = re.compile(r'Bat(wo)*man')
mo = batRegex.search('Batwowowowowoman')
print(mo.group())

# + - once or more
batRegex = re.compile(r'Bat(wo)+man')
mo = batRegex.search("Adventures of Batwoman")
print(mo.group())
mo = batRegex.search("Batwowowowoman")
print(mo.group())
# ---------------------------------------
#  Regex Groups
# ---------------------------------------
# {n} - the preceding group must repeat exactly n times
haRegex = re.compile(r'(ha){3}')
mo = haRegex.search("he said 'hahaha'")
print(mo.group())

# Match three phone numbers in a row; each has an optional area code and may
# be followed by an optional comma and an optional space.
PhoneReg = re.compile(r"((\d\d\d-)?\d\d\d-\d\d\d\d(,)?( )?){3}")
mo = PhoneReg.search('phone numbers 888-888-8888, 233-111-2232 113-1212')
print(mo.group())

# {min,max} - range of repetitions
hareg = re.compile(r'(ha){3,5}')
mo = hareg.search('hahaha')
print(mo.group())
mo = hareg.search('hahahahaha')
print(mo.group())
hareg = re.compile(r'(ha){,5}')  # same as {0,5}
hareg = re.compile(r'(ha){3,}')  # 3 or more

# greedy (default)
digitRegex = re.compile(r'(\d){3,5}')
mo = digitRegex.search('1234567890')
print(mo.group())  # returns the maximum number of characters (5)

# non-greedy: a trailing ? makes the quantifier match as few as possible
digitRegex = re.compile(r'(\d){3,5}?')
mo = digitRegex.search('0123456789')
print(mo.group())  # returns the minimum number of characters (3)
# ---------------------------------------------
#  Find All
# ---------------------------------------------
# Use a message that actually contains numbers in ddd-ddd-dddd form, so
# findall() has something to return.
message = "Call me at 415-444-1011 or at 903-772-3878"

# Without groups, findall() returns a list of the matched strings.
phoneReg = re.compile(r'\d\d\d-\d\d\d-\d\d\d\d')
grou = phoneReg.findall(message)
print(grou)

# With groups, findall() returns a list of tuples, one string per group.
phoneReg = re.compile(r'(\d\d\d-)(\d\d\d-\d\d\d\d)')
print(phoneReg.findall(message))
# ---------------------------------------------
#  Character Classes
# ---------------------------------------------
# \d - matches any character that is a numeric digit
# \D - matches any character that is NOT a numeric digit
# \w - matches any letter, numeric digit, or the underscore character
# \W - matches any character that is not a letter, numeric digit, or _
# \s - matches any space, tab, or newline character
# \S - matches any character that is not a space, tab, or newline
christmas = ''' 12 lords leaping
11 ladies dancing
10 pipers piping
9 drummers drumming
8 maids milking
7 swans swimming
6 geese laying
5 gold rings
4 colly birds
3 french hens
2 turtle doves and
1 partridge in a pear tree '''

# One or more digits, followed by one whitespace character, followed by one
# or more word characters.
xmas = re.compile(r'\d+\s\w+')
print(xmas.findall(christmas))
# ---------------------------------------------
#  Custom Character Classes
# ---------------------------------------------
# Square brackets define your own class; each assignment below replaces the
# previous one, so only the last pattern is used by the findall() call.
regexObj = re.compile(r'[aeiou]')  # lowercase vowels
regexObj = re.compile(r'[a-z]')  # any lowercase letter from a to z
regexObj = re.compile(r'[a-fA-F]')  # a to f, lowercase or capital
regexObj = re.compile(r'[aeiouAEIOU]')  # lowercase and capital vowels
print(regexObj.findall('robocop eats baby food'))
# ['o', 'o', 'o', 'e', 'a', 'a', 'o', 'o']

regexObj = re.compile(r'[aeiouAEIOU]{2}')  # two vowels in a row
print(regexObj.findall('robocop eats baby food'))
# ['ea', 'oo']
# ---------------------------------------------
#  Negative Character Classes
# ---------------------------------------------
# A leading ^ inside the brackets negates the class: this matches any single
# character that is NOT a vowel (spaces included).
contsReg = re.compile(r'[^aeiouAEIOU]')
print(contsReg.findall('robo cop eats babyfood'))
# ['r', 'b', ' ', 'c', 'p', ' ', 't', 's', ' ', 'b', 'b', 'y', 'f', 'd']
import re

# -----------------------------
#  Regex .* ^ $
# -----------------------------
# ^   - matches strings that begin with the pattern
# $   - matches strings that end with the pattern
# .   - matches any character except newline
# *   - matches zero or more of the preceding item
# .*  - matches any character, any number of times, except newline
#       (greedy by default)
# .*? - non-greedy dot-star expression
# re.compile(r'.*', re.DOTALL) - matches ALL characters, newlines included
# re.compile(r'[aeiou]', re.IGNORECASE) - ignores case
# re.compile(r'[aeiou]', re.I) - same as above
beginHelloRegex = re.compile(r'^Hello')  # string must begin with "Hello"
mo = beginHelloRegex.search('Hello how are you')
print(mo.group())

endHelloRegex = re.compile(r'world!$')  # string must end with "world!"
mo = endHelloRegex.search('hello world!')
print(mo.group())

allDigitsRegex = re.compile(r'^\d+$')  # digits only, from beginning to end
mo = allDigitsRegex.search('651652166262')
print(mo.group())

# . stands for any single character in front of "at"
atRegex = re.compile(r'.at')
print(atRegex.findall("The cat in the hat sat on the flat mat"))

# (.*) captures whatever text sits in that position
string = "First Name: John Last Name: Smith"
nameRegex = re.compile(r'First Name: (.*) Last Name: (.*)')
print(nameRegex.findall(string))

# Greedy: .* runs on to the last '>' in the text.
serve = "<To serve humans> for dinner.>"
greedy = re.compile(r'<(.*)>')
mo = greedy.search(serve)
print(mo.group())

# Non-greedy: .*? stops at the first '>'.
nongreedRegex = re.compile(r'<(.*?)>')
mo = nongreedRegex.search(serve)
print(mo.group())
# Without flags, .* matches only up to the first \n character.
string = 'Serve the public trust. \n Protect the innocent \n Upload the law'
newLineRegex = re.compile(r'.*')
mo = newLineRegex.search(string)
print(mo.group())

# With re.DOTALL, . matches all characters, even newlines.
string = 'Serve the public trust. \n Protect the innocent \n Upload the law'
allCharRegex = re.compile(r'.*', re.DOTALL)
mo = allCharRegex.search(string)
print(mo.group())

# Case-sensitive by default: only lowercase vowels are found.
string = "AbcdEfghIjklmnOpqrstUvwxyz"
caseSenReg = re.compile(r'[aeiou]')
print(caseSenReg.findall(string))

# re.IGNORECASE makes the same class match capital vowels as well.
caseInSenReg = re.compile(r'[aeiou]', re.IGNORECASE)
print(caseInSenReg.findall(string))
import re

# --------------------------------
#  re.sub
# --------------------------------
agentMessage = "Agent Alice gave the secret documents to Agent Bob."

# "Agent " followed by one or more word characters (the agent's name).
namesRegex = re.compile(r'Agent \w+')
print(namesRegex.findall(agentMessage))
# sub() replaces every match with the given text.
print(namesRegex.sub('REDACTED', agentMessage))

# The group captures only the first character of the name, so findall()
# returns just those first letters.
namesRegex = re.compile(r'Agent (\w)\w*')
print(namesRegex.findall(agentMessage))
# \1 in the replacement is substituted with the text of group 1.
print(namesRegex.sub(r'AGENT \1****', agentMessage))
# --------------------------------
#  re.VERBOSE
# --------------------------------
# Verbose mode allows a pattern to span several lines and to carry comments
# inside the expression itself. The compiled object is kept in a name so it
# can actually be used.
verboseRegex = re.compile(r'''
    \d\d\d-     # area code
    \d\d\d-     # next three digits
    \d\d\d\d    # last four digits
    ''', re.VERBOSE)
# --------------------------------
#  Combining Flags (bitwise OR)
# --------------------------------
# Several flags are passed as one argument by joining them with the | operator.
# Raw string: '\d' inside a normal string literal is an invalid escape sequence.
combinedFlagsRegex = re.compile(r'\d\d\d', re.IGNORECASE | re.DOTALL | re.VERBOSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment