Created
March 2, 2020 22:06
-
-
Save exbotanical/cf61afec7d2e35046ac105dec64cc71d to your computer and use it in GitHub Desktop.
python, [regex, re module] (more notes re py roborant)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# # # REGEX # # # | |
# The ? matches zero or one of the preceding group. | |
# The * matches zero or more of the preceding group. | |
# The + matches one or more of the preceding group. | |
# The {n} matches exactly n of the preceding group. | |
# The {n,} matches n or more of the preceding group. | |
# The {,m} matches 0 to m of the preceding group. | |
# The {n,m} matches at least n and at most m of the preceding group. | |
# {n,m}? or *? or +? performs a non-greedy match of the preceding group. | |
# ^spam means the string must begin with spam. | |
# spam$ means the string must end with spam. | |
# The . matches any character, except newline characters. | |
# \d, \w, and \s match a digit, word, or space character, respectively. | |
# \D, \W, and \S match anything except a digit, word, or space character, respectively. | |
# [abc] matches any character between the brackets (such as a, b, or c). | |
# [^abc] matches any character that isn’t between the brackets. | |
# def is_phone_number(txt):
#     if len(txt) != 12:
#         return False
#     for i in range(0,3):
#         if not txt[i].isdecimal():
#             return False
#     if txt[3] != '-':
#         return False
#     for i in range(4,7):
#         if not txt[i].isdecimal():
#             return False
#     if txt[7] != '-':
#         return False
#     for i in range(8,12):
#         if not txt[i].isdecimal():
#             return False
#     return True
# message = 'Call me at 415-555-1011 tomorrow. 415-555-9999 is my office.' | |
# for i in range(len(message)): | |
# chunk = message[i:i+12] | |
# if is_phone_number(chunk): | |
# print('Phone number found: ' + chunk) | |
# print('Done') | |
# import re | |
# num_regex = re.compile(r'(\d{3})-(\d{3}-\d{4})') | |
# match = num_regex.search('Call me at 415-555-1011 tomorrow.') | |
# area_code = match.group(1) | |
# number = match.group(2) | |
# print(f'Phone number found: ({area_code}) {number}') | |
# ha_regex = re.compile(r'(((Ha){4})+)') | |
# match_ha = ha_regex.findall('I laughed like HaHaHa HaHaHa HaHa HAHaHaHaHaha HaHahaHaHaHA and else-like.') | |
# print(match_ha) | |
# phoneNumRegex = re.compile(r'(\d{3})-(\d{3})-(\d{4})') | |
# print(phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000')) | |
# # create own char class | |
# custom_char_class_regex = re.compile(r'[RNVE]\w+') | |
# b = custom_char_class_regex.findall('ReactJS, VueJS, NodeJS, ExpressJS, C++, Java') | |
# print(b) # oh, it returns only the keywords that match my skillset ! | |
# begins_with_hello = re.compile(r'^Hello') | |
# print(begins_with_hello.search('Hello, world!')) | |
# print(begins_with_hello.search('I said hello.') == None) | |
# ends_with_number = re.compile(r'\d$') | |
# print(ends_with_number.search('Your number is 42')) | |
# # I always confuse the meanings of these two symbols, so I use the mnemonic “Carrots cost dollars” to remind myself that the caret comes first and the dollar sign comes last. | |
# wild_card = re.compile(r'[email protected]') | |
# print(wild_card.search('call that number or email [email protected]')) | |
# atRegex = re.compile(r'.at') | |
# print(atRegex.findall('The cat in the hat sat on the flat mat.')) | |
import re

# --- Worked examples for the `re` module -----------------------------------
# Each section compiles a pattern, applies it to a sample string, and prints
# the result so the behaviour can be checked by running the script.

# E-MAIL ADDRESSES
# The local part may contain word characters, '.', '+' and '-'; the domain is
# one label optionally followed by further dot-separated labels, so
# "alice@example.com" is captured whole rather than cut off at the first dot.
email_regex = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)*')
# NOTE(review): the sample addresses below appear to have been replaced by an
# "[email protected]" placeholder; the string contains no '@', so findall()
# returns an empty list for it. Restore real sample addresses if available.
m = email_regex.findall('my email is [email protected] and his is [email protected] and yours is [email protected]')
print(m)

# GREEDY vs. NON-GREEDY
# {3,5} takes as many repetitions as it can (five here) ...
greedy_regex = re.compile(r'(Ha){3,5}')
mo1 = greedy_regex.search('HaHaHaHaHa')
print(mo1.group())

# ... while {3,5}? stops at the minimum that still matches (three).
non_greedy_regex = re.compile(r'(Ha){3,5}?')
mo2 = non_greedy_regex.search('HaHaHaHaHa')
print(mo2.group())

# PHONE NUMBERS with optional parentheses around the area code.
# The pattern has no groups, so findall() returns the full matched strings.
phone_regex = re.compile(r'\(?\d{3}\)?-\d{3}-\d{4}')
l = phone_regex.findall('The first phone number is (713)-214-5039 and the second is 281-889-2034. The suite number is L-303')
print(l)

# DOTALL
# With re.DOTALL the dot also matches newlines, so '.*' spans every line.
newline_regex = re.compile(r'.*', re.DOTALL)
o = newline_regex.search('Serve the public trust.\nProtect the innocent.\nUphold the law.').group()
print(o)

# IGNORE CASE
robocop = re.compile(r'robocop', re.I)
print(robocop.search('RoboCop is part man, part machine.').group())
print(robocop.sub('An android', 'RoboCop is part man, part machine.'))

# SUBSTITUTION WITH A BACK-REFERENCE
# \1 is the first letter of the agent's name; the rest is replaced by ****.
agent_names_regex = re.compile(r'Agent (\w)\w*')
print(agent_names_regex.sub(r'\1****', 'Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.'))

# VERBOSE MODE EXAMPLE
# In re.VERBOSE mode, whitespace inside the pattern is ignored and '#' starts
# a comment. The extension keyword is "ext", "ext." or "x"; the dot is escaped
# so that it matches a literal '.' only, not any character.
verbose_regex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?              # area code
    (\s|-|\.)?                      # separator
    \d{3}                           # first 3 digits
    (\s|-|\.)                       # separator
    \d{4}                           # last 4 digits
    (\s*(ext\.?|x)\s*\d{2,5})?      # extension
    )''', re.VERBOSE)

# Combine several flags in one compile() call with the bitwise-OR operator (|).
multiple_arg_regex = re.compile('foo', re.IGNORECASE | re.DOTALL | re.VERBOSE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment