exbotanical · March 2, 2020 22:06
diff --git a/py_regex_notes.py b/py_regex_notes.py
 # # #   REGEX   # # # 

 # The ? matches zero or one of the preceding group.
 # The * matches zero or more of the preceding group.
 # The + matches one or more of the preceding group.
 # The {n} matches exactly n of the preceding group.
 # The {n,} matches n or more of the preceding group.
 # The {,m} matches 0 to m of the preceding group.
 # The {n,m} matches at least n and at most m of the preceding group.
 # {n,m}? or *? or +? performs a non-greedy match of the preceding group.
 # ^spam means the string must begin with spam.
 # spam$ means the string must end with spam.
 # The . matches any character, except newline characters.
 # \d, \w, and \s match a digit, word, or space character, respectively.
 # \D, \W, and \S match anything except a digit, word, or space character, respectively.
 # [abc] matches any character between the brackets (such as a, b, or c).
 # [^abc] matches any character that isn’t between the brackets.



 # def is_phone_number(txt):
 #   if len(txt) != 12:
 #     return False
 #   for i in range(0,3):
 #     if not txt[i].isdecimal():
 #       return False
 #   if txt[3] != '-':
 #     return False
 #   for i in range(4,7):
 #     if not txt[i].isdecimal():
 #       return False
 #   if txt[7] != '-':
 #     return False
 #   for i in range(8,12):
 #     if not txt[i].isdecimal():
 #       return False
 #   return True
  
 # message = 'Call me at 415-555-1011 tomorrow. 415-555-9999 is my office.'
 # for i in range(len(message)):
 #   chunk = message[i:i+12]
 #   if is_phone_number(chunk):
 #     print('Phone number found: ' + chunk)
 # print('Done')

 # import re

 # num_regex = re.compile(r'(\d{3})-(\d{3}-\d{4})')

 # match = num_regex.search('Call me at 415-555-1011 tomorrow.')

 # area_code = match.group(1)
 # number = match.group(2)

 # print(f'Phone number found: ({area_code}) {number}')

 # ha_regex = re.compile(r'(((Ha){4})+)')

 # match_ha = ha_regex.findall('I laughed like HaHaHa HaHaHa HaHa HAHaHaHaHaha HaHahaHaHaHA and else-like.')

 # print(match_ha)

 # phoneNumRegex = re.compile(r'(\d{3})-(\d{3})-(\d{4})')

 # print(phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000'))

 # # create own char class

 # custom_char_class_regex = re.compile(r'[RNVE]\w+')

 # b = custom_char_class_regex.findall('ReactJS, VueJS, NodeJS, ExpressJS, C++, Java')

 # print(b) # oh, it returns only the keywords that match my skillset ! 

 # begins_with_hello = re.compile(r'^Hello')
 # print(begins_with_hello.search('Hello, world!'))

 # print(begins_with_hello.search('I said hello.') == None)

 # ends_with_number = re.compile(r'\d$')
 # print(ends_with_number.search('Your number is 42'))

 # # I always confuse the meanings of these two symbols, so I use the mnemonic “Carrots cost dollars” to remind myself that the caret comes first and the dollar sign comes last.

 # wild_card = re.compile(r'.@gmail.com')
 # print(wild_card.search('call that number or email hello@gmail.com'))

 # atRegex = re.compile(r'.at')
 # print(atRegex.findall('The cat in the hat sat on the flat mat.'))


 import re

 # EMAIL ADDRESSES
 # Match the whole address. A bare \w+@\w+ stops at the first dot, so it
 # would return 'abc@gmail' instead of 'abc@gmail.com'.
 email_regex = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')

 m = email_regex.findall('my email is abc@gmail.com and his is notarealemail@gmail.com and yours is email@email.com')

 print(m)

 # GREEDY VS NON-GREEDY
 # {3,5} is greedy: it consumes as many repetitions as it can (all five here).
 greedy_regex = re.compile(r'(Ha){3,5}')
 mo1 = greedy_regex.search('HaHaHaHaHa')
 print(mo1.group())

 # A trailing ? makes the quantifier non-greedy: it stops at the minimum (three).
 non_greedy_regex = re.compile(r'(Ha){3,5}?')
 mo2 = non_greedy_regex.search('HaHaHaHaHa')
 print(mo2.group())

 # PHONE NUMBERS
 # The parentheses around the area code are optional; 'L-303' does not match
 # because it lacks the digit groups.
 phone_regex = re.compile(r'\(?\d{3}\)?-\d{3}-\d{4}')

 l = phone_regex.findall('The first phone number is (713)-214-5039 and the second is 281-889-2034. The suite number is L-303')

 print(l)

 # DOTALL
 # With re.DOTALL the dot also matches newlines, so .* spans all three lines.
 newline_regex = re.compile(r'.*', re.DOTALL)

 o = newline_regex.search('Serve the public trust.\nProtect the innocent.\nUphold the law.').group()

 print(o)

 # IGNORE CASE

 robocop = re.compile(r'robocop', re.I)
 print(robocop.search('RoboCop is part man, part machine.').group())

 # sub() replaces every match of the pattern with the given text.
 print(robocop.sub('An android', 'RoboCop is part man, part machine.'))

 # \1 in the replacement refers to group 1 (the first letter of the name),
 # so each agent name is censored down to its initial.
 agent_names_regex = re.compile(r'Agent (\w)\w*')
 print(agent_names_regex.sub(r'\1****', 'Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.'))

 # VERBOSE MODE EXAMPLE
 # re.VERBOSE ignores whitespace and # comments inside the pattern, so it can
 # be laid out over several lines. The dot in "ext." is escaped so it only
 # matches a literal period.

 verbose_regex = re.compile(r'''(
    (\d{3}|\(\d{3}\))?            # area code
    (\s|-|\.)?                    # separator
    \d{3}                         # first 3 digits
    (\s|-|\.)                     # separator
    \d{4}                         # last 4 digits
    (\s*(ext|x|ext\.)\s*\d{2,5})? # extension
    )''', re.VERBOSE)

 # combine several flags in one compile() call with the bitwise OR (|) operator

 multiple_arg_regex = re.compile('foo', re.IGNORECASE | re.DOTALL | re.VERBOSE)
	# # # REGEX # # #

	# The ? matches zero or one of the preceding group.
	# The * matches zero or more of the preceding group.
	# The + matches one or more of the preceding group.
	# The {n} matches exactly n of the preceding group.
	# The {n,} matches n or more of the preceding group.
	# The {,m} matches 0 to m of the preceding group.
	# The {n,m} matches at least n and at most m of the preceding group.
	# {n,m}? or *? or +? performs a non-greedy match of the preceding group.
	# ^spam means the string must begin with spam.
	# spam$ means the string must end with spam.
	# The . matches any character, except newline characters.
	# \d, \w, and \s match a digit, word, or space character, respectively.
	# \D, \W, and \S match anything except a digit, word, or space character, respectively.
	# [abc] matches any character between the brackets (such as a, b, or c).
	# [^abc] matches any character that isn’t between the brackets.



	# def is_phone_number(txt):
	#   if len(txt) != 12:
	#     return False
	#   for i in range(0,3):
	#     if not txt[i].isdecimal():
	#       return False
	#   if txt[3] != '-':
	#     return False
	#   for i in range(4,7):
	#     if not txt[i].isdecimal():
	#       return False
	#   if txt[7] != '-':
	#     return False
	#   for i in range(8,12):
	#     if not txt[i].isdecimal():
	#       return False
	#   return True

	# message = 'Call me at 415-555-1011 tomorrow. 415-555-9999 is my office.'
	# for i in range(len(message)):
	#   chunk = message[i:i+12]
	#   if is_phone_number(chunk):
	#     print('Phone number found: ' + chunk)
	# print('Done')

	# import re

	# num_regex = re.compile(r'(\d{3})-(\d{3}-\d{4})')

	# match = num_regex.search('Call me at 415-555-1011 tomorrow.')

	# area_code = match.group(1)
	# number = match.group(2)

	# print(f'Phone number found: ({area_code}) {number}')

	# ha_regex = re.compile(r'(((Ha){4})+)')

	# match_ha = ha_regex.findall('I laughed like HaHaHa HaHaHa HaHa HAHaHaHaHaha HaHahaHaHaHA and else-like.')

	# print(match_ha)

	# phoneNumRegex = re.compile(r'(\d{3})-(\d{3})-(\d{4})')

	# print(phoneNumRegex.findall('Cell: 415-555-9999 Work: 212-555-0000'))

	# # create own char class

	# custom_char_class_regex = re.compile(r'[RNVE]\w+')

	# b = custom_char_class_regex.findall('ReactJS, VueJS, NodeJS, ExpressJS, C++, Java')

	# print(b) # oh, it returns only the keywords that match my skillset !

	# begins_with_hello = re.compile(r'^Hello')
	# print(begins_with_hello.search('Hello, world!'))

	# print(begins_with_hello.search('I said hello.') == None)

	# ends_with_number = re.compile(r'\d$')
	# print(ends_with_number.search('Your number is 42'))

	# # I always confuse the meanings of these two symbols, so I use the mnemonic “Carrots cost dollars” to remind myself that the caret comes first and the dollar sign comes last.

	# wild_card = re.compile(r'.@gmail.com')
	# print(wild_card.search('call that number or email hello@gmail.com'))

	# atRegex = re.compile(r'.at')
	# print(atRegex.findall('The cat in the hat sat on the flat mat.'))


	import re

	# EMAIL ADDRESSES
	# Match the whole address. A bare \w+@\w+ stops at the first dot, so it
	# would return 'abc@gmail' instead of 'abc@gmail.com'.
	email_regex = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')

	m = email_regex.findall('my email is abc@gmail.com and his is notarealemail@gmail.com and yours is email@email.com')

	print(m)

	# GREEDY VS NON-GREEDY
	# {3,5} is greedy: it consumes as many repetitions as it can (all five here).
	greedy_regex = re.compile(r'(Ha){3,5}')
	mo1 = greedy_regex.search('HaHaHaHaHa')
	print(mo1.group())

	# A trailing ? makes the quantifier non-greedy: it stops at the minimum (three).
	non_greedy_regex = re.compile(r'(Ha){3,5}?')
	mo2 = non_greedy_regex.search('HaHaHaHaHa')
	print(mo2.group())

	# PHONE NUMBERS
	# The parentheses around the area code are optional; 'L-303' does not match
	# because it lacks the digit groups.
	phone_regex = re.compile(r'\(?\d{3}\)?-\d{3}-\d{4}')

	l = phone_regex.findall('The first phone number is (713)-214-5039 and the second is 281-889-2034. The suite number is L-303')

	print(l)

	# DOTALL
	# With re.DOTALL the dot also matches newlines, so .* spans all three lines.
	newline_regex = re.compile(r'.*', re.DOTALL)

	o = newline_regex.search('Serve the public trust.\nProtect the innocent.\nUphold the law.').group()

	print(o)

	# IGNORE CASE

	robocop = re.compile(r'robocop', re.I)
	print(robocop.search('RoboCop is part man, part machine.').group())

	# sub() replaces every match of the pattern with the given text.
	print(robocop.sub('An android', 'RoboCop is part man, part machine.'))

	# \1 in the replacement refers to group 1 (the first letter of the name),
	# so each agent name is censored down to its initial.
	agent_names_regex = re.compile(r'Agent (\w)\w*')
	print(agent_names_regex.sub(r'\1****', 'Agent Alice told Agent Carol that Agent Eve knew Agent Bob was a double agent.'))

	# VERBOSE MODE EXAMPLE
	# re.VERBOSE ignores whitespace and # comments inside the pattern, so it can
	# be laid out over several lines. Alternatives are separated by a plain |
	# (an escaped \| would match a literal pipe), and the dot in "ext." is
	# escaped so it only matches a literal period.

	verbose_regex = re.compile(r'''(
	(\d{3}|\(\d{3}\))? # area code
	(\s|-|\.)? # separator
	\d{3} # first 3 digits
	(\s|-|\.) # separator
	\d{4} # last 4 digits
	(\s*(ext|x|ext\.)\s*\d{2,5})? # extension
	)''', re.VERBOSE)

	# combine several flags in one compile() call with the bitwise OR (|) operator

	multiple_arg_regex = re.compile('foo', re.IGNORECASE | re.DOTALL | re.VERBOSE)
No results found