Last active
August 9, 2022 00:50
-
-
Save thuwarakeshm/9c1f427c5fe9e7aca5df563f3218a984 to your computer and use it in GitHub Desktop.
Regex with PRegEx
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pregex.classes import AnyButWhitespace
from pregex.groups import Capture
from pregex.quantifiers import OneOrMore, AtLeastAtMost

# Shape of the pattern: <non-whitespace run> "@" <captured domain>.
# The captured domain is a non-whitespace run, a ".", and then a suffix of
# two to three non-whitespace characters.
pattern = (
    OneOrMore(AnyButWhitespace())
    + "@"
    + Capture(
        OneOrMore(AnyButWhitespace()) + "." + AtLeastAtMost(AnyButWhitespace(), 2, 3)
    )
)

# NOTE(review): the addresses in this sample were replaced by an
# "[email protected]" placeholder when the page was scraped. The domains below
# come from the documented output; the local parts (alice, rabbit, tony) are
# reconstructed guesses -- confirm against the original gist.
text = """My names is Alice. I live in Wonderland. You can mail me: alice@wonderland.com.
In case if I couldn't reply, please main my friend the White Rabbit: rabbit@wonderland.com.
But for more serious issues, you should main Tony Stark at tony@stark.org.
"""

# Get everything you captured.
pattern.get_captures(text)
# [('wonderland.com',), ('wonderland.com',), ('stark.org',)]

# Get all your matches.
pattern.get_matches(text)
# ['alice@wonderland.com', 'rabbit@wonderland.com', 'tony@stark.org']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pregex.classes import AnyButWhitespace
from pregex.groups import Capture
from pregex.operators import Either
from pregex.quantifiers import OneOrMore

# Shape of the pattern: <non-whitespace run> "@" <captured domain>.
# Here the captured domain must end in one of the two listed suffixes,
# ".com" or ".org"; any other ending does not match.
pattern = (
    OneOrMore(AnyButWhitespace())
    + "@"
    + Capture(OneOrMore(AnyButWhitespace()) + Either(".com", ".org"))
)

# NOTE(review): the addresses in this sample were replaced by an
# "[email protected]" placeholder when the page was scraped. The first three
# domains come from the documented output and their local parts are
# reconstructed guesses. The fourth address is a stand-in chosen only so that
# it ends in neither ".com" nor ".org" -- confirm all four against the
# original gist.
text = """My names is Alice. I live in Wonderland. You can mail me: alice@wonderland.com.
In case if I couldn't reply, please main my friend the White Rabbit: rabbit@wonderland.com.
But for more serious issues, you should main Tony Stark at tony@stark.org.
Please don't message someone@example.net
"""

# The last address is skipped because its suffix is not in the Either(...) list.
pattern.get_captures(text)
# [('wonderland.com',), ('wonderland.com',), ('stark.org',)]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pregex.classes import AnyDigit
from pregex.quantifiers import Exactly, Optional

# Five digits, optionally followed by "-" and exactly four more digits.
pattern = Exactly(AnyDigit(), 5) + Optional("-" + Exactly(AnyDigit(), 4))

address1 = "730 S White Sands Blvd, Alamogordo, NM 88310, United States"
address2 = "730 S White Sands Blvd, Alamogordo, NM 88310-7421, United States"

# Five-digit form only.
pattern.get_matches(address1)
# ['88310']

# Five-digit form with the optional four-digit extension.
pattern.get_matches(address2)
# ['88310-7421']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re

# Five digits, optionally followed by a hyphen and four more digits.
pattern = r"\d{5}(-\d{4})?"
address = "730 S White Sands Blvd, Alamogordo, NM 88310, United States"

# re.search returns None when nothing matches; guard the lookup so an address
# without a ZIP code yields None instead of raising AttributeError.
zip_match = re.search(pattern, address)
zip_code = zip_match.group() if zip_match else None
print(zip_code)
# 88310
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment