Created
December 15, 2020 15:43
-
-
Save chuckcoggins/4e25804f1f38a5bc85423394a0def63f to your computer and use it in GitHub Desktop.
Email & Phone Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! python3
import re | |
import pyperclip | |
# Regex for US-style phone numbers. Each findall() result is a tuple of groups
# whose first element (group 1) is the complete matched number.
phone_num_regex = re.compile(r'''
    # 415-555-0000, 555-0000, (415) 555-0000, 555-0000 ext 12345, ext. 12345, x12345
    (
        ((\d{3}|\(\d{3}\))       # area code, bare or fully parenthesized (optional)
        (\s|-))?                 # first separator, only present with an area code
        (?<!\d)\d{3}             # first 3 digits, not glued to a preceding digit
        -                        # second separator
        \d{4}                    # last 4 digits
        (\s*(ext\.?\s*|x)        # extension word-part (optional)
        \d{1,5})?                # extension number-part, 1 to 5 digits
    )
''', re.VERBOSE)
# Regex for email addresses. It has no capturing groups, so findall() returns
# each matched address as a plain string.
email_regex = re.compile(r'''
    [a-zA-Z0-9_.+%-]+            # name part
    @                            # @ symbol
    [a-zA-Z0-9_-]+               # first domain label
    (?:\.[a-zA-Z0-9_-]+)*        # further dot-separated labels; never ends on a dot
''', re.VERBOSE)
# Read the source text from the clipboard
text = pyperclip.paste()

# Extract every phone number and email address found in that text
extracted_phone = phone_num_regex.findall(text)
extracted_email = email_regex.findall(text)

# findall() yields one tuple of groups per phone match; index 0 holds the
# complete number. strip() drops a separator that may lead the match.
all_phone_numbers = [groups[0].strip() for groups in extracted_phone]

# Copy the results back to the clipboard, one entry per line. Joining a single
# combined list avoids stray blank lines when either list is empty.
end_results = '\n'.join(all_phone_numbers + extracted_email)
pyperclip.copy(end_results)
# print(end_results)
# open notepad, word,
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment