Created
March 23, 2021 19:20
-
-
Save htkcodes/b56142cbf179722bbc6542cb91b45d49 to your computer and use it in GitHub Desktop.
Scrapes emails from a file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# I forked this from somewhere but forgot where; it had a regex bug, which I fixed.
import re | |
# Input file that is scanned for e-mail addresses.
fileToRead = 'emails.txt'
# Output file that receives the extracted addresses, one per line.
fileToWrite = 'emailExtracted.txt'
# Characters treated as separators in addition to whitespace; each one is
# replaced by a space before a line is split into words.
delimiterInFile = [',', ';']
def validateEmail(strEmail):
    """Return True when ``strEmail`` has the rough shape of an e-mail address.

    The check is intentionally loose: a non-empty local part, a single ``@``,
    a non-empty domain, a ``.`` and a non-empty suffix, with no whitespace
    anywhere. It does not attempt full RFC 5322 validation.

    Args:
        strEmail: The candidate string (a single whitespace-free word).

    Returns:
        bool: True if the string looks like ``local@domain.suffix``.
    """
    # Raw string avoids the invalid "\." escape; "+" (instead of "*") rejects
    # degenerate tokens such as "@." that have empty parts.
    return re.fullmatch(r"[^@\s]+@[^@\s]+\.[^@\s]+", strEmail) is not None
def writeFile(listData, path=None):
    """Write every item of ``listData`` to a text file, one item per line.

    The destination is truncated first, so an empty ``listData`` produces an
    empty file.

    Args:
        listData: Iterable of strings to write.
        path: Destination file path. Defaults to the module-level
            ``fileToWrite`` when omitted.
    """
    if path is None:
        path = fileToWrite
    # The context manager guarantees the data is flushed and the handle closed.
    with open(path, 'w') as outFile:
        outFile.writelines(item + '\n' for item in listData)
# Collect every word in the input file that looks like an e-mail address.
listEmail = []
with open(fileToRead, 'r') as inputFile:
    # Iterate the file lazily instead of loading all lines at once.
    for itemLine in inputFile:
        item = itemLine
        # Turn the extra delimiters into spaces so split() separates on them too.
        for delimiter in delimiterInFile:
            item = item.replace(str(delimiter), ' ')
        listEmail.extend(word for word in item.split() if validateEmail(word))

if listEmail:
    # De-duplicate before reporting and saving.
    uniqEmail = set(listEmail)
    print(len(uniqEmail), "emails collected!")
    # Sort so the output file is reproducible; set order is arbitrary.
    writeFile(sorted(uniqEmail))
else:
    print("No email found.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment