@hernamesbarbara
Last active October 20, 2018 20:21
Fetch any mailto: email addresses found on a web page

Basic script to extract any email addresses that can be found on a web page.

Usage

Get all the emails you can find on this page:

https://zombierecords.com/staff/

$ python3 findemails.py https://zombierecords.com/staff/
email,label
[email protected],[email protected]
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
[email protected],Contact
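
The script relies on a few third-party libraries. A minimal install sketch, assuming the standard PyPI package names for the imports used below:

$ pip3 install docopt requests beautifulsoup4 html5lib pandas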
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""findemails.py

Usage:
    findemails URL [--output OUTPUT]

Arguments:
    URL                 Website from which you want to extract email addresses

Options:
    -o --output OUTPUT  Where to write the CSV [default: stdout].
    -h --help           Show this message.

Examples:
    findemails https://zombierecords.com/staff/
    findemails https://zombierecords.com/staff/ --output zombie-emails.csv
"""
import sys

from docopt import docopt
import requests
from bs4 import BeautifulSoup
import html5lib  # noqa: F401 -- imported so a missing parser backend fails fast
import pandas as pd


def get_soup(url):
    """Fetch the page at `url` and parse it into a BeautifulSoup tree."""
    r = requests.get(url)
    try:
        soup = BeautifulSoup(r.text, "html5lib")
    except Exception as err:
        sys.stderr.write(str(err))
        soup = None
    return soup


def find_emails(soup):
    """Collect every mailto: link on the page as {email, label} records."""
    emails = []
    for tag in soup.find_all("a"):
        link = tag.get('href', '').strip()
        if link and link.startswith('mailto:'):
            # Drop the "mailto:" scheme and any "?subject=..." query string.
            email = link[len('mailto:'):].split('?')[0].strip()
            label = tag.get_text().strip()
            emails.append({"email": email, "label": label})
    return emails


def main():
    args = docopt(__doc__)
    url = args['URL']
    output = args['--output']

    soup = get_soup(url)
    if not soup:
        sys.stderr.write("couldn't access the URL provided\n")
        sys.exit(1)

    emails = find_emails(soup)
    if not emails:
        sys.stderr.write("couldn't find any emails\n")
        sys.exit(1)

    emails = pd.DataFrame(emails)
    outfile = sys.stdout if output == 'stdout' else output
    emails.to_csv(outfile, index=False, encoding='utf-8')
    sys.exit(0)


if __name__ == '__main__':
    main()
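
If you want to reuse the extraction logic without fetching a live page, find_emails works on any BeautifulSoup tree. A minimal sketch, assuming the script is importable as a module named findemails and using a made-up HTML snippet for illustration:

from bs4 import BeautifulSoup
from findemails import find_emails

# Hypothetical snippet standing in for a fetched page.
html = '<a href="mailto:booking@example.com?subject=hello">Booking</a>'
soup = BeautifulSoup(html, "html5lib")

print(find_emails(soup))
# -> [{'email': 'booking@example.com', 'label': 'Booking'}]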