sengupta · January 16, 2012 18:46 · letronje · Jan 16, 2012 · sengupta · Apr 21, 2012
diff --git a/README.md b/README.md
diff --git a/LICENSE.txt b/LICENSE.txt
            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

  0. You just DO WHAT THE FUCK YOU WANT TO. 
diff --git a/scrape.py b/scrape.py
 #!/usr/bin/python
 #  This program is free software. It comes without any warranty, to
 #  the extent permitted by applicable law. You can redistribute it
 #  and/or modify it under the terms of the Do What The Fuck You Want
 #  To Public License, Version 2, a copy of which is provided in the 
 #  file LICENSE.txt. 

 # Enclose the code below in a loop to have it scrape over multiple pages of a site.
 # This script currently scrapes one page to pull out emails.

 import re
 import sys
 import urllib

 # Scrape one page: fetch the URL given as the first command-line argument,
 # collect everything that matches the email pattern below, and write the
 # unique matches to emails.csv (one per line, replacing any previous file).

 # Exit with a usage message instead of an IndexError traceback.
 if len(sys.argv) < 2:
     sys.exit('usage: scrape.py URL')

 # urllib.urlopen only exists on Python 2; Python 3 moved it to urllib.request.
 try:
     from urllib.request import urlopen
 except ImportError:
     from urllib import urlopen

 url = urlopen(sys.argv[1])
 try:
     response = url.read()
 finally:
     # Release the connection even if the read fails.
     url.close()

 # Python 3 returns bytes from read(); the str pattern below needs text.
 if not isinstance(response, str):
     response = response.decode('utf-8', 'replace')

 regex = re.compile(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')

 emails = regex.findall(response)
 # Sort the de-duplicated matches so repeated runs write the same file;
 # iterating a bare set gives an arbitrary order.
 with open('emails.csv', 'w') as email_file:
     email_file.write('\n'.join(sorted(set(emails))))

diff --git a/scrape.sh b/scrape.sh
 #!/bin/bash
 #  This program is free software. It comes without any warranty, to
 #  the extent permitted by applicable law. You can redistribute it
 #  and/or modify it under the terms of the Do What The Fuck You Want
 #  To Public License, Version 2, a copy of which is provided in the 
 #  file LICENSE.txt. 

 # Enclose the pipeline below in a loop to have it scrape over multiple pages of a site.
 # It currently scrapes one page ($1) and appends the emails it finds to emails.csv.
 #
 # Stages: fetch the page quietly, strip HTML tags, strip leading whitespace,
 # drop every &nbsp; entity, then print only the address-shaped matches.
 # grep must read stdin here: with -r and no file operand, current GNU grep
 # searches the working directory instead of the piped page.
 curl -s "$1" \
     | sed -e 's/<[^>]*>//g' -e 's/^[ \t]*//' -e 's/&nbsp;//g' \
     | grep -ow "[[:alnum:]_.-]\+@[[:alnum:]_.-]\+" >> emails.csv
	DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
	TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

	0. You just DO WHAT THE FUCK YOU WANT TO.
	#!/usr/bin/python
	# This program is free software. It comes without any warranty, to
	# the extent permitted by applicable law. You can redistribute it
	# and/or modify it under the terms of the Do What The Fuck You Want
	# To Public License, Version 2, a copy of which is provided in the
	# file LICENSE.txt.

	# Enclose the code below in a loop to have it scrape over multiple pages of a site.
	# This script currently scrapes one page to pull out emails.

	import re
	import sys
	import urllib

	# Scrape one page: fetch the URL given as the first command-line argument,
	# collect everything that matches the email pattern below, and write the
	# unique matches to emails.csv (one per line, replacing any previous file).

	# Exit with a usage message instead of an IndexError traceback.
	if len(sys.argv) < 2:
	    sys.exit('usage: scrape.py URL')

	# urllib.urlopen only exists on Python 2; Python 3 moved it to urllib.request.
	try:
	    from urllib.request import urlopen
	except ImportError:
	    from urllib import urlopen

	url = urlopen(sys.argv[1])
	try:
	    response = url.read()
	finally:
	    # Release the connection even if the read fails.
	    url.close()

	# Python 3 returns bytes from read(); the str pattern below needs text.
	if not isinstance(response, str):
	    response = response.decode('utf-8', 'replace')

	regex = re.compile(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')

	emails = regex.findall(response)
	# Sort the de-duplicated matches so repeated runs write the same file;
	# iterating a bare set gives an arbitrary order.
	with open('emails.csv', 'w') as email_file:
	    email_file.write('\n'.join(sorted(set(emails))))
	#!/bin/bash
	# This program is free software. It comes without any warranty, to
	# the extent permitted by applicable law. You can redistribute it
	# and/or modify it under the terms of the Do What The Fuck You Want
	# To Public License, Version 2, a copy of which is provided in the
	# file LICENSE.txt.

	# Enclose the pipeline below in a loop to have it scrape over multiple pages of a site.
	# It currently scrapes one page ($1) and appends the emails it finds to emails.csv.
	#
	# Stages: fetch the page quietly, strip HTML tags, strip leading whitespace,
	# drop every &nbsp; entity, then print only the address-shaped matches.
	# The pipes must be unescaped (a literal \| is passed to curl as an argument),
	# and grep must read stdin: with -r and no file operand, current GNU grep
	# searches the working directory instead of the piped page.
	curl -s "$1" \
	    | sed -e 's/<[^>]*>//g' -e 's/^[ \t]*//' -e 's/&nbsp;//g' \
	    | grep -ow "[[:alnum:]_.-]\+@[[:alnum:]_.-]\+" >> emails.csv