jrjames83 · May 4, 2016 03:44
diff --git a/scrape.py b/scrape.py
 from bs4 import BeautifulSoup
 import requests
 import urllib

 # Third-party dependencies: if the imports above fail, install them from a
 # terminal with `python -m pip install requests beautifulsoup4`.

 # urlretrieve and urljoin live in different modules on Python 2 and Python 3;
 # try the Python 2 locations first and fall back to the Python 3 ones.
 try:
     from urllib import urlretrieve
     from urlparse import urljoin
 except ImportError:
     from urllib.request import urlretrieve
     from urllib.parse import urljoin

 url = "http://www.americanrhetoric.com/barackobamaspeeches.htm"
 response = requests.get(url)
 # Stop here on an HTTP error instead of scraping links out of an error page.
 response.raise_for_status()
 # Name the parser explicitly so the result does not depend on which optional
 # parser libraries happen to be installed.
 soup = BeautifulSoup(response.text, "html.parser")

 # The index page links to the recordings with site-relative paths, e.g.
 #   mp3clipsXE/barackobama/barackobamaguantanimobayclosingARXE.mp3
 #   mp3clipsXE/barackobama/barackobamaISILupdate02-25-16ARXE.mp3
 #   mp3clipsXE/barackobama/barackobamarecoveryact2016ARXE.mp3
 # while the downloadable URL is the absolute form:
 #   http://www.americanrhetoric.com/mp3clipsXE/barackobama/barackobamanursesassociationARXE.mp3

 stub = "http://www.americanrhetoric.com/"
 files = []  # absolute URL of every .mp3 linked from the page

 # href=True skips anchors that have no href attribute (a['href'] would raise
 # KeyError on those).
 for a in soup.findAll('a', href=True):
     if '.mp3' in a['href']:
         # urljoin resolves relative paths against the site root and leaves
         # already-absolute links untouched.
         files.append(urljoin(stub, a['href']))

 somefiles = files[:10]  # first 10 files as a test run (iterate over `files` to fetch all of them)
 print(len(files))

 # Download each file into the current directory, named after the last path
 # segment of its URL. The try/except sits inside the loop so that one failed
 # download is reported and the remaining files are still fetched.
 for x in somefiles:
     try:
         urlretrieve(x, x.split("/")[-1])
     except Exception as e:
         print("%s: %s" % (x, e))
	from bs4 import BeautifulSoup
	import requests
	import urllib

	# Third-party dependencies: if the imports above fail, install them from a
	# terminal with `python -m pip install requests beautifulsoup4`.

	# urlretrieve and urljoin live in different modules on Python 2 and Python 3;
	# try the Python 2 locations first and fall back to the Python 3 ones.
	try:
	    from urllib import urlretrieve
	    from urlparse import urljoin
	except ImportError:
	    from urllib.request import urlretrieve
	    from urllib.parse import urljoin

	url = "http://www.americanrhetoric.com/barackobamaspeeches.htm"
	response = requests.get(url)
	# Stop here on an HTTP error instead of scraping links out of an error page.
	response.raise_for_status()
	# Name the parser explicitly so the result does not depend on which optional
	# parser libraries happen to be installed.
	soup = BeautifulSoup(response.text, "html.parser")

	# The index page links to the recordings with site-relative paths, e.g.
	#   mp3clipsXE/barackobama/barackobamaguantanimobayclosingARXE.mp3
	#   mp3clipsXE/barackobama/barackobamaISILupdate02-25-16ARXE.mp3
	#   mp3clipsXE/barackobama/barackobamarecoveryact2016ARXE.mp3
	# while the downloadable URL is the absolute form:
	#   http://www.americanrhetoric.com/mp3clipsXE/barackobama/barackobamanursesassociationARXE.mp3

	stub = "http://www.americanrhetoric.com/"
	files = []  # absolute URL of every .mp3 linked from the page

	# href=True skips anchors that have no href attribute (a['href'] would raise
	# KeyError on those).
	for a in soup.findAll('a', href=True):
	    if '.mp3' in a['href']:
	        # urljoin resolves relative paths against the site root and leaves
	        # already-absolute links untouched.
	        files.append(urljoin(stub, a['href']))

	somefiles = files[:10]  # first 10 files as a test run (iterate over `files` to fetch all of them)
	print(len(files))

	# Download each file into the current directory, named after the last path
	# segment of its URL. The try/except sits inside the loop so that one failed
	# download is reported and the remaining files are still fetched.
	for x in somefiles:
	    try:
	        urlretrieve(x, x.split("/")[-1])
	    except Exception as e:
	        print("%s: %s" % (x, e))