amrrs · May 27, 2019 11:51
diff --git a/hinton.py b/hinton.py
 # download this file
 # install bs4 (pip install beautifulsoup4); re and urllib ship with Python
 # run python3 hinton.py
 from urllib import request
 import re
 import os
 from bs4 import BeautifulSoup

 def main():
 	"""Download the relatively-linked PDFs listed on Hinton's papers page.

 	Fetches the papers index, collects every anchor whose href contains
 	".pdf", and saves each site-relative link into the current working
 	directory under its own file name. Absolute (http/https) links are
 	skipped, as are links whose file name is empty or just ".pdf".
 	A failed download is reported and does not stop the run.
 	"""
 	base_url = "http://www.cs.toronto.edu/~hinton/"
 	curdir = os.getcwd()

 	url = base_url + "papers.html"
 	print("Fetching articles information: ", url)
 	with request.urlopen(url) as response:
 		# Tolerate stray non-UTF-8 bytes instead of aborting on the index page.
 		page = response.read().decode('utf-8', errors='replace')

 	soup = BeautifulSoup(page, 'html.parser')
 	links = soup.find_all('a', href=re.compile(r'(\.pdf)'))

 	for link in links:
 		href = str(link.attrs['href'])
 		# Only site-relative links are fetched; absolute URLs are left alone.
 		# Testing the scheme (not just a leading 'h') keeps relative links
 		# such as "hinton.pdf" from being dropped by mistake.
 		if href.startswith(('http://', 'https://')):
 			continue

 		fullurl = base_url + href
 		print(fullurl)

 		# The last path component is the local file name; this makes no
 		# assumption about how long the directory prefix of the href is.
 		filename = href.rsplit('/', 1)[-1]
 		if filename in ('', '.pdf'):
 			continue

 		try:
 			request.urlretrieve(fullurl, os.path.join(curdir, filename))
 		except Exception as e:
 			# Best effort: say which download failed and why, then keep going.
 			print('error', fullurl, e)



 # Run the downloader only when executed as a script, not when imported.
 if __name__ == "__main__":
 	main()
	# download this file
	# install bs4 (pip install beautifulsoup4); re and urllib ship with Python
	# run python3 hinton.py
	from urllib import request
	import re
	import os
	from bs4 import BeautifulSoup

	def main():
		"""Download the relatively-linked PDFs listed on Hinton's papers page.

		Fetches the papers index, collects every anchor whose href contains
		".pdf", and saves each site-relative link into the current working
		directory under its own file name. Absolute (http/https) links are
		skipped, as are links whose file name is empty or just ".pdf".
		A failed download is reported and does not stop the run.
		"""
		base_url = "http://www.cs.toronto.edu/~hinton/"
		curdir = os.getcwd()

		url = base_url + "papers.html"
		print("Fetching articles information: ", url)
		with request.urlopen(url) as response:
			# Tolerate stray non-UTF-8 bytes instead of aborting on the index page.
			page = response.read().decode('utf-8', errors='replace')

		soup = BeautifulSoup(page, 'html.parser')
		links = soup.find_all('a', href=re.compile(r'(\.pdf)'))

		for link in links:
			href = str(link.attrs['href'])
			# Only site-relative links are fetched; absolute URLs are left alone.
			# Testing the scheme (not just a leading 'h') keeps relative links
			# such as "hinton.pdf" from being dropped by mistake.
			if href.startswith(('http://', 'https://')):
				continue

			fullurl = base_url + href
			print(fullurl)

			# The last path component is the local file name; this makes no
			# assumption about how long the directory prefix of the href is.
			filename = href.rsplit('/', 1)[-1]
			if filename in ('', '.pdf'):
				continue

			try:
				request.urlretrieve(fullurl, os.path.join(curdir, filename))
			except Exception as e:
				# Best effort: say which download failed and why, then keep going.
				print('error', fullurl, e)



	# Run the downloader only when executed as a script, not when imported.
	if __name__ == "__main__":
		main()