AntonOsika · March 17, 2018 13:25
diff --git a/downloadURLs.py b/downloadURLs.py

 import urllib
 import os
 import re
 from contextlib import closing

 # urlopen and raw_input only exist under these names on Python 2; fall back
 # to the Python 3 equivalents so the script runs on either interpreter.
 try:
 	from urllib import urlopen
 except ImportError:
 	from urllib.request import urlopen
 try:
 	ask = raw_input
 except NameError:
 	ask = input

 ##############################
 # Downloads a file for every link it finds on the index page and saves
 # each one in the current working directory under the link's base name.
 # The URLs can be handpicked by editing the regexes that fill fileURLs.
 ##############################


 # closing() calls f.close() on exit, even if read() raises.
 with closing(urlopen("http://www.math.kth.se/matstat/gru/sf2943/matlabfunctions.html")) as f:
 	source = f.read()
 # Python 3 returns bytes from read(), but the str regexes below need text.
 # latin-1 maps every byte to a character, so this decode cannot fail.
 if not isinstance(source, str):
 	source = source.decode('latin-1')

 fileURLs = []
 # fileURLs += re.findall(r'href=[\'"]?([^\'">]+)', source)  # only CAPITAL HREF is interesting in this case
 fileURLs += re.findall(r'HREF=[\'"]?([^\'">]+)', source)


 URLstart = ''
 # If the URL references are relative (comment out the next line if the
 # hrefs are already absolute):
 URLstart = 'http://www.math.kth.se/matstat/gru/sf2943/'

 print('Getting URLs that are concatenation with URLstart: \n' + URLstart + '\n')
 for x in fileURLs:
 	print(x)

 answer = ask("\nAre these urls OK?\n")
 # Honor the confirmation: abort only on a reply starting with "n"/"N".
 # Any other reply (including just pressing Enter) starts the download.
 if answer.strip().lower().startswith('n'):
 	raise SystemExit('Aborted, nothing was downloaded.')

 for URL in fileURLs:
 	filename = os.path.split(URL)[1]
 	if not filename:
 		# e.g. an href ending in '/': there is no name to save the file under.
 		print('Skipping URL without a file name: ' + URL)
 		continue
 	# 'wb' writes the downloaded bytes untouched; text mode would rewrite
 	# newline bytes on Windows and corrupt binary files.
 	with closing(urlopen(URLstart + URL)) as f, open(filename, 'wb') as g:
 		g.write(f.read())

	import urllib
	import os
	import re
	from contextlib import closing

	# urlopen and raw_input only exist under these names on Python 2; fall back
	# to the Python 3 equivalents so the script runs on either interpreter.
	try:
		from urllib import urlopen
	except ImportError:
		from urllib.request import urlopen
	try:
		ask = raw_input
	except NameError:
		ask = input

	##############################
	# Downloads a file for every link it finds on the index page and saves
	# each one in the current working directory under the link's base name.
	# The URLs can be handpicked by editing the regexes that fill fileURLs.
	##############################


	# closing() calls f.close() on exit, even if read() raises.
	with closing(urlopen("http://www.math.kth.se/matstat/gru/sf2943/matlabfunctions.html")) as f:
		source = f.read()
	# Python 3 returns bytes from read(), but the str regexes below need text.
	# latin-1 maps every byte to a character, so this decode cannot fail.
	if not isinstance(source, str):
		source = source.decode('latin-1')

	fileURLs = []
	# fileURLs += re.findall(r'href=[\'"]?([^\'">]+)', source)  # only CAPITAL HREF is interesting in this case
	fileURLs += re.findall(r'HREF=[\'"]?([^\'">]+)', source)


	URLstart = ''
	# If the URL references are relative (comment out the next line if the
	# hrefs are already absolute):
	URLstart = 'http://www.math.kth.se/matstat/gru/sf2943/'

	print('Getting URLs that are concatenation with URLstart: \n' + URLstart + '\n')
	# The loop bodies below must be indented under their for statements.
	for x in fileURLs:
		print(x)

	answer = ask("\nAre these urls OK?\n")
	# Honor the confirmation: abort only on a reply starting with "n"/"N".
	# Any other reply (including just pressing Enter) starts the download.
	if answer.strip().lower().startswith('n'):
		raise SystemExit('Aborted, nothing was downloaded.')

	for URL in fileURLs:
		filename = os.path.split(URL)[1]
		if not filename:
			# e.g. an href ending in '/': there is no name to save the file under.
			print('Skipping URL without a file name: ' + URL)
			continue
		# 'wb' writes the downloaded bytes untouched; text mode would rewrite
		# newline bytes on Windows and corrupt binary files.
		with closing(urlopen(URLstart + URL)) as f, open(filename, 'wb') as g:
			g.write(f.read())
No results found