apoloval · April 27, 2014 10:12 · yvoictra · Dec 14, 2016
diff --git a/aena-charts.py b/aena-charts.py
 import os, errno, urllib2
 from bs4 import BeautifulSoup

 # Page the crawl starts from; it is passed to download() by the module-level
 # call at the end of the script.
 # NOTE(review): presumably the AENA aeronautical-charts index page -- confirm.
 BASE_URL = "http://www.aena.es/csee/Satellite/navegacion-aerea/es/Page/1078418725163/?other=1083158950596#ancla3"
 # Default value of the `retries` parameter of download_file().
 DEFAULT_RETRIES = 3

 def leaf_open_list(soup):
 	"""Return the innermost ``li`` element with class ``listOpened``.

 	Starting at *soup*, follow the first ``li.listOpened`` found inside the
 	current node until no deeper one exists, and return the last one found.

 	Returns ``None`` when *soup* is falsy; when *soup* holds no such element
 	the (empty) result of ``soup.find`` is returned as-is.
 	"""
 	if not soup:
 		return None
 	# Iterative descent: keep the last match and stop at the first miss.
 	current = soup.find("li", class_="listOpened")
 	while current:
 		deeper = current.find("li", class_="listOpened")
 		if not deeper:
 			break
 		current = deeper
 	return current

 def make_dir(dir):
 	"""Create directory *dir*, tolerating the case where it already exists.

 	Raises:
 		OSError: if ``os.mkdir`` fails for any reason other than *dir*
 			already existing as a directory.
 	"""
 	try:
 		os.mkdir(dir)
 	except OSError as e:
 		# Only "already exists, and it is a directory" is acceptable.
 		if e.errno != errno.EEXIST or not os.path.isdir(dir):
 			raise

 def download_file(url, filename, retries=DEFAULT_RETRIES):
 	"""Download *url* and store the response body in *filename*.

 	Args:
 		url: address to fetch with ``urllib2.urlopen``.
 		filename: path of the file to write (opened in binary mode).
 		retries: how many additional attempts are made after a failure,
 			so at most ``retries + 1`` attempts are performed.

 	Raises:
 		Exception: the error of the last attempt, once no retries remain.
 	"""
 	attempts_left = retries
 	while True:
 		try:
 			web_content = urllib2.urlopen(url)
 			try:
 				data = web_content.read()
 			finally:
 				# Close only what was actually opened; an unconditional
 				# outer ``finally`` would touch unbound names when
 				# urlopen() itself fails.
 				web_content.close()
 			# Open the target only after the body has arrived so a failed
 			# download does not leave an empty file behind.
 			with open(filename, "wb") as target_file:
 				target_file.write(data)
 			return
 		except Exception as e:
 			print("Error while downloading %s: %s" % (url, e))
 			if attempts_left <= 0:
 				raise
 			attempts_left -= 1

 def download_html(url):
 	"""Fetch *url* and return the response body.

 	Returns:
 		The body as returned by ``read()``, or ``None`` when the request
 		fails with ``urllib2.HTTPError`` (the error is printed).

 	Only ``urllib2.HTTPError`` is handled here; any other exception
 	propagates to the caller.
 	"""
 	try:
 		response = urllib2.urlopen(url)
 		try:
 			return response.read()
 		finally:
 			# Release the connection instead of leaving it to the GC.
 			response.close()
 	except urllib2.HTTPError as e:
 		print("Error: cannot download webpage from URL %s: %s" % (url, e))
 		return None

 def download(url, folder):
 	"""Mirror the listing found at *url* into directory *folder*.

 	The page is parsed with BeautifulSoup and the innermost opened list item
 	under the element with id ``contentLists`` is located. Inside its ``ul``:

 	* every ``li`` matched by ``class_=""`` is downloaded as
 	  ``<folder>/<title>.pdf``;
 	* every ``li`` with class ``listClosed`` is treated as a subfolder and
 	  processed recursively.

 	Titles come from the ``strong`` element inside each item's link, with
 	``/`` replaced by ``_``.

 	Returns ``None`` in all cases; the function returns early when the page
 	cannot be downloaded or the expected list structure is missing.
 	"""
 	make_dir(folder)
 	body = download_html(url)
 	if not body:
 		return None
 	soup = BeautifulSoup(body)
 	# leaf_open_list() yields None when no opened list item exists; check it
 	# before calling .find() so a missing list ends the crawl of this page
 	# quietly instead of raising AttributeError.
 	leaf = leaf_open_list(soup.find(id="contentLists"))
 	if not leaf:
 		return None
 	open_list = leaf.find("ul")
 	if not open_list:
 		return None

 	for li in open_list.find_all("li", class_=""):
 		link = li.find("a")
 		file_url = "http://www.aena.es/%s" % link.get("href")
 		filename = link.find("strong").get_text().replace("/", "_")
 		pdf_file = "%s/%s.pdf" % (folder, filename)
 		print("Downloading %s..." % pdf_file)
 		download_file(file_url, pdf_file)

 	for li in open_list.find_all("li", class_="listClosed"):
 		link = li.find("a")
 		sub_url = "http://www.aena.es/%s" % link.get("href")
 		subfolder = "%s/%s" % (folder, link.find("strong").get_text().replace("/", "_"))
 		print("Entering subfolder %s" % subfolder)
 		download(sub_url, subfolder)

 # Script entry point: crawl starting at BASE_URL and store the results under
 # the "Airports" directory. This is a module-level call, so it runs whenever
 # the file is executed.
 download(BASE_URL, "Airports")
	import os, errno, urllib2
	from bs4 import BeautifulSoup

	# Page the crawl starts from; it is passed to download() by the module-level
	# call at the end of the script.
	# NOTE(review): presumably the AENA aeronautical-charts index page -- confirm.
	BASE_URL = "http://www.aena.es/csee/Satellite/navegacion-aerea/es/Page/1078418725163/?other=1083158950596#ancla3"
	# Default value of the `retries` parameter of download_file().
	DEFAULT_RETRIES = 3

	def leaf_open_list(soup):
		"""Return the innermost ``li`` element with class ``listOpened``.

		Starting at *soup*, follow the first ``li.listOpened`` found inside the
		current node until no deeper one exists, and return the last one found.

		Returns ``None`` when *soup* is falsy; when *soup* holds no such element
		the (empty) result of ``soup.find`` is returned as-is.
		"""
		if not soup:
			return None
		# Iterative descent: keep the last match and stop at the first miss.
		current = soup.find("li", class_="listOpened")
		while current:
			deeper = current.find("li", class_="listOpened")
			if not deeper:
				break
			current = deeper
		return current

	def make_dir(dir):
		"""Create directory *dir*, tolerating the case where it already exists.

		Raises:
			OSError: if ``os.mkdir`` fails for any reason other than *dir*
				already existing as a directory.
		"""
		try:
			os.mkdir(dir)
		except OSError as e:
			# Only "already exists, and it is a directory" is acceptable.
			if e.errno != errno.EEXIST or not os.path.isdir(dir):
				raise

	def download_file(url, filename, retries=DEFAULT_RETRIES):
		"""Download *url* and store the response body in *filename*.

		Args:
			url: address to fetch with ``urllib2.urlopen``.
			filename: path of the file to write (opened in binary mode).
			retries: how many additional attempts are made after a failure,
				so at most ``retries + 1`` attempts are performed.

		Raises:
			Exception: the error of the last attempt, once no retries remain.
		"""
		attempts_left = retries
		while True:
			try:
				web_content = urllib2.urlopen(url)
				try:
					data = web_content.read()
				finally:
					# Close only what was actually opened; an unconditional
					# outer ``finally`` would touch unbound names when
					# urlopen() itself fails.
					web_content.close()
				# Open the target only after the body has arrived so a failed
				# download does not leave an empty file behind.
				with open(filename, "wb") as target_file:
					target_file.write(data)
				return
			except Exception as e:
				print("Error while downloading %s: %s" % (url, e))
				if attempts_left <= 0:
					raise
				attempts_left -= 1

	def download_html(url):
		"""Fetch *url* and return the response body.

		Returns:
			The body as returned by ``read()``, or ``None`` when the request
			fails with ``urllib2.HTTPError`` (the error is printed).

		Only ``urllib2.HTTPError`` is handled here; any other exception
		propagates to the caller.
		"""
		try:
			response = urllib2.urlopen(url)
			try:
				return response.read()
			finally:
				# Release the connection instead of leaving it to the GC.
				response.close()
		except urllib2.HTTPError as e:
			print("Error: cannot download webpage from URL %s: %s" % (url, e))
			return None

	def download(url, folder):
		"""Mirror the listing found at *url* into directory *folder*.

		The page is parsed with BeautifulSoup and the innermost opened list item
		under the element with id ``contentLists`` is located. Inside its ``ul``:

		* every ``li`` matched by ``class_=""`` is downloaded as
		  ``<folder>/<title>.pdf``;
		* every ``li`` with class ``listClosed`` is treated as a subfolder and
		  processed recursively.

		Titles come from the ``strong`` element inside each item's link, with
		``/`` replaced by ``_``.

		Returns ``None`` in all cases; the function returns early when the page
		cannot be downloaded or the expected list structure is missing.
		"""
		make_dir(folder)
		body = download_html(url)
		if not body:
			return None
		soup = BeautifulSoup(body)
		# leaf_open_list() yields None when no opened list item exists; check it
		# before calling .find() so a missing list ends the crawl of this page
		# quietly instead of raising AttributeError.
		leaf = leaf_open_list(soup.find(id="contentLists"))
		if not leaf:
			return None
		open_list = leaf.find("ul")
		if not open_list:
			return None

		for li in open_list.find_all("li", class_=""):
			link = li.find("a")
			file_url = "http://www.aena.es/%s" % link.get("href")
			filename = link.find("strong").get_text().replace("/", "_")
			pdf_file = "%s/%s.pdf" % (folder, filename)
			print("Downloading %s..." % pdf_file)
			download_file(file_url, pdf_file)

		for li in open_list.find_all("li", class_="listClosed"):
			link = li.find("a")
			sub_url = "http://www.aena.es/%s" % link.get("href")
			subfolder = "%s/%s" % (folder, link.find("strong").get_text().replace("/", "_"))
			print("Entering subfolder %s" % subfolder)
			download(sub_url, subfolder)

	# Script entry point: crawl starting at BASE_URL and store the results under
	# the "Airports" directory.
	download(BASE_URL, "Airports")