Skip to content

Instantly share code, notes, and snippets.

@apoloval
Created April 27, 2014 10:12
Show Gist options
  • Save apoloval/11342157 to your computer and use it in GitHub Desktop.
Save apoloval/11342157 to your computer and use it in GitHub Desktop.
A Python 2 script (it uses urllib2) that downloads the navigation charts for all Spanish airports from the Aena website. It requires BeautifulSoup4 to be installed on your system.
import os, errno, urllib2
from bs4 import BeautifulSoup
# Listing page where the crawl starts; download() is invoked on it at the
# bottom of the script.
# NOTE(review): a user has reported this URL answering HTTP 404 -- confirm it
# is still valid before relying on the script.
BASE_URL = "http://www.aena.es/csee/Satellite/navegacion-aerea/es/Page/1078418725163/?other=1083158950596#ancla3"
# Default number of extra attempts download_file() makes after a failed
# download (so a file is tried at most DEFAULT_RETRIES + 1 times).
DEFAULT_RETRIES = 3
def leaf_open_list(soup):
    """Return the innermost ``<li class="listOpened">`` found under *soup*.

    Starting from *soup*, the first ``li.listOpened`` descendant is looked
    up, then the first one inside that element, and so on until no deeper
    match exists. The last match is returned; the result is ``None`` when
    *soup* is falsy or contains no such element at all.
    """
    if not soup:
        return None
    current = soup.find("li", class_="listOpened")
    # Keep descending while the current match itself contains another match.
    while current:
        deeper = current.find("li", class_="listOpened")
        if not deeper:
            break
        current = deeper
    return current
def make_dir(dir):
    """Create the directory *dir*, tolerating the case where it already exists.

    Only the last path component is created (``os.mkdir``); parents must
    already exist.

    Raises:
        OSError: if the directory cannot be created for any reason other
            than it already existing as a directory (for example a regular
            file with that name exists, or the parent is missing).
    """
    try:
        os.mkdir(dir)
    except OSError as err:
        # An existing directory is the one failure that counts as success.
        already_there = err.errno == errno.EEXIST and os.path.isdir(dir)
        if not already_there:
            raise
def download_file(url, filename, retries=DEFAULT_RETRIES):
    """Download *url* and store its content in the file *filename*.

    Every failed attempt is reported on stdout. After a failure the download
    is tried again until *retries* extra attempts have been used up, at which
    point the last error is re-raised to the caller.

    Args:
        url: address of the resource to fetch.
        filename: path of the file to (over)write with the downloaded bytes.
        retries: number of additional attempts allowed after a failure; zero
            or a negative value means a single attempt.

    Raises:
        Exception: whatever the final failed attempt raised (network or
            file-system error).
    """
    # Local import keeps this function self-contained; urllib2 responses are
    # not context managers on Python 2, hence closing().
    from contextlib import closing

    attempts_left = retries
    while True:
        try:
            # Fetch the whole body before touching the target file, so a
            # network failure never leaves a truncated file behind.
            with closing(urllib2.urlopen(url)) as web_content:
                data = web_content.read()
            with open(filename, "wb") as target_file:
                target_file.write(data)
            return
        except Exception as e:
            print("Error while downloading %s: %s" % (url, e))
            if attempts_left <= 0:
                raise
            attempts_left -= 1
def download_html(url):
    """Fetch *url* and return the raw response body.

    This is a best-effort helper: when the page cannot be retrieved, the
    error is printed and ``None`` is returned instead of raising.

    Args:
        url: address of the web page to fetch.

    Returns:
        The response body as returned by ``read()``, or ``None`` when the
        request failed with an HTTP error status or a connection-level
        ``URLError``.
    """
    # Local import keeps this function self-contained; urllib2 responses are
    # not context managers on Python 2, hence closing().
    from contextlib import closing

    try:
        with closing(urllib2.urlopen(url)) as response:
            return response.read()
    except urllib2.URLError as e:
        # HTTPError is a subclass of URLError, so bad status codes and
        # unreachable hosts are both reported here.
        print("Error: cannot download webpage from URL %s: %s" % (url, e))
        return None
def _link_and_name(li):
    """Return ``(absolute_url, safe_name)`` for one entry of a chart list.

    The URL is built from the ``href`` of the entry's first ``<a>``; the name
    is the text of the ``<strong>`` inside that anchor with ``/`` replaced by
    ``_`` so it can be used as a single path component.
    """
    anchor = li.find("a")
    link = "http://www.aena.es/%s" % anchor.get("href")
    name = anchor.find("strong").get_text().replace("/", "_")
    return link, name


def download(url, folder):
    """Recursively mirror the chart listing at *url* into *folder*.

    The page is fetched and the ``<ul>`` inside the innermost opened list
    item under the element with id ``contentLists`` is located. Within it,
    ``<li>`` entries matched by ``class_=""`` are downloaded as
    ``<folder>/<name>.pdf`` and ``<li class="listClosed">`` entries are
    treated as sub-listings and crawled into ``<folder>/<name>``.

    Args:
        url: address of the listing page to process.
        folder: directory that receives the downloaded files; it is created
            if it does not exist yet.

    Returns:
        Always ``None``. The function returns early, without error, when the
        page cannot be downloaded or does not contain the expected list.
    """
    make_dir(folder)
    body = download_html(url)
    if not body:
        return None
    soup = BeautifulSoup(body)
    # Check every lookup before dereferencing it: a page without the
    # expected structure must be skipped, not crash with AttributeError.
    leaf = leaf_open_list(soup.find(id="contentLists"))
    if leaf is None:
        return None
    open_list = leaf.find("ul")
    if open_list is None:
        return None
    for li in open_list.find_all("li", class_=""):
        item_url, name = _link_and_name(li)
        pdf_file = "%s/%s.pdf" % (folder, name)
        print("Downloading %s..." % pdf_file)
        download_file(item_url, pdf_file)
    for li in open_list.find_all("li", class_="listClosed"):
        item_url, name = _link_and_name(li)
        subfolder = "%s/%s" % (folder, name)
        print("Entering subfolder %s" % subfolder)
        download(item_url, subfolder)
    return None
# Script entry point: crawl the listing at BASE_URL and store everything found
# under the "Airports" directory (relative to the current working directory).
download(BASE_URL, "Airports")
@yvoictra
Copy link

It doesn't work...

root@vps339592:~# python aena.py
Error: cannot download webpage from URL http://www.aena.es/csee/Satellite/navegacion-aerea/es/Page/1078418725163/?other=1083158950596#ancla3: HTTP Error 404: Not Found

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment