Last active
January 14, 2016 20:30
-
-
Save flyingeek/9d5086085b17e8ba27a5 to your computer and use it in GitHub Desktop.
Extraction des terrains OACI disposant d'un D-ATIS à partir du PDF AFC_07JAN.pdf. En sortie : un fichier texte contenant un seul code OACI (4 lettres) par ligne, pour chaque terrain qui dispose d'un D-ATIS.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import subprocess
import sys

# Input PDF and output text file; edit these paths before running.
pdf_file = '/Users/Eric/Downloads/AFC_07JAN.pdf'
text_file = '/Users/Eric/Desktop/D-ATIS_AFC_07JAN.txt'

# pdftotext is required
# OSX: install HomeBrew then brew install poppler
# Windows: bad luck ?
sys.stdout.write("Converting %s to text ..." % pdf_file)
# The message has no trailing newline, so flush explicitly; otherwise it is
# only displayed once the (slow) conversion has already finished.
sys.stdout.flush()

# check_output returns bytes on Python 3, so decode once here and work with
# text everywhere else (pdftotext emits UTF-8 unless told otherwise via -enc).
raw_text = subprocess.check_output(['pdftotext', pdf_file, '-'])
# Split on '\n' only: splitlines() would also break on the form feed
# characters that are needed later to detect page boundaries.
lines = raw_text.decode('utf-8', 'replace').split('\n')
print("Done !")

# Per-page parsing state, reset at every form feed.
airport = None   # OACI code found on the current page, if any
datis = False    # True once a D-ATIS mention was seen on the current page
found = []       # OACI codes of every airport that has a D-ATIS
def add_airport_if_datis():
    """Record the current airport if a D-ATIS was seen alongside it.

    Reads the module-level ``airport`` and ``datis`` state and appends the
    airport code to the module-level ``found`` list. Nothing is added when
    no airport is set, when no D-ATIS was seen, or when the code is already
    in ``found``.
    """
    if datis and airport and airport not in found:
        found.append(airport)
# Compiled once here instead of being handed to re.match() on every line.
# A line may start with either "IATA-OACI" or "OACI-IATA"; only the 4-letter
# OACI group is kept.
_iata_oaci_re = re.compile(r"(?P<iata>[A-Z]{3})-(?P<oaci>[A-Z]{4})")
_oaci_iata_re = re.compile(r"(?P<oaci>[A-Z]{4})-(?P<iata>[A-Z]{3})")
# "D-ATIS" at the start of a line, case-insensitive, tolerating whitespace
# between the characters.
_datis_re = re.compile(r"d\s*?-\s*?a\s*?t\s*?i\s*?s", re.I)

for line in lines:
    # for each page (\f is FormFeed) check if airport and datis found
    if '\f' in line:
        add_airport_if_datis()
        # Start the new page with a clean state.
        airport = None
        datis = False
    line = line.strip()
    if not line:
        continue
    # Same precedence as before: IATA-OACI is tried first, then OACI-IATA.
    m = _iata_oaci_re.match(line) or _oaci_iata_re.match(line)
    if m:
        airport = m.group('oaci')
        continue
    if _datis_re.match(line):
        datis = True

# run again at EOF
add_airport_if_datis()
# Write the collected OACI codes, sorted, one per line.
sys.stdout.write("Saving to %s ..." % text_file)
# No trailing newline above, so flush to make the progress message visible
# before the file is written.
sys.stdout.flush()
with open(text_file, 'w') as output:
    output.writelines("%s\n" % item for item in sorted(found))
# Parenthesised form works as a statement on Python 2 and a call on Python 3.
print("Done !")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment