Last active
September 26, 2017 12:31
-
-
Save melvyn-sopacua/73f44e64bf8899490dafd299d62b2bea to your computer and use it in GitHub Desktop.
Get all cities in the Netherlands using the official dataset and merge them into a single GML file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
import sys | |
import warnings | |
import requests | |
from lxml import etree | |
# Largest batch size this script will request in a single call.
MAX_COUNT = 1000
# Total number of results to page through; hardcoded so that the
# numberMatched attribute of the response does not have to be parsed.
MAX_RESULTS = 2501

# Building blocks for the names of the per-batch files and the merged file.
FILENAME_PREFIX = 'woonplaatsen'
FILENAME_EXT = 'gml'
DEFAULT_OUTFILE = '.'.join((FILENAME_PREFIX, FILENAME_EXT))
SEPARATOR = '-'

# WFS endpoint and the query parameters shared by every request.
BASE_URL = 'https://geodata.nationaalgeoregister.nl/bag/wfs'
DEFAULT_PARAMS = {
    'request': 'GetFeature',
    'service': 'wfs',
    'typeName': 'bag:woonplaats',
    'sortBy': 'bag:identificatie',
}

# Names of the batch files collected by download() and consumed by merge().
FILES = []
def download(opts):
    """Fetch the feature collection in batches, one file per batch.

    Pages through ``BASE_URL`` from start index 0 up to (but excluding)
    ``opts.max_results`` in steps of the batch size.  Each batch is stored
    under a name built from the file prefix, the batch size and the start
    index.  A batch that is already on disk is reused unless
    ``opts.overwrite`` is set.  The name of every batch that is available on
    disk afterwards is appended to the module-level ``FILES`` list; batches
    whose request failed are reported on stderr and left out.

    :param opts: parsed command-line options; the attributes ``count``,
        ``max_results``, ``overwrite`` and ``quiet`` are read.
    :raises ValueError: if the effective batch size is smaller than 1.
    """
    count = min(MAX_COUNT, opts.count)
    if count < 1:
        # A zero step would never advance the start index (endless loop).
        raise ValueError(
            'batch count must be at least 1, got {:d}'.format(count)
        )
    with requests.session() as s:
        for si in range(0, opts.max_results, count):
            fname = '{pfx:s}{sep:s}c{cnt:04d}{sep:s}i{si:04d}.{ext:s}'.format(
                pfx=FILENAME_PREFIX, sep=SEPARATOR, cnt=count, si=si,
                ext=FILENAME_EXT,
            )
            if os.path.exists(fname) and not opts.overwrite:
                if not opts.quiet:
                    print('File exists: {:s}'.format(fname))
                FILES.append(fname)
                continue

            params = DEFAULT_PARAMS.copy()
            params.update(startIndex=si, count=count)
            # The whole body is needed anyway, so do not request streaming;
            # that also returns the connection to the pool on error replies.
            r = s.get(url=BASE_URL, params=params)
            if not r.ok:
                if not opts.quiet:
                    print('ERROR: {:d}: {:s}'.format(r.status_code, r.url),
                          file=sys.stderr)
                # Do not register a batch that was never written, otherwise
                # merging would try to parse a missing or stale file.
                continue

            # Store the payload byte-for-byte so the file keeps the encoding
            # the server declared, independent of the local default encoding.
            with open(fname, 'wb') as f:
                f.write(r.content)
            if not opts.quiet:
                print('--> {:s}'.format(fname))
            FILES.append(fname)
def merge(opts):
    """Merge every file listed in ``FILES`` into a single document.

    The first file provides the root element; the ``wfs:member`` children
    of the roots of all remaining files are appended to it.  The root's
    ``numberReturned`` attribute is set to the resulting member count and
    its ``previous`` / ``next`` attributes are removed.  The result is
    written, pretty-printed, to ``opts.outfile``.  ``FILES`` itself is left
    unmodified.

    :param opts: parsed command-line options; the attributes ``outfile``
        and ``quiet`` are read.
    :raises IndexError: if ``FILES`` is empty.
    """
    if not FILES:
        raise IndexError('no downloaded files to merge')

    member_xpath_coll = './{http://www.opengis.net/wfs/2.0}member'
    # Slice instead of pop() so that the shared list survives this call.
    first, remaining = FILES[0], FILES[1:]
    doc = etree.parse(first)  # type: etree.ElementTree
    root = doc.getroot()  # type: etree.ElementBase
    for fname in remaining:
        d = etree.parse(fname)
        root.extend(d.getroot().findall(member_xpath_coll))

    # Count members only: the root may hold other children as well, which
    # len(root) would wrongly include.
    root.set('numberReturned',
             '{:d}'.format(len(root.findall(member_xpath_coll))))

    # Shouldn't be in there, but just in case
    if 'previous' in root.attrib:
        if not opts.quiet:
            # The attribute lives on the root element; the tree object
            # returned by etree.parse() has no get() method.
            warnings.warn(
                'Previous member in first file pointing to: {:s}'.format(
                    root.get('previous')
                )
            )
        del root.attrib['previous']
    # This should be present whenever the first file was not the last page;
    # the merged document has no next page, so drop it.
    if 'next' in root.attrib:
        del root.attrib['next']

    if opts.outfile == '-' and not opts.quiet:
        warnings.warn('Outfile set to -, but writing to stdout not supported')
        warnings.warn('This will create a file called "-"')

    with open(opts.outfile, 'wb') as f:
        f.write(etree.tostring(doc, pretty_print=True))
    if not opts.quiet:
        print('Merged to {:s}'.format(opts.outfile))
if __name__ == '__main__':
    # Script entry point: parse the options, download the batches and,
    # unless merging was disabled, combine them into the output file.
    cli = argparse.ArgumentParser(
        description='Download and merge BAG woonplaatsen into one GML file.',
    )
    cli.add_argument(
        '-c', '--count',
        type=int,
        default=MAX_COUNT,
        metavar='batch_count',
        help='Fetch at max this many per request.',
    )
    cli.add_argument(
        '-F', '--overwrite',
        action='store_true',
        help='overwrite existing (previously downloaded) files.',
    )
    cli.add_argument(
        '-m', '--max-results',
        type=int,
        default=MAX_RESULTS,
        metavar='result_count',
        help='Use this for maximum results.',
    )
    cli.add_argument(
        '-M', '--no-merge',
        action='store_true',
        dest='dontmerge',
        help='Only download files, do not merge',
    )
    cli.add_argument(
        '-o', '--outfile',
        default=DEFAULT_OUTFILE,
        metavar='file',
        help='Output file to write the merged file to.',
    )
    cli.add_argument(
        '-q', '--quiet',
        action='store_true',
        help='provide no output',
    )
    args = cli.parse_args(sys.argv[1:])

    download(args)
    if args.dontmerge:
        # An explicit output file has no effect when nothing is merged.
        if not args.quiet and args.outfile != DEFAULT_OUTFILE:
            warnings.warn('outfile argument provided but merge disabled')
    else:
        merge(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment