Last active
May 11, 2016 07:51
-
-
Save peterdesmet/e85479aca7055dbf8e79a0d96c333cd8 to your computer and use it in GitHub Desktop.
Python script to get specific metadata from GBIF-registered datasets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import csv
import json
import sys

import requests
def get_datasets(offset, limit):
    """Fetch one page of dataset records from the GBIF dataset endpoint.

    Args:
        offset: Value sent as the ``offset`` query parameter.
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        The value stored under the ``results`` key of the decoded JSON
        response.

    Raises:
        requests.exceptions.RequestException: If the connection fails, the
            request times out, or the server answers with an HTTP error
            status.
    """
    # Use stderr to not contaminate stdout, which is used for the results
    sys.stderr.write('Requesting batch {}\n'.format(offset))
    response = requests.get(
        'http://api.gbif.org/v1/dataset/',
        params={'limit': limit, 'offset': offset},
        # requests applies no timeout by default, so a stalled connection
        # would otherwise block the script forever.
        timeout=60
    )
    # Fail with a clear HTTP error instead of a confusing JSON/KeyError
    # failure when the server returns a 4xx/5xx response.
    response.raise_for_status()
    return response.json()['results']
def parse_metadata(dataset, fields):
    """Extract the requested fields of one dataset as a list of CSV cells.

    Args:
        dataset: Mapping of field names to values for a single dataset.
        fields: Iterable of field names to extract, in output order.

    Returns:
        A list with one cell per entry in ``fields``. Fields that are
        missing or null become ``''``; values that are not text (numbers,
        booleans, lists, objects) are serialised with ``json.dumps``. On
        Python 2 text cells are UTF-8 encoded byte strings; on Python 3
        they are left as ``str``.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    # Only Python 2 (where ``str`` is ``bytes``) needs text encoded before
    # it is handed to the csv writer; on Python 3 encoding would make the
    # writer emit ``b'...'`` representations.
    encode_text = str is bytes

    metadata = []
    for field in fields:
        value = dataset.get(field)
        if value is None:
            # Missing key or JSON null: emit an empty cell.
            value = ''
        elif not isinstance(value, (text_type, bytes)):
            # Non-text values have no ``.encode``; serialise them instead
            # of crashing with AttributeError.
            value = json.dumps(value)
        if encode_text and isinstance(value, text_type):
            value = value.encode('utf-8')
        metadata.append(value)
    return metadata
def main(limit=20):
    """Write the requested dataset fields to stdout as CSV.

    The field names are taken from the command-line arguments. A header
    row with those names is written first, then one row per dataset.
    Batches of ``limit`` datasets are requested until an empty batch is
    returned.

    Args:
        limit: Number of datasets requested per batch.
    """
    # Fields to keep: everything after the script name itself
    columns = sys.argv[1:]

    out = csv.writer(sys.stdout, lineterminator='\n')
    # Header row
    out.writerow(columns)

    position = 0
    while True:
        batch = get_datasets(position, limit)
        if len(batch) == 0:
            # Nothing returned, stop requesting
            break
        out.writerows(parse_metadata(record, columns) for record in batch)
        position += limit
if __name__ == '__main__':  # If run from command line
    # Exit with whatever main() returns as the process status.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage:
python get_gbif_datasets.py key title type rights > datasets.csv