Created
October 12, 2017 17:08
-
-
Save sgibbes/684d88cee14c06f4de55ba7bc75d6f38 to your computer and use it in GitHub Desktop.
Find the tile IDs common to the TSV-converted WDPA and IFL data on S3, and download the matching files to a local directory.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Download the IFL and WDPA tile TSVs whose tile id exists for both boundaries.

The script lists the objects under ``sam/`` in the ``gfw2-data`` bucket,
groups the ``<boundary>__<tile_id>.tsv`` file names by boundary, intersects
the tile ids of the two boundaries, and downloads the matching files into a
local directory named after each boundary (``ifl/`` and ``wdpa/``).
"""
import os

import boto3
from boto.s3.connection import S3Connection

BUCKET_NAME = 'gfw2-data'
# Keys are downloaded from this "directory", so only list keys inside it
# (a bare 'sam' prefix would also match sibling keys such as 'sample...').
S3_PREFIX = 'sam/'
BOUNDARIES = ['ifl', 'wdpa']
TSV_SUFFIX = '.tsv'

# connect to the s3 bucket
conn = S3Connection(host="s3.amazonaws.com")
bucket = conn.get_bucket(BUCKET_NAME)
s3 = boto3.resource('s3')

# loop through file names in the bucket
full_path_list = [key.name for key in bucket.list(prefix=S3_PREFIX)]

# unpack the filename from the list of files
filename_only_list = [x.split('/')[-1] for x in full_path_list]

# make dictionary of {'boundary name': [tile ids]}
boundary_dict = {}
for boundary in BOUNDARIES:
    name_prefix = boundary + '__'
    boundary_tiles = []
    for name in filename_only_list:
        # Only '<boundary>__<tile_id>.tsv' names can be downloaded below.
        if not (name.startswith(name_prefix) and name.endswith(TSV_SUFFIX)):
            continue
        # Slice the prefix and suffix off instead of using str.strip('.tsv'):
        # strip() removes any of the characters '.', 't', 's', 'v' from both
        # ends, which corrupts tile ids that start or end with those letters.
        tile_id = name[len(name_prefix):-len(TSV_SUFFIX)]
        boundary_tiles.append(tile_id)
    boundary_dict[boundary] = boundary_tiles

# find tiles that are the same in both lists from the dictionary
same = set(boundary_dict['ifl']) & set(boundary_dict['wdpa'])

# download the tsv files that are the same in both
s3_bucket = s3.Bucket(BUCKET_NAME)
for bound in BOUNDARIES:
    if not os.path.exists(bound):
        os.mkdir(bound)
    # sorted() only makes the download order reproducible between runs
    for tileid in sorted(same):
        filename = '{0}__{1}{2}'.format(bound, tileid, TSV_SUFFIX)
        s3_bucket.download_file(S3_PREFIX + filename,
                                os.path.join(bound, filename))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment