curl http://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz | gunzip > /tmp/scene_list
# download 1000 random JSON scene metadata
python landsat8-aws-es.py fetch /tmp/scene_list
# publish the JSON scene metadata in current directory as STAC records to an ES index
python landsat8-aws-es.py load ./
# see results in ES
curl "http://localhost:9200/landsat8-aws/_search?pretty=true" | more
Last active
November 7, 2019 02:48
-
-
Save tomkralidis/3b6263ec9fbd84e6b50d79527dda149f to your computer and use it in GitHub Desktop.
STAC Landsat 8 AWS to ES tooling in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import json | |
import random | |
import requests | |
import sys | |
from elasticsearch import Elasticsearch | |
LIMIT = 1000 | |
def fetch_sample_records(lines): | |
for i in range(1, LIMIT): | |
line = random.choice(lines) | |
download_url = line.split(',')[-1] | |
scene_id = download_url.split('/')[8] | |
json_filename = '{}_MTL.json'.format(scene_id) | |
metadata_url = download_url.replace('index.html', json_filename) | |
print(metadata_url) | |
with open(json_filename, 'wb') as fh: | |
fh.write(requests.get(metadata_url).content) | |
def generate_stac_item(record): | |
minx = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_LR_LON_PRODUCT'] | |
miny = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_LR_LAT_PRODUCT'] | |
maxx = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_UR_LON_PRODUCT'] | |
maxy = record['L1_METADATA_FILE']['PRODUCT_METADATA']['CORNER_UR_LAT_PRODUCT'] | |
datetime = '{}T{}'.format(record['L1_METADATA_FILE']['PRODUCT_METADATA']['DATE_ACQUIRED'], record['L1_METADATA_FILE']['PRODUCT_METADATA']['SCENE_CENTER_TIME']) | |
d = { | |
'id': record['L1_METADATA_FILE']['METADATA_FILE_INFO']['LANDSAT_SCENE_ID'], | |
'bbox': [minx, miny, maxx, maxy], | |
'geometry': { | |
'type': 'Polygon', | |
'coordinates': [[ | |
[minx, miny], | |
[minx, maxy], | |
[maxx, maxy], | |
[maxx, miny], | |
[minx, miny] | |
]], | |
}, | |
'type': 'Feature', | |
'properties': { | |
'datetime': datetime, | |
'eo:platform': record['L1_METADATA_FILE']['PRODUCT_METADATA']['SPACECRAFT_ID'], | |
'eo:instrument': record['L1_METADATA_FILE']['PRODUCT_METADATA']['SENSOR_ID'], | |
'eo:cloud_cover': record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['CLOUD_COVER'], | |
'landsat:wrs_path': record['L1_METADATA_FILE']['PRODUCT_METADATA']['WRS_PATH'], | |
'landsat:wrs_row': record['L1_METADATA_FILE']['PRODUCT_METADATA']['WRS_ROW'], | |
'landsat:image_quality_oli': record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['IMAGE_QUALITY_OLI'], | |
'landsat:earth_sun_distance': record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['EARTH_SUN_DISTANCE'], | |
'landsat:scene_id': record['L1_METADATA_FILE']['METADATA_FILE_INFO']['LANDSAT_SCENE_ID'], | |
'landsat:product_id': record['L1_METADATA_FILE']['METADATA_FILE_INFO']['LANDSAT_PRODUCT_ID'], | |
'landsat:processingLevel': record['L1_METADATA_FILE']['PRODUCT_METADATA']['DATA_TYPE'] | |
}, | |
'assets': {} | |
} | |
if 'IMAGE_QUALITY_TIRS' in record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']: | |
d['properties']['eo:image_quality_tirs'] = record['L1_METADATA_FILE']['IMAGE_ATTRIBUTES']['IMAGE_QUALITY_TIRS'] | |
url = 'https://s3-us-west-2.amazonaws.com/landsat-pds/c1/L8/{}/{}/{}/{}'.format( | |
str(d['properties']['landsat:wrs_path']).zfill(3), | |
str(d['properties']['landsat:wrs_row']).zfill(3), | |
d['properties']['landsat:product_id'], d['properties']['landsat:product_id']) | |
thumb_small = '{}_thumb_small.jpg'.format(url) | |
thumb_large = '{}_thumb_large.jpg'.format(url) | |
d['assets']['thumbnail'] = {'href': thumb_small} | |
d['assets']['thumbnail_large'] = {'href': thumb_large} | |
for i in range(1, 13): | |
band = 'B{}'.format(i) | |
b_url = '{}_{}.TIF'.format(url, band) | |
d['assets'][band] = { | |
'href': b_url, | |
'type': 'image/tiff; application=geotiff; profile=cloud-optimized' | |
} | |
band_ovr = 'B{}_OVR'.format(i) | |
b_url_ovr = '{}_{}.TIF.ovr'.format(url, band) | |
d['assets'][band_ovr] = { | |
'href': b_url_ovr | |
} | |
return d | |
def load_es(dir_): | |
index_name = 'landsat8-aws' | |
type_name = 'FeatureCollection' | |
settings = { | |
'mappings': { | |
'FeatureCollection': { | |
'properties': { | |
'geometry': { | |
'type': 'geo_shape' | |
} | |
} | |
} | |
} | |
} | |
es = Elasticsearch() | |
if es.indices.exists(index_name): | |
es.indices.delete(index_name) | |
# create index | |
es.indices.create(index=index_name, body=settings, request_timeout=90) | |
for f in glob.glob('{}/*.json'.format(dir_)): | |
with open(f) as fh: | |
stac_item = generate_stac_item(json.load(fh)) | |
try: | |
res = es.index(index=index_name, doc_type=type_name, | |
id=stac_item['id'], body=stac_item) | |
except Exception as err: | |
print(stac_item) | |
print(err) | |
if __name__ == '__main__': | |
if sys.argv[1] == 'fetch': | |
with open(sys.argv[2]) as fh: | |
lines = fh.read().splitlines() | |
fetch_sample_records(lines) | |
elif sys.argv[1] == 'load': | |
load_es(sys.argv[2]) | |
else: | |
print('Usage: {} <fetch|load>'.format(sys.argv[0])) | |
sys.exit(2) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment