{
    "remotewcsurl": "http://neso.cryoland.enveo.at/cryoland/ows?service=wcs&version=2.0.0&",
    "localwcsurl": "http://earthserver.eox.at/eoxserver/ows?service=wcs&version=2.0.0&",
    "datasetseries": {
        "daily_FSC_Baltic_Optical": {
            "local_dir": "/home/ungarj/temp/getSnowMaps/CRYOLAND/daily_FSC_Baltic_Optical/",
            "collection": "daily_FSC_Baltic_Optical",
            "datatype": "GreyCube:GreySet3"
        }
    },
    "datasetseries_commented": {
        "multitemp_FSC_Scandinavia_Optical": {
            "local_dir": "/media/data/transfer/CRYOLAND/multitemp_FSC_Scandinavia_Optical/",
            "collection": "multitemp_FSC_Scandinavia_Optical",
            "datatype": "GreyCube:GreySet3"
        },
        "daily_SWE_PanEuropean_Microwave": {
            "local_dir": "/media/data/transfer/CRYOLAND/daily_SWE_PanEuropean_Microwave/",
            "collection": "daily_SWE_PanEuropean_Microwave",
            "datatype": "GreyCube:GreySet3"
        },
        "daily_FSC_PanEuropean_Optical": {
            "local_dir": "/media/data/transfer/CRYOLAND/daily_FSC_PanEuropean_Optical/",
            "collection": "daily_FSC_PanEuropean_Optical",
            "datatype": "GreyCube:GreySet3",
            "xres": "12200",
            "yres": "7400"
        },
        "daily_SCAW_Scandinavia_Radar": {
            "local_dir": "/media/data/transfer/CRYOLAND/daily_SCAW_Scandinavia_Radar/",
            "collection": "daily_SCAW_Scandinavia_Radar",
            "datatype": "GreyCube:GreySet3"
        },
        "daily_FSC_Alps_Optical": {
            "local_dir": "/media/data/transfer/CRYOLAND/daily_FSC_Alps_Optical/",
            "collection": "daily_FSC_Alps_Optical",
            "datatype": "GreyCube:GreySet3"
        },
        "daily_LIE_Baltic_Optical": {
            "local_dir": "/media/data/transfer/CRYOLAND/daily_LIE_Baltic_Optical/",
            "collection": "daily_LIE_Baltic_Optical",
            "datatype": "GreyCube:GreySet3"
        },
        "daily_FSC_PanEuropean_Optical_Uncertainty": {
            "local_dir": "/media/data/transfer/CRYOLAND/daily_FSC_PanEuropean_Optical_Uncertainty/",
            "collection": "daily_FSC_PanEuropean_Optical_Uncertainty",
            "datatype": "GreyCube:GreySet3"
        },
        "multitemp_FSC_Scandinavia_Optical_Radar": {
            "local_dir": "/media/data/transfer/CRYOLAND/multitemp_FSC_Scandinavia_Optical_Radar/",
            "collection": "multitemp_FSC_Scandinavia_Optical_Radar",
            "datatype": "GreyCube:GreySet3"
        }
    },
    "storagedir": "/home/ungarj/temp/getSnowMaps/",
    "ansi_date_start": "datetime(1601, 1, 1)",
    "unix_date_start": "datetime(1970, 1, 1)",
    "crs1": "http://kahlua.eecs.jacobs-university.de:8080/def/crs/EPSG/0/4326",
    "crs2": "http://kahlua.eecs.jacobs-university.de:8080/def/crs/OGC/0/Temporal?epoch=%221970-01-01T00:00:00Z%22%26uom=%22s%22%26label=%22unix%22",
    "tiling": "tiling aligned [0:511,0:511,0:0] tile size 262144"
}
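
Only the series listed under "datasetseries" are processed; the script below never reads "datasetseries_commented", so that block simply parks entries that are currently switched off. As a usage sketch (both file names are hypothetical; the flags come from the script's argument parser): with the script saved as getSnowMaps.py and the configuration above as config.json, downloading, ingesting, and registering everything from the last 7 days would be

    python getSnowMaps.py --config config.json 7

where the positional argument is the look-back window in days, and --no-download, --no-ingest, and --no-register each skip the corresponding stage.
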
#!/usr/bin/env python
# Copyright (c) 2013
#
# Joachim Ungar ([email protected])
# Fabian Schindler ([email protected])
# Oliver Clements ([email protected])
#
# This code was developed during the EU e-Infrastructure funded project
# EarthServer (www.earthserver.eu, Project Number: 283610).
#
# Some parts were used from a script developed by Oliver Clements:
# https://gist.github.com/doclements/5f3a29ee94e6ed9978bb
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is furnished
# to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import urllib2
import os
import subprocess
import sys
import argparse
import pickle
import fnmatch
import tempfile
import json
from osgeo import gdal
from datetime import datetime, timedelta, date, time
from xml.dom import minidom

# Configuration
# =============

ansi_date_start = datetime(1601, 1, 1)
unix_date_start = datetime(1970, 1, 1)
remote_file_list = []
diff_list = []
describe_eo_coverage_set_url = ""
get_coverage_url = ""

def main(args):
    parser = argparse.ArgumentParser()
    parser.add_argument("--no-download", dest="download", default=True, action="store_false")
    parser.add_argument("--no-ingest", dest="ingest", default=True, action="store_false")
    parser.add_argument("--no-register", dest="register", default=True, action="store_false")
    parser.add_argument("--config", dest="config")
    parser.add_argument("timediff", nargs=1, type=int)
    parsed = parser.parse_args(args)

    download = parsed.download
    ingest = parsed.ingest
    register = parsed.register
    timediff = parsed.timediff[0]

    # load config
    with open(parsed.config, "r") as f:
        config = json.load(f)
    remotewcsurl = config["remotewcsurl"]
    localwcsurl = config["localwcsurl"]
    datasetseries = config["datasetseries"]
    storagedir = config["storagedir"]
    tiling = config["tiling"]
    crs1 = config["crs1"]
    crs2 = config["crs2"]

    #print "ansi_date_start " + str(ansi_date_start)
    #print "unix_date_start " + str(unix_date_start)
    #print "remotewcsurl " + remotewcsurl
    #print "localwcsurl " + localwcsurl
    #print "datasetseries " + str(datasetseries)
    #print "storagedir " + storagedir
    #print "tiling " + tiling

    print("\n")
    print(datetime.now())
    print("==========")
    print("\n")

    for i in datasetseries:
        diff_list = []
        remote_file_list = []
        local_file_list = []
        local_dir = datasetseries[i]["local_dir"]
        dataset_series_id = i
        collection = datasetseries[i]["collection"]
        if "xres" in datasetseries[i].keys():
            xres = int(datasetseries[i]["xres"])
            yres = int(datasetseries[i]["yres"])
        else:
            xres = 0
            yres = 0
        if not os.path.exists(local_dir):
            os.makedirs(local_dir)
        local_file_list = get_local_filelist(local_dir)
        time_offset = int(timediff)

        print(collection + ":")
        print("-------------------------------")
        print("\n")

        try:
            if download:
                print("download")
                # create DescribeEOCoverageSet URL
                describe_eo_coverage_set_url = remotewcsurl + "request=describeeocoverageset&eoid=" + i
                # make DescribeEOCoverageSet request
                describe_eo_coverage_set_xml = describe_eo_coverage_set(describe_eo_coverage_set_url, time_offset)
                # parse response and get list of remote coverages
                remote_file_list = parse_xml(describe_eo_coverage_set_xml)
                # compare remote and local files and determine files to be downloaded
                diff_list = compare_files(remote_file_list, local_file_list)
                print(str(len(remote_file_list)) + " item(s) within the last " + str(time_offset) + " days")
                print(str(len(diff_list)) + " new")
                # download files
                download_files(remotewcsurl, diff_list, local_dir)
            if ingest:
                print("ingest")
                # import into rasdaman
                datatype = datasetseries[i]["datatype"]
                import_files(local_dir, collection, datatype, crs1, crs2, xres, yres, storagedir, remotewcsurl, tiling)
            if register:
                print("register")
                # an empty remote list makes compare_files() return every
                # local file, so all local coverages end up on the register list
                remote_file_list = []
                local_file_list = get_local_filelist(local_dir)
                register_list = compare_files(local_file_list, remote_file_list)
                print(str(len(local_file_list)) + " on local file list")
                print(str(len(remote_file_list)) + " on remote list")
                print(str(len(register_list)) + " on register list")
                register_files(register_list, local_dir, collection)
                print("done")
        except urllib2.URLError, e:
            print("ERROR: " + str(e))

    # if new data was ingested, restart rasdaman once at the end
    if ingest and len(diff_list) > 0:
        print("cleaning up and restarting... ")
        #clean_up_petascope(datasetseries)
        restart_rasdaman()

# get local file list (GeoTIFFs only)
def get_local_filelist(dir):
    filelist = []
    for filename in os.listdir(dir):
        if fnmatch.fnmatch(filename, '*.tif'):
            filelist.append(filename)
    return filelist

# make a DescribeEOCoverageSet request and return the response XML
def describe_eo_coverage_set(describe_eo_coverage_set_url, time_offset):
    end_time = date.today()
    start_time = end_time - timedelta(days=time_offset)
    describe_eo_coverage_set_url = describe_eo_coverage_set_url + '&subset=phenomenonTime("' + str(start_time) + '","' + str(end_time) + '")'
    describe_eo_coverage_set_xml = urllib2.urlopen(describe_eo_coverage_set_url)
    return describe_eo_coverage_set_xml
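
# For illustration, with the config above and time_offset=7 the assembled
# request looks like this (dates are example values):
#   http://neso.cryoland.enveo.at/cryoland/ows?service=wcs&version=2.0.0&request=describeeocoverageset&eoid=daily_FSC_Baltic_Optical&subset=phenomenonTime("2013-04-08","2013-04-15")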

# parse DescribeEOCoverageSet XML and return list of remote coverages
def parse_xml(describe_eo_coverage_set_xml):
    remote_file_list = []
    xml = minidom.parse(describe_eo_coverage_set_xml)
    itemlist = xml.getElementsByTagName('wcs:CoverageDescription')
    for i in itemlist:
        remote_file_list.append(i.attributes['gml:id'].value)
    return remote_file_list

# return list of coverages present in the first list but missing from the second
def compare_files(remote_file_list, local_file_list):
    diff_list = []
    for i in remote_file_list:
        if i not in local_file_list:
            diff_list.append(i)
    return diff_list

# download files
def download_files(remotewcsurl, diff_list, local_dir):
    for i in diff_list:
        get_coverage_url = remotewcsurl + "request=getcoverage&coverageid=" + i
        download_file = local_dir + i
        print("downloading " + i + "...")
        try:
            # write in binary mode; the coverages are GeoTIFF files
            with open(download_file, 'wb') as f:
                f.write(urllib2.urlopen(get_coverage_url).read())
            print("download successful")
        except urllib2.URLError, e:
            print("ERROR: " + str(e))

# retrieve id(s) of object(s) in a given rasdaman collection
def get_oid(col):
    p = subprocess.Popen('rasql -q "select (long)oid(c) from %s as c" --out string' % col, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
    p.wait()
    output = p.stdout.read()
    p.stdout.close()
    oid = []
    for line in output.split('\n'):
        if 'Result element' in line:
            oid.append(line.split(':')[1].strip())
    return oid
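
# get_oid() assumes rasql reports each object id on a line such as
# (output format inferred from the parsing above, not verified):
#   Result element 1: 152073
# i.e. it keeps whatever follows the first colon on "Result element" lines.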

# import new files into the rasdaman collection
def import_files(dir, collection, datatype, crs1, crs2, xres, yres, storagedir, remotewcsurl, tiling):
    # pickle keeps two lists as files between runs of the script:
    rasdaman_success_location = storagedir + "rasdaman_" + collection + "_success"
    rasdaman_error_location = storagedir + "rasdaman_" + collection + "_error"
    # load list of successfully imported coverages from the previous run, or create it
    rasdaman_success = import_list(rasdaman_success_location)
    # load list of coverages that failed to import, or create it
    rasdaman_error = import_list(rasdaman_error_location)

    # merge list of new coverages with list of old coverages not yet imported
    local_file_list = get_local_filelist(dir)
    diff_list = compare_files(local_file_list, rasdaman_success)
    ingest_list = diff_list + rasdaman_error
    print(str(len(ingest_list)) + " coverages to be ingested")

    # reset the error list; failures below will repopulate it
    rasdaman_error = []
    export_list(rasdaman_error, rasdaman_error_location)

    # sort ingest_list by coverage timestamp
    ingest_list = sort_list(ingest_list, dir, remotewcsurl)

    # ingest coverages
    for i in ingest_list:
        # get timestamp
        unixtime = get_timestamp(i, remotewcsurl, dir)
        ingestfile = dir + i
        temp_ingestfile = ""
        if (xres > 0) and (yres > 0):
            datafile = gdal.Open(dir + i)
            cols = datafile.RasterXSize
            rows = datafile.RasterYSize
            print("%s, %s" % (xres, yres))
            print("%s, %s" % (cols, rows))
            # resample whenever the raster size differs from the target size
            if (cols != xres) or (rows != yres):
                print("resampling ...")
                handle, temp_ingestfile = tempfile.mkstemp()
                os.close(handle)
                resize = "gdal_translate -outsize %s %s %s %s" % (xres, yres, ingestfile, temp_ingestfile)
                p = subprocess.Popen(resize, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
                p.wait()
                output = p.stdout.read()
                print(output)
                ingestfile = temp_ingestfile
            else:
                print("no resampling")
        try:
            # TODO: more detailed error handling
            # (1) error handling on OID read error --> solution: restart rasdaman
            # (2)
            # ERROR - RasdamanHelper2::updateImage, l. 678: image update failed: RasManager Error: Write transaction in progress, please retry again later.
            # ERROR - rimport::main, l. 1364: RasManager Error: Write transaction in progress, please retry again later.
            # TODO: check on import via rasql?
            # $ rasql -q "SELECT 1 FROM <collection> AS c WHERE some_cells(c[*:*,*:*,<t_i>] != 0)" --out hex | grep "Query result" | awk 'BEGIN {FS=" "}; { print $5 }' | { read out; [[ $out -eq "1" ]] && echo TRUE || echo FALSE; }
            # TODO: backup solution for rasdaman_success lists?
            # TODO: reset ps_domain table
            print(unixtime)
            rasimport = 'rasimport -f %s --coll %s --coverage-name %s -t %s --crs-uri "%s":"%s" --crs-order 1:0:2 --csz 1 --z-coords %s --tiling "%s"' % (ingestfile, collection, collection, datatype, crs1, crs2, unixtime, tiling)
            print("Import " + i + " to " + collection)
            print(rasimport)
            # start rasimport
            p = subprocess.Popen(rasimport, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
            p.wait()
            output = p.stdout.read()
            print(output)
            # rasimport is silent on success, so any output indicates an error
            if output:
                print("rasdaman import error")
                rasdaman_error.append(i)
                export_list(rasdaman_error, rasdaman_error_location)
            else:
                print("import successful")
                rasdaman_success.append(i)
                export_list(rasdaman_success, rasdaman_success_location)
            p.stdout.close()
        except:
            print("rasdaman import error")
            rasdaman_error.append(i)
            export_list(rasdaman_error, rasdaman_error_location)
        print("\n")
        if os.path.isfile(temp_ingestfile):
            os.remove(temp_ingestfile)
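
# For illustration, one assembled rasimport call has this shape (the coverage
# file name and z-coordinate are example values; <local_dir>, <crs1> and <crs2>
# stand for the corresponding values from the config):
#   rasimport -f <local_dir>/daily_FSC_Baltic_Optical_20130415.tif --coll daily_FSC_Baltic_Optical --coverage-name daily_FSC_Baltic_Optical -t GreyCube:GreySet3 --crs-uri "<crs1>":"<crs2>" --crs-order 1:0:2 --csz 1 --z-coords 1365984000 --tiling "tiling aligned [0:511,0:511,0:0] tile size 262144"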

# sort file list by coverage timestamp
def sort_list(ingest_list, dir, remotewcsurl):
    time_dict = {}
    for i in ingest_list:
        time_dict[i] = get_timestamp(i, remotewcsurl, dir)
    return sorted(ingest_list, key=lambda i: time_dict[i])

# get UNIX timestamp
def get_timestamp(i, remotewcsurl, dir):
    describe_coverage_url = remotewcsurl + "request=describecoverage&coverageid=" + i
    describe_coverage_xml = dir + i + ".xml"
    # cache the DescribeCoverage response next to the data file and only
    # fetch it from the server if it is not yet available locally
    if not os.path.isfile(describe_coverage_xml):
        with open(describe_coverage_xml, 'w') as f:
            f.write(urllib2.urlopen(describe_coverage_url).read())
    # determine coverage timestamp and convert it to a UNIX timestamp
    xml = minidom.parse(describe_coverage_xml)
    coverage_date = xml.getElementsByTagName('gml:timePosition').item(0).firstChild.wholeText
    coverage_date = datetime.strptime(coverage_date, "%Y-%m-%dT%H:%M:%SZ")
    # only whole days are counted, so the intra-day time is dropped
    unixtime = (coverage_date - unix_date_start).days * 24 * 60 * 60
    return unixtime
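
# For illustration, get_timestamp() reads an element of this shape from the
# DescribeCoverage response (illustrative value):
#   <gml:timePosition>2013-04-15T10:30:00Z</gml:timePosition>
# which becomes 15810 days * 86400 s = 1365984000; the 10:30 is discarded.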

# load a list from the previous run, or create it if not available
def import_list(location):
    try:
        with open(location, 'r') as listfile:
            imported = pickle.load(listfile)
    except (IOError, EOFError):
        imported = []
        with open(location, 'w+') as listfile:
            pickle.dump(imported, listfile)
    return imported

# save a list for the next run
def export_list(exportlist, location):
    with open(location, 'w+') as listfile:
        pickle.dump(exportlist, listfile)

# rename the z-axis in petascopedb (restarting rasdaman is done separately)
def clean_up_petascope(datasetseries):
    print("cleaning up...")
    for d in datasetseries:
        collection = datasetseries[d]["collection"]
        column = ["type='6'", "name='t'"]
        for c in column:
            sql = "psql -d petascopedb -c \"update ps_domain set %s where coverage=(select id from ps_coverage where name='%s') and i='2'\"" % (c, collection)
            print(sql)
            p = subprocess.Popen(sql, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
            p.wait()
            output = p.stdout.read()
            print(output)
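
# For illustration, for the daily_FSC_Baltic_Optical collection this issues
# one command per entry in column:
#   psql -d petascopedb -c "update ps_domain set type='6' where coverage=(select id from ps_coverage where name='daily_FSC_Baltic_Optical') and i='2'"
#   psql -d petascopedb -c "update ps_domain set name='t' where coverage=(select id from ps_coverage where name='daily_FSC_Baltic_Optical') and i='2'"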

# restart the rasdaman service
def restart_rasdaman():
    restart = "sudo /etc/init.d/rasdaman restart"
    print(restart)
    try:
        p = subprocess.Popen(restart, stderr=subprocess.STDOUT, shell=True, stdout=subprocess.PIPE)
        p.wait()
        output = p.stdout.read()
        print(output)
    except:
        print("ERROR occurred")

# register coverages to the local dataset series (not yet implemented)
def register_files(register_list, dir, collection):
    print(str(len(register_list)) + " coverages to be registered")
    # TODO: loop through diff:
    for r in register_list:
        print(r)
        # TODO: get timestamp & footprint
        # TODO: register to dataset series

if __name__ == "__main__":
    main(sys.argv[1:])