Last active
August 24, 2020 15:48
-
-
Save KMarkert/6470e3e0a4005d41ade1f4b9c1c55375 to your computer and use it in GitHub Desktop.
This Python script takes the metadata from the USGS Landsat Bulk Metadata Service in XML format and finds coincident dates for water quality station samples
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import os

import numpy as np
import pandas as pd
import xmltodict
# Find Landsat scenes acquired within one day of water quality samples.
#
# The XML metadata files are taken from
# https://landsat.usgs.gov/landsat-bulk-metadata-service
# One metadata file is read per station; the literal 'XXXX' in `lsmetaData`
# is replaced by the station ID to build that station's file name.

# Define input files
lsmetaData = r'~/metadata_XXXX.xml'
inDateFile = r'~/wq_dataTable.csv'

# The table must provide the columns `StatID` and `SDate`.
statData = pd.read_csv(os.path.expanduser(inDateFile))

# Get input water quality station and sampling dates
statID = np.array(statData.StatID, dtype=str)
sampleDates = np.array(statData.SDate, dtype=str)

# Get unique stations
stations = np.unique(statID)

# Output columns. The first element of each list is the column title; it is
# written to the CSV as the first data row (the output layout is kept as-is).
statList = ['StationID']
sDateList = ['SampleDate']
lsIdList = ['SceneID']
lDateList = ['SceneDate']

for stat in stations:
    # Sample date strings that belong to the current station
    tmpDates = sampleDates[statID == stat]

    try:
        # open() does not expand '~' by itself, so expand it explicitly;
        # otherwise every station would be skipped with an IOError.
        with open(os.path.expanduser(lsmetaData.replace('XXXX', stat))) as fd:
            doc = xmltodict.parse(fd.read())
    except IOError:
        # No readable metadata file for this station: report and move on.
        print('Passing station {0}'.format(stat))
        continue

    metaData = doc['searchResponse']['metaData']
    # xmltodict yields a single mapping (not a list) when the response holds
    # exactly one <metaData> element; normalise so the loop below works.
    if not isinstance(metaData, list):
        metaData = [metaData]

    # Parse the station's sample dates once instead of once per scene.
    # Update how to grab date based on input data
    # Expects the date string format to be MM/dd/YYYY
    stationSampleDates = []
    for rawDate in tmpDates:
        d = rawDate.split('/')
        stationSampleDates.append(
            datetime.date(int(d[2]), int(d[0]), int(d[1])))

    for scene in metaData:
        # Acquisition date is split on '-' and read as year, month, day
        dDate = scene['acquisitionDate'].split('-')
        lsDate = datetime.date(int(dDate[0]), int(dDate[1]), int(dDate[2]))
        lsId = str(scene['sceneID'])

        for sampDate in stationSampleDates:
            deltaT = abs(sampDate - lsDate)
            # Keep pairs whose dates differ by at most one day
            if deltaT.days < 2:
                statList.append(stat)
                sDateList.append(sampDate.isoformat())
                lsIdList.append(lsId)
                lDateList.append(lsDate.isoformat())

outfile = r'~/out_Landsat_coincidence.csv'
# Materialise the zip so the frame is built from a list of row tuples on
# both Python 2 and Python 3.
df = pd.DataFrame(list(zip(statList, sDateList, lsIdList, lDateList)))
df.to_csv(os.path.expanduser(outfile), sep=',', encoding='utf-8')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example format for the
inDateFile
CSV: StatID
is the station's unique identifier and SDate
is the date on which a sample was taken for that station.