Skip to content

Instantly share code, notes, and snippets.

@joefutrelle
Last active April 13, 2018 14:52
Show Gist options
  • Save joefutrelle/1e22d24c62e2da92719e3908cb553949 to your computer and use it in GitHub Desktop.
Save joefutrelle/1e22d24c62e2da92719e3908cb553949 to your computer and use it in GitHub Desktop.
Convert nutrient data from mat export fmt to NetCDF
<dataset type="EDDTableFromNcCFFiles" datasetID="lter-nutrient" active="true">
<reloadEveryNMinutes>10080</reloadEveryNMinutes>
<updateEveryNMillis>10000</updateEveryNMillis>
<fileDir>/home/vagrant/lter-poc/output/</fileDir>
<fileNameRegex>.*\.nc</fileNameRegex>
<recursive>true</recursive>
<pathRegex>.*</pathRegex>
<metadataFrom>last</metadataFrom>
<preExtractRegex></preExtractRegex>
<postExtractRegex></postExtractRegex>
<extractRegex></extractRegex>
<columnNameForExtract></columnNameForExtract>
<sortFilesBySourceNames></sortFilesBySourceNames>
<fileTableInMemory>false</fileTableInMemory>
<accessibleViaFiles>false</accessibleViaFiles>
<!-- sourceAttributes>
<att name="_NCProperties">version=1|netcdflibversion=4.5.0|hdf5libversion=1.10.1</att>
<att name="cdm_data_type">TimeSeriesProfile</att>
<att name="cdm_timeseries_variables">t, crs</att>
<att name="Conventions">CF-1.6</att>
<att name="date_created">2017-12-15T18:03:00Z</att>
<att name="featureType">timeSeriesProfile</att>
<att name="subsetVariables">t, crs</att>
</sourceAttributes -->
<!-- Global attributes layered on top of the metadata found in the source files.
     NOTE(review): infoUrl is still the "???" placeholder and should point at a real page.
     NOTE(review): for cdm_data_type=TimeSeriesProfile, confirm cdm_timeseries_variables and
     cdm_profile_variables against the ERDDAP documentation; the variables that carry
     cf_role=timeseries_id (station) and cf_role=profile_id (event_number) are not listed
     in either attribute. -->
<addAttributes>
<att name="Conventions">CF-1.6, COARDS, ACDD-1.3</att>
<att name="infoUrl">???</att>
<att name="institution">WHOI</att>
<att name="keywords">amon, amon_a, amon_b, amon_c, crs, data, event, event_number, event_number_niskin, identifier, local, niskin, ntra, ntra_a, ntra_b, ntra_c, number, phos_a, phos_b, phos_c, phosphate, scla, scla_c, slca, slca_a, slca_b, source, station, statistics, time</att>
<att name="license">[standard]</att>
<att name="sourceUrl">(local files)</att>
<att name="standard_name_vocabulary">CF Standard Name Table v29</att>
<att name="summary">MVCO nutrient data (2003-present)</att>
<att name="title">MVCO nutrient data (2003-present)</att>
<att name="cdm_data_type">TimeSeriesProfile</att>
<att name="featureType">timeSeriesProfile</att>
<att name="cdm_timeseries_variables">time</att>
<att name="cdm_profile_variables">z</att>
<att name="cdm_altitude_proxy">z</att>
<att name="subsetVariables">time, latitude, longitude, crs</att>
</addAttributes>
<dataVariable>
<!-- Station identifier. The conversion script sets this column to 0 for every row it
     writes, so all files share the same station value. -->
<sourceName>station</sourceName>
<destinationName>station</destinationName>
<dataType>long</dataType>
<!-- sourceAttributes>
<att name="cf_role">timeseries_id</att>
<att name="long_name">station identifier</att>
</sourceAttributes -->
<addAttributes>
<att name="ioos_category">Identifier</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- Source variable y is the "Latitude" column of the MAT export, renamed by the
     conversion script. -->
<sourceName>y</sourceName>
<destinationName>latitude</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="axis">Y</att>
</sourceAttributes -->
<addAttributes>
<att name="ioos_category">Location</att>
<att name="long_name">Latitude</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- Source variable x is the "Longitude" column of the MAT export, renamed by the
     conversion script. -->
<sourceName>x</sourceName>
<destinationName>longitude</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="axis">X</att>
</sourceAttributes -->
<addAttributes>
<att name="ioos_category">Location</att>
<att name="long_name">Longitude</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- Exposes the variable named crs from the source files unchanged.
     NOTE(review): presumably the coordinate reference system placeholder written by the
     NetCDF export rather than measured data; confirm whether it needs to be served. -->
<sourceName>crs</sourceName>
<destinationName>crs</destinationName>
<dataType>int</dataType>
<!-- sourceAttributes>
</sourceAttributes -->
<addAttributes>
<att name="ioos_category">Unknown</att>
<att name="long_name">CRS</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- Source variable t is assembled by the conversion script from the Start_Date and
     Start_Time_UTC columns of the MAT export. -->
<sourceName>t</sourceName>
<destinationName>time</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="axis">T</att>
<att name="standard_name">time</att>
<att name="units">seconds since 1990-01-01 00:00:00Z</att>
</sourceAttributes -->
<addAttributes>
<att name="ioos_category">Time</att>
<att name="long_name">Time</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- Source variable z is the "Depth" column of the MAT export after the conversion
     script negates it (z = 0 - Depth).
     NOTE(review): no units or positive direction are declared here; confirm the depth
     units of the source data and add them. -->
<sourceName>z</sourceName>
<destinationName>z</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="axis">Z</att>
</sourceAttributes -->
<addAttributes>
<att name="ioos_category">Location</att>
<att name="long_name">Z</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- Profile identifier (cf_role=profile_id). The conversion script writes one NetCDF
     file per distinct event_number. -->
<sourceName>event_number</sourceName>
<destinationName>event_number</destinationName>
<dataType>String</dataType>
<!-- sourceAttributes>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="cf_role">profile_id</att>
<att name="colorBarMaximum" type="double">100.0</att>
<att name="colorBarMinimum" type="double">0.0</att>
<att name="coordinates">null</att>
<att name="ioos_category">Identifier</att>
<att name="long_name">Event Number</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- String label taken from the "Event_Number_Niskin" column of the MAT export.
     NOTE(review): presumably the event number combined with a Niskin bottle number;
     confirm with the data provider. -->
<sourceName>event_number_niskin</sourceName>
<destinationName>event_number_niskin</destinationName>
<dataType>String</dataType>
<!-- sourceAttributes>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="colorBarMaximum" type="double">100.0</att>
<att name="colorBarMinimum" type="double">0.0</att>
<att name="coordinates">null</att>
<att name="ioos_category">Identifier</att>
<att name="long_name">Event Number Niskin</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_a_uM NO2- + NO3-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>ntra_a</sourceName>
<destinationName>ntra_a</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_nitrate_and_nitrite_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Nitrate plus nitrite concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_b_uM NO2- + NO3-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>ntra_b</sourceName>
<destinationName>ntra_b</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_nitrate_and_nitrite_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Nitrate plus nitrite concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_c_uM NO2- + NO3-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>ntra_c</sourceName>
<destinationName>ntra_c</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_nitrate_and_nitrite_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Nitrate plus nitrite concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_a_uM NH4+". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>amon_a</sourceName>
<destinationName>amon_a</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_ammonium_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Ammonium concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_b_uM NH4+". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>amon_b</sourceName>
<destinationName>amon_b</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_ammonium_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Ammonium concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_c_uM NH4+". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>amon_c</sourceName>
<destinationName>amon_c</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_ammonium_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Ammonium concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_a_uM SiO2-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>slca_a</sourceName>
<destinationName>slca_a</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_silicate_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Silicate concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_b_uM SiO2-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>slca_b</sourceName>
<destinationName>slca_b</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_silicate_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Silicate concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_c_uM SiO2-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration.
     NOTE(review): the name is spelled scla_c while its siblings are slca_a and slca_b.
     sourceName must match what the conversion script writes, so it is left unchanged. -->
<sourceName>scla_c</sourceName>
<destinationName>scla_c</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_silicate_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Silicate concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_a_uM PO43-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>phos_a</sourceName>
<destinationName>phos_a</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_phosphate_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Phosphate concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_b_uM PO43-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>phos_b</sourceName>
<destinationName>phos_b</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_phosphate_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Phosphate concentration</att>
</addAttributes>
</dataVariable>
<dataVariable>
<!-- From the MAT column "Nut_c_uM PO43-". The source values are micromolar (uM),
     that is micromoles per litre, so the units below are a concentration. -->
<sourceName>phos_c</sourceName>
<destinationName>phos_c</destinationName>
<dataType>double</dataType>
<!-- sourceAttributes>
<att name="_FillValue" type="double">-9999.9</att>
<att name="coordinates">t z x y</att>
</sourceAttributes -->
<addAttributes>
<att name="standard_name">mole_concentration_of_phosphate_in_sea_water</att>
<att name="units">umol L-1</att>
<att name="coordinates">null</att>
<att name="ioos_category">Dissolved Nutrients</att>
<att name="long_name">Phosphate concentration</att>
</addAttributes>
</dataVariable>
</dataset>
import os
from scipy.io import loadmat
import pandas as pd
from pocean.dsg.timeseriesProfile.om import OrthogonalMultidimensionalTimeseriesProfile as OMTP
# Convert the MVCO nutrient MATLAB export into one NetCDF file per sampling event.
#
# Reads MAT_FILE, builds a DataFrame from its header and data arrays, derives a
# timestamp column `t`, renames columns to the short names used in the NetCDF
# output, and writes one file per distinct event_number into OUT_DIR.

MAT_FILE = '/vagrant/nut_data_reps.mat'
OUT_DIR = './output'

# squeeze_me=True asks scipy to drop unit dimensions from the loaded arrays.
mat = loadmat(MAT_FILE, squeeze_me=True)

# MAT header name -> variable name written to NetCDF.
col_map = {
    'Event_Number': 'event_number',
    'Event_Number_Niskin': 'event_number_niskin',
    'Latitude': 'y',
    'Longitude': 'x',
    'Depth': 'z',
    'Nut_a_uM NO2- + NO3-': 'ntra_a',
    'Nut_b_uM NO2- + NO3-': 'ntra_b',
    'Nut_c_uM NO2- + NO3-': 'ntra_c',
    'Nut_a_uM NH4+': 'amon_a',
    'Nut_b_uM NH4+': 'amon_b',
    'Nut_c_uM NH4+': 'amon_c',
    'Nut_a_uM SiO2-': 'slca_a',
    'Nut_b_uM SiO2-': 'slca_b',
    # NOTE(review): spelled 'scla_c' (not 'slca_c'); the ERDDAP dataset definition
    # reads this exact name as its sourceName, so rename both together or not at all.
    'Nut_c_uM SiO2-': 'scla_c',
    'Nut_a_uM PO43-': 'phos_a',
    'Nut_b_uM PO43-': 'phos_b',
    'Nut_c_uM PO43-': 'phos_c',
}

# Build the dataframe: one Series per header entry, taken from the matching
# column of the data array, with columns kept in header order.
cols = mat['header_nut']
values = mat['MVCO_nut_reps']
df = pd.DataFrame(
    {col: pd.Series(list(values[:, i])) for i, col in enumerate(cols)},
    columns=cols,
)

# Compute datetimes from the start date and the (incorrect) start time columns:
# the first 10 characters of Start_Date supply the date, and the characters from
# index 11 onward of Start_Time_UTC supply the time of day.
df['t'] = pd.Series([
    pd.to_datetime('{}T{}Z'.format(date_str[:10], time_str[11:]))
    for date_str, time_str in zip(df['Start_Date'], df['Start_Time_UTC'])
])
del df['Start_Date']
del df['Start_Time_UTC']

df = df.rename(columns=col_map)

# Negate the depth column so z is stored with the opposite sign.
df['z'] = 0 - df['z']

# The NetCDF writer will not create the target directory for us.
if not os.path.isdir(OUT_DIR):
    os.makedirs(OUT_DIR)

for event_number, sdf in df.groupby('event_number'):
    # Work on a copy: assigning into a groupby slice triggers pandas'
    # SettingWithCopy warning and is not guaranteed to behave consistently.
    sdf = sdf.copy()
    # Every row gets the same constant station id.
    sdf['station'] = 0
    outpath = os.path.join(OUT_DIR, '{}.nc'.format(event_number))
    print('writing {}...'.format(outpath))
    OMTP.from_dataframe(sdf, outpath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment