Skip to content

Instantly share code, notes, and snippets.

@OneGneissGuy
Forked from mikshila/SM_data.py
Last active August 1, 2016 15:40
Show Gist options
  • Save OneGneissGuy/ba0d3c50976702d4de481d4e8861276e to your computer and use it in GitHub Desktop.
Save OneGneissGuy/ba0d3c50976702d4de481d4e8861276e to your computer and use it in GitHub Desktop.
Code for managing soil moisture data
# -*- coding: utf-8 -*-
"""
:DESCRIPTION:Code for managing soil moisture data
:REQUIRES: pandas, matplotlib
:TODO:More error handling
:AUTHOR: John Franco Saraceno
:ORGANIZATION: U.S. Geological Survey, United States Department of Interior
:CONTACT: [email protected]
:VERSION: 1.0
Fri Jul 29 18:48:40 2016
"""
# =============================================================================
# IMPORT STATEMENTS
# =============================================================================
import glob
import matplotlib.pyplot as plt
import os
import pandas as pd
def list_of_files(path, fmatch):
    """Return the regular files inside *path* whose names match *fmatch*.

    :param path: directory to search; may be absolute or relative.
    :param fmatch: glob pattern applied inside ``path``, e.g. ``'*.dat'``.
    :returns: sorted list of matching paths, each already prefixed with
        ``path``. Directories that happen to match the pattern are excluded.
    """
    # glob.glob() returns paths that already contain ``path``; joining
    # ``path`` onto them a second time breaks relative directories, so the
    # match is tested as-is.
    matches = glob.glob(os.path.join(path, fmatch))
    # sorted so that files are always processed in a reproducible order
    return sorted(name for name in matches if os.path.isfile(name))
def create_datetime_index(df):
    """Return a copy of *df* indexed by timestamps built from its time columns.

    Each row's timestamp is assembled from the ``Year`` column, the
    ``JulianDay`` column (day of year, with day 1 meaning January 1st) and
    the ``HRMin`` column (clock time encoded as the number HHMM).

    :param df: dataframe holding ``Year``, ``JulianDay`` and ``HRMin`` columns.
    :returns: a copy of ``df`` with a datetime index; ``df`` is left untouched.
    :raises KeyError: if one of the three time columns is missing.
    :raises ValueError: if an ``HRMin`` value is NaN or not a valid integer.
    """
    dataframe = df.copy()
    # split HHMM into (hours, minutes); the modulo folds 2400 back to 00:00
    # of the same Julian day
    clock = [divmod(int(hhmm) % 2400, 100)
             for hhmm in dataframe['HRMin'].values]
    # lowercase 'h' is the unit alias for hours; uppercase 'H' is deprecated
    # in recent pandas releases
    hours = pd.to_timedelta([h for h, _ in clock], unit='h')
    minutes = pd.to_timedelta([m for _, m in clock], unit='m')
    # Julian day 1 is January 1st, so it contributes an offset of zero days
    days = (pd.to_timedelta(dataframe['JulianDay'], unit='D') -
            pd.Timedelta('1D'))
    years = pd.to_datetime(dataframe['Year'], format='%Y')
    # replace the dataframe index with the assembled timestamps
    dataframe.index = years + days + hours + minutes
    return dataframe
def process_file(filename, cols):
    """Read one data file and reduce it to daily values.

    :param filename: path of a comma-separated data file without header row.
    :param cols: column names to assign to the file's columns, in file order.
    :returns: dataframe of daily means (the ``PPTmm`` column, when present,
        holds the daily sum instead), or ``None`` if the file could not be
        processed.
    """
    precip_col = 'PPTmm'
    try:
        # '-9999.000' is the missing-data flag and is read as NaN;
        # index_col=False keeps pandas from using the first column as index
        df = pd.read_csv(filename, delimiter=',', index_col=False,
                         names=cols, na_values='-9999.000')
        # give the data a datetime index
        df = create_datetime_index(df)
        # resample to daily means, each labelled with the start of its day
        df_resmpld = df.resample('D', label='left').mean()
        if precip_col in df_resmpld.columns:
            # precipitation accumulates, so report the daily total rather
            # than the mean; min_count=1 leaves a day without any valid
            # reading as NaN instead of a misleading 0
            df_resmpld[precip_col] = df[precip_col].resample(
                'D', label='left').sum(min_count=1)
        return df_resmpld
    except Exception as err:
        # best effort: report the mis-formatted or unreadable file and let
        # the caller carry on with the remaining ones
        print("Tried, but could not process this file: {}".format(filename))
        print("  reason: {!r}".format(err))
        return None
def process_directory(directory, fmatch='*.dat'):
    """Combine the daily values of every matching file in *directory*.

    :param directory: directory holding the data files.
    :param fmatch: glob pattern selecting the files to process; ``'*.dat'``
        by default. A narrower pattern can be used to filter, e.g. by site
        or year, if the file names encode them.
    :returns: dataframe of daily values from all files that could be
        processed, sorted by date, with the index named ``'Date'``.
    :raises ValueError: if no file matched or none could be processed.
    """
    # column names assigned to the header-less data files, in file order
    column_names = ['Year', 'JulianDay', 'HRMin', 'TmpC', 'RelHum', 'BattV',
                    'PPTmm', 'Temp5cm', 'Temp10cm', 'Temp15cm', 'Temp20cm',
                    'Temp50cm', 'Temp100cm', 'SM5cm', 'SM10cm', 'SM15cm',
                    'SM20cm', 'SM50cm', 'SM100cm']
    files = list_of_files(directory, fmatch)
    # process_file() returns None for files it had to skip; drop those
    results = (process_file(f, column_names) for f in files)
    frames = [frame for frame in results if frame is not None]
    if not frames:
        raise ValueError(
            "No usable data files matching {!r} in {!r}".format(
                fmatch, directory))
    # concatenate into a master dataframe and sort it by date time
    means = pd.concat(frames).sort_index()
    # name the index
    means.index.name = 'Date'
    return means
def plot_sm_data(df):
    """Plot the soil moisture columns of *df* against its index.

    :param df: dataframe containing the columns ``SM5cm``, ``SM10cm``,
        ``SM15cm``, ``SM20cm``, ``SM50cm`` and ``SM100cm``.
    :returns: the matplotlib axes the lines were drawn on.
    :raises KeyError: if any of the soil moisture columns is missing.
    """
    sm_columns = ['SM5cm', 'SM10cm', 'SM15cm',
                  'SM20cm', 'SM50cm', 'SM100cm']
    # draw onto an explicit axes: DataFrame.plot() would otherwise open its
    # own figure and leave a separately created one empty
    _, ax = plt.subplots()
    # the dataframe index is used for the x axis by default, so no ``x``
    # argument is needed (``x`` only accepts a column label or position)
    df.plot(y=sm_columns, ax=ax)
    return ax
def main():
    """Process the data files in the current directory, then save and plot.

    The combined daily values are written to ``Daily_means.csv`` in the
    current working directory and the soil moisture columns are plotted.
    """
    # the data files are expected in the current working directory
    directory = os.getcwd()
    df_master = process_directory(directory)
    # write the index dates as YYYY-MM-DD, matching the daily resolution
    date_fmt = '%Y-%m-%d'
    # alternative: Mon-YYYY format
    # date_fmt = '%b-%Y'
    # NOTE: the former ``errors=coerce`` argument referenced an undefined
    # name and raised NameError before anything was written
    df_master.to_csv('Daily_means.csv', date_format=date_fmt)
    plot_sm_data(df_master)
    # display the figure when run as a plain script
    plt.show()
# Script entry point: run the pipeline only on direct execution, not when
# this module is imported.
if __name__ == "__main__":
    main()
@OneGneissGuy
Copy link
Author

Use me!

@OneGneissGuy
Copy link
Author

@Version 6:

  1. Added crude error handling to skip mis-formatted files
  2. Now calculates daily means
  3. Changed output timestamp to match daily means.

@OneGneissGuy
Copy link
Author

OneGneissGuy commented Jul 31, 2016

@Version 7:

  1. added support to flag for nan values (-9999.000)

@OneGneissGuy
Copy link
Author

OneGneissGuy commented Jul 31, 2016

@Version 8-10:
Simple refactor.

@OneGneissGuy
Copy link
Author

Forced pandas to stop trying to automatically create an index.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment