-
-
Save OneGneissGuy/ba0d3c50976702d4de481d4e8861276e to your computer and use it in GitHub Desktop.
Code for managing soil moisture data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
:DESCRIPTION:Code for managing soil moisture data | |
:REQUIRES: | |
:TODO:More error handling | |
:AUTHOR: John Franco Saraceno | |
:ORGANIZATION: U.S. Geological Survey, United States Department of Interior | |
:CONTACT: [email protected] | |
:VERSION: 1.0 | |
Fri Jul 29 18:48:40 2016 | |
""" | |
# ============================================================================= | |
# IMPORT STATEMENTS | |
# ============================================================================= | |
import glob | |
import matplotlib.pyplot as plt | |
import os | |
import pandas as pd | |
def list_of_files(path, fmatch):
    """Return the regular files in ``path`` whose names match ``fmatch``.

    :param path: directory to search; may be absolute or relative.
    :param fmatch: glob pattern applied inside ``path``, e.g. ``'*.dat'``.
    :return: list of matching paths in the order produced by ``glob.glob``.
        Each entry already includes ``path`` as its prefix. Directories that
        happen to match the pattern are skipped.
    """
    pattern = os.path.join(path, fmatch)
    # glob.glob returns names that already contain ``path``. Joining ``path``
    # onto them a second time doubles the prefix for relative directories and
    # makes every isfile() check fail, so test the globbed name directly.
    return [name for name in glob.glob(pattern) if os.path.isfile(name)]
def create_datetime_index(df):
    """Return a copy of ``df`` indexed by timestamps built from its columns.

    The timestamp of each row is assembled from three columns:

    * ``Year``      -- four-digit year,
    * ``JulianDay`` -- day of year, where 1 is January 1st,
    * ``HRMin``     -- time of day encoded as ``HHMM`` (e.g. ``1330``).

    :param df: dataframe containing the ``Year``, ``JulianDay`` and ``HRMin``
        columns. It is not modified.
    :return: a copy of ``df`` whose index holds the computed timestamps.
    """
    dataframe = df.copy()
    # Wrap HHMM at 2400 so a value of 2400 becomes 00:00 on the same
    # JulianDay rather than producing hour 24.
    # NOTE(review): if the logger writes 2400 for the midnight that ends the
    # day, this stamps the row 24 hours early -- confirm the intent.
    hrmin = dataframe['HRMin'].astype(int) % 2400
    # lowercase 'h' is the unit spelling accepted by both old and new pandas;
    # the uppercase 'H' alias is deprecated.
    hour = pd.to_timedelta(hrmin // 100, unit='h')
    minute = pd.to_timedelta(hrmin % 100, unit='m')
    # JulianDay is 1-based, so day 1 must add no offset to January 1st
    day = (pd.to_timedelta(dataframe['JulianDay'], unit='D') -
           pd.Timedelta('1D'))
    year = pd.to_datetime(dataframe['Year'], format='%Y')
    # convert dataframe index to date index
    dataframe.index = pd.DatetimeIndex(year + day + hour + minute)
    return dataframe
def process_file(filename, cols):
    """Read one comma-separated data file and reduce it to daily values.

    Every column is resampled to its daily mean, except the precipitation
    column ``PPTmm`` (when present), which is replaced by its daily sum.

    :param filename: path of the file to read; it is parsed without a header
        row.
    :param cols: list of column names to assign to the file's columns. It
        must include ``Year``, ``JulianDay`` and ``HRMin`` so that the
        datetime index can be built.
    :return: dataframe of daily values indexed by day, or ``None`` when the
        file could not be read or processed (a message is printed).
    """
    precip_col = 'PPTmm'
    try:
        # read in the .dat file as a pandas dataframe; -9999.000 marks
        # missing readings
        df = pd.read_csv(filename, delimiter=',', skiprows=0, index_col=False,
                         names=cols, na_values='-9999.000')
        # give the data a datetime index
        df = create_datetime_index(df)
        # resample dataframe to a daily mean
        df_resmpld = df.resample('D', label='left').mean()
        # replace daily mean of precip with daily sum of precip
        if precip_col in df_resmpld.columns:
            df_resmpld[precip_col] = df[precip_col].resample(
                'D', label='left').sum()
        return df_resmpld
    except Exception as err:
        # Best effort: one unreadable file must not abort the whole batch.
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # propagate, and the cause is reported, not hidden.
        print("Tried, but could not process this file: {} ({})".format(
            filename, err))
        return None
def process_directory(directory):
    """Combine the daily values of every ``*.dat`` file in ``directory``.

    Each file is processed with ``process_file``; files that could not be
    processed are left out of the result.

    :param directory: directory that contains the ``.dat`` files.
    :return: dataframe of the concatenated per-file results, sorted by its
        datetime index, which is named ``Date``.
    :raises ValueError: if the directory holds no ``.dat`` file or none of
        them could be processed.
    """
    # create list of column names for dataframe
    column_names = ['Year', 'JulianDay', 'HRMin', 'TmpC', 'RelHum', 'BattV',
                    'PPTmm', 'Temp5cm', 'Temp10cm', 'Temp15cm', 'Temp20cm',
                    'Temp50cm', 'Temp100cm', 'SM5cm', 'SM10cm', 'SM15cm',
                    'SM20cm', 'SM50cm', 'SM100cm']
    # get a list of files to process based on the filter criteria (*.dat);
    # you could also filter by site/year
    files = list_of_files(directory, '*.dat')
    # process_file returns None for files it could not handle; drop those
    results = (process_file(f, column_names) for f in files)
    frames = [frame for frame in results if frame is not None]
    if not frames:
        # pd.concat would also raise ValueError here, but without saying
        # which directory was empty or unreadable
        raise ValueError(
            "No .dat files could be processed in: {}".format(directory))
    # concatenate the files into a master dataframe sorted by date time
    means = pd.concat(frames).sort_index(axis=0, ascending=True)
    # name the index
    means.index.name = 'Date'
    return means
def plot_sm_data(df):
    """Plot the six soil moisture columns of ``df`` against its index.

    :param df: dataframe containing the columns ``SM5cm``, ``SM10cm``,
        ``SM15cm``, ``SM20cm``, ``SM50cm`` and ``SM100cm``.
    :return: the matplotlib axes the lines were drawn on.
    """
    sm_columns = ['SM5cm', 'SM10cm', 'SM15cm',
                  'SM20cm', 'SM50cm', 'SM100cm']
    # Create the figure explicitly and hand its axes to pandas; otherwise
    # DataFrame.plot opens a second figure and leaves an empty one behind.
    _, ax = plt.subplots()
    # The index is the x axis by default. ``x=`` only accepts a column label
    # or position, so passing the index object itself is rejected by
    # current pandas.
    df.plot(y=sm_columns, ax=ax)
    return ax
def main():
    """Process the ``.dat`` files in the current working directory.

    The combined daily values are written to ``Daily_means.csv`` in the
    current working directory and the soil moisture columns are plotted.
    """
    # set the data file containing directory
    directory = os.getcwd()
    df_master = process_directory(directory)
    # save the master dataframe to a csv file with dates in YYYY-MM-DD format
    date_fmt = '%Y-%m-%d'
    # or Mon-YYYY format
    # date_fmt = '%b-%Y'
    # The former ``errors=coerce`` argument referenced an undefined name and
    # raised NameError before anything was written, so it is dropped.
    df_master.to_csv('Daily_means.csv', date_format=date_fmt)
    plot_sm_data(df_master)


if __name__ == "__main__":
    main()
Forced pandas to stop trying to automatically create an index.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@Version 8-10:
Simple refactor.