Skip to content

Instantly share code, notes, and snippets.

View kperry2215's full-sized avatar

Kirsten Perry kperry2215

View GitHub Profile
class fracfocus_data_search:
"""
This class generates an object that is used to filter the master
fracfocus dataframe so it only contains a certain state/state abbreviation,
county (list), and/or operator
"""
def __init__(self, state=None, state_abbreviation=None, county_list=None,
operator=None):
#All data in initialize def optional depending on what kind of filtering
#Pull all of the fracfocus data from a csv
fracfocus_registry=pd.read_csv('fracfocus_data_example.csv', low_memory=False)
#Make all of the state column lowercase
fracfocus_filter=fracfocus_data_search(state='Texas', state_abbreviation='TX',
county_list=['Andrews', 'Borden', 'Crane', 'Dawson',
'Ector', 'Eddy', 'Gaines', 'Glasscock'], operator='XTO')
#Filter dataframe by its parameters
subsetted_df=fracfocus_filter.filter_dataframe(fracfocus_registry, column_state='StateName',
column_county='CountyName', column_operator='OperatorName')
def generate_plot(df, x_variable, y_variables, plot_title):
"""
This function is used to map x- and y-variables against each other
Arguments:
df: Pandas dataframe.
x_variable: String. Name of the column that we want to set as the
x-variable in the plot
y_variables: string (single), or list of strings (multiple). Name(s)
of the column(s) that we want to set as the y-variable in the plot
Outputs:
#Plot the 'TotalBaseNonWaterVolume' variable over time
generate_plot(dataframe, x_variable='JobStartDate',
y_variables=['TotalBaseNonWaterVolume'],
plot_title='Total Base Non-Water Volume for Fracs over Time')
def generate_boxplot(df, x_variable):
"""
This function generates a basic box plot of a column's data, with
outliers removed
Arguments:
df: Pandas dataframe
x_variable: String. Name of the column that we want to generate
boxplot from
Outputs:
Box plot in console.
def generate_bar_graph(df, title):
"""
This function creates a bar graph from pandas dataframe columns.
Arguments:
df: Pandas dataframe. Index will be x-axis. Categories and
associated amounts are from columns
title: String. Name of the bar graph
Outputs:
Bar graph in console.
"""
import pandas as pd
import matplotlib.pyplot as plt
class fracfocus_data_search:
"""
This class generates an object that is used to filter the master
fracfocus dataframe so it only contains a certain state/state abbreviation,
county (list), and/or operator
"""
def retrieve_time_series(api, series_ID):
    """
    Return the time series dataframe, based on API and unique Series ID

    Arguments:
        api: API that we're connected to. It must expose a
            data_by_series(series=...) method.
        series_ID: string. Name of the series that we want to pull from the EIA API
    Outputs:
        Pandas dataframe built from whatever data_by_series returned
    """
    #Retrieve Data By Series ID
    series_search = api.data_by_series(series=series_ID)
    #Create a pandas dataframe from the retrieved time series and return it,
    #so the caller actually receives the dataframe the docstring promises
    return pd.DataFrame(series_search)
def decompose_time_series(series):
    """
    Run a seasonal decomposition on a time series and plot the result

    Arguments:
        series: series. Time series that we want to decompose
    Outputs:
        Decomposition plot, drawn by calling plot() on the decomposition result
    """
    #Additive model; the decomposition object is only needed for plotting,
    #so it is plotted directly rather than stored in a local
    seasonal_decompose(series, model='additive').plot()
def retrieve_time_series(api, series_ID):
"""
Return the time series dataframe, based on API and unique Series ID
api: API that we're connected to
series_ID: string. Name of the series that we want to pull from the EIA API
"""
#Retrieve Data By Series ID
series_search = api.data_by_series(series=series_ID)
##Create a pandas dataframe from the retrieved time series
df = pd.DataFrame(series_search)