Skip to content

Instantly share code, notes, and snippets.

View kperry2215's full-sized avatar

Kirsten Perry kperry2215

View GitHub Profile
import pandas as pd
import matplotlib.pyplot as plt
class fracfocus_data_search:
"""
This class generates an object that is used to filter the master
fracfocus dataframe so it only contains a certain state/state abbreviation,
county (list), and/or operator
"""
def generate_bar_graph(df, title):
"""
This function creates a bar graph from pandas dataframe columns.
Arguments:
df: Pandas dataframe. Index will be x-axis. Categories and
associated amounts are from columns
title: String. Name of the bar graph
Outputs:
Bar graph in console.
"""
def generate_boxplot(df, x_variable):
"""
This function generates a basic box plot of a column's data, with
outliers removed
Arguments:
df: Pandas dataframe
x_variable: String. Name of the column that we want to generate
boxplot from
Outputs:
Box plot in console.
#Plot the 'TotalBaseNonWaterVolume' variable over time
generate_plot(dataframe, x_variable='JobStartDate',
y_variables=['TotalBaseNonWaterVolume'],
plot_title='Total Base Non-Water Volume for Fracs over Time')
def generate_plot(df, x_variable, y_variables, plot_title):
"""
This function is used to map x- and y-variables against each other
Arguments:
df: Pandas dataframe.
x_variable: String. Name of the column that we want to set as the
x-variable in the plot
y_variables: string (single), or list of strings (multiple). Name(s)
of the column(s) that we want to set as the y-variable in the plot
Outputs:
#Pull all of the fracfocus data from a csv
fracfocus_registry=pd.read_csv('fracfocus_data_example.csv', low_memory=False)
#Create a filter object for the desired state, counties, and operator
fracfocus_filter=fracfocus_data_search(state='Texas', state_abbreviation='TX',
county_list=['Andrews', 'Borden', 'Crane', 'Dawson',
'Ector', 'Eddy', 'Gaines', 'Glasscock'], operator='XTO')
#Filter dataframe by its parameters
subsetted_df=fracfocus_filter.filter_dataframe(fracfocus_registry, column_state='StateName',
column_county='CountyName', column_operator='OperatorName')
class fracfocus_data_search:
"""
This class generates an object that is used to filter the master
fracfocus dataframe so it only contains a certain state/state abbreviation,
county (list), and/or operator
"""
def __init__(self, state=None, state_abbreviation=None, county_list=None,
operator=None):
#All data in initialize def optional depending on what kind of filtering
#Import desired packages
import requests
import zipfile
import io
import pandas as pd
def pull_zip_file_from_url(url):
"""
This function pulls a zip file from a URL and generates a ZipFile object
Arguments:
"""
This script queries data directly from the EPA's Envirofacts API and loads it into
a pandas dataframe. The GHG data tables are pulled and merged together to
create a master dataframe containing all of the GHG data needed for analysis:
facility location, sector, and subsector, plus emissions and emission type by year
"""
import pandas as pd
import io
import requests
import pandas as pd
from scipy.optimize import curve_fit
import numpy as np
import matplotlib.pyplot as plt
def read_in_csv(file_path):
"""
Read in the specified csv as a pandas dataframe
Arguments:
file_path: String. Path for the csv file that we want to read in