Kirsten Perry kperry2215

Data scientist, Solar @ National Renewable Energy Lab. Formerly data scientist @ BP

kperry2215 / pull_eia_data.py

Last active July 20, 2019 02:34

	def retrieve_time_series(api, series_ID):
	"""
	Return the time series dataframe, based on API and unique Series ID
	api: API that we're connected to
	series_ID: string. Name of the series that we want to pull from the EIA API
	"""
	#Retrieve Data By Series ID
	series_search = api.data_by_series(series=series_ID)
	##Create a pandas dataframe from the retrieved time series
	df = pd.DataFrame(series_search)

kperry2215 / fracfocus_analysis.py

Created July 15, 2019 00:10

	import pandas as pd
	import matplotlib.pyplot as plt

	class fracfocus_data_search:
	"""
	This class generates an object that is used to filter the master
	fracfocus dataframe so it only contains a certain state/state abbreviation,
	county (list), and/or operator
	"""

kperry2215 / generate_bar_graph.py

Created July 15, 2019 00:09

	def generate_bar_graph(df, title):
	"""
	This function creates a bar graph from pandas dataframe columns.
	Arguments:
	df: Pandas dataframe. Index will be x-axis. Categories and
	associated amounts are from columns
	title: String. Name of the bar graph
	Outputs:
	Bar graph in console.
	"""

kperry2215 / generate_boxplot.py

Created July 15, 2019 00:08

	def generate_boxplot(df, x_variable):
	"""
	This function generates a basic box plot of a column's data, with
	outliers removed
	Arguments:
	df: Pandas dataframe
	x_variable: String. Name of the column that we want to generate
	boxplot from
	Outputs:
	Box plot in console.

kperry2215 / generate_plot_nonwater_vol.py

Created July 15, 2019 00:06

	#Chart the 'TotalBaseNonWaterVolume' column against 'JobStartDate'
	generate_plot(
		df=dataframe,
		x_variable='JobStartDate',
		y_variables=['TotalBaseNonWaterVolume'],
		plot_title='Total Base Non-Water Volume for Fracs over Time',
	)

kperry2215 / generate_plot.py

Created July 15, 2019 00:05

	def generate_plot(df, x_variable, y_variables, plot_title):
	"""
	This function is used to map x- and y-variables against each other
	Arguments:
	df: Pandas dataframe.
	x_variable: String. Name of the column that we want to set as the
	x-variable in the plot
	y_variables: string (single), or list of strings (multiple). Name(s)
	of the column(s) that we want to set as the y-variable in the plot
	Outputs:

kperry2215 / filter_fracfocus.py

Created July 15, 2019 00:04

	#Read the fracfocus data from a csv into a dataframe
	fracfocus_registry = pd.read_csv(
		'fracfocus_data_example.csv',
		low_memory=False,
	)
	#Build the filter object: one state (name + abbreviation), a list of
	#counties, and a single operator
	fracfocus_filter = fracfocus_data_search(
		state='Texas',
		state_abbreviation='TX',
		county_list=[
			'Andrews', 'Borden', 'Crane', 'Dawson',
			'Ector', 'Eddy', 'Gaines', 'Glasscock',
		],
		operator='XTO',
	)
	#Apply the filter to the dataframe, naming the columns that hold the
	#state, county and operator values
	subsetted_df = fracfocus_filter.filter_dataframe(
		fracfocus_registry,
		column_state='StateName',
		column_county='CountyName',
		column_operator='OperatorName',
	)

kperry2215 / fracfocus_filter_class.py

Last active July 13, 2019 18:26

	class fracfocus_data_search:
	"""
	This class generates an object that is used to filter the master
	fracfocus dataframe so it only contains a certain state/state abbreviation,
	county (list), and/or operator
	"""

	def __init__(self, state=None, state_abbreviation=None, county_list=None,
	operator=None):
	#All data in initialize def optional depending on what kind of filtering

kperry2215 / frac_focus.py

Last active July 5, 2019 06:29

	#Import desired packages
	import requests
	import zipfile
	import io
	import pandas as pd

	def pull_zip_file_from_url(url):
	"""
	This function pulls a zip file from a URL and generates a ZipFile object
	Arguments:

kperry2215 / epa_api.py

Created July 5, 2019 06:16

	"""
	This script is used to query data directly from the EPA's Envirofacts API, and land it in
	a pandas data frame. In this script, the GHG data tables are pulled and merged together to
	create a master dataframe containing all of the GHG data we'd need for analysis:
	facility location, sector, and subsector, and emissions and emission type by year
	"""
	import pandas as pd
	import io
	import requests