Kirsten Perry kperry2215

Data scientist, Solar @ National Renewable Energy Lab. Formerly data scientist @ BP

kperry2215 / sesd_algorithm.py

Created August 24, 2019 00:28

	def sesd_anomaly_detection(dataframe,
	column_name,
	desired_frequency,
	max_anomalies,
	alpha_level):
	"""
	In this definition, time series anomalies are detected using the S-ESD algorithm.
	Arguments:
	dataframe: Pandas dataframe
	column_name: string. Name of the column that we want to detect anomalies in

kperry2215 / stl_decomposition.py

Created August 24, 2019 00:26

	def decompose_time_series(series, desired_frequency):
	"""
	Perform STL decomposition on the time series.
	Arguments:
	series: Pandas series. Time series sequence that we wish to decompose.
	desired_frequency: Integer. Time frequency of the series. If we want to detect
	a yearly trend, we'd set the value equal to 365.
	Outputs:
	Plot of time series STL decomposition.
	"""

kperry2215 / isolation_forest.py

Created August 24, 2019 00:25

	def isolation_forest_anomaly_detection(df,
	column_name,
	outliers_fraction):
	"""
	In this definition, time series anomalies are detected using an Isolation Forest algorithm.
	Arguments:
	df: Pandas dataframe
	column_name: string. Name of the column that we want to detect anomalies in
	outliers_fraction: float. Expected proportion of outliers in the sequence, given as a fraction (e.g. 0.01 for 1%) rather than a percent value.
	Outputs:

kperry2215 / low_pass_filter.py

Created August 24, 2019 00:23

	def low_pass_filter_anomaly_detection(df,
	column_name,
	number_of_stdevs_away_from_mean):
	"""
	Implement a low-pass filter to detect anomalies in a time series, and save the filter outputs
	(True/False) to a new column in the dataframe.
	Arguments:
	df: Pandas dataframe
	column_name: string. Name of the column that we want to detect anomalies in
	number_of_stdevs_away_from_mean: float. Number of standard deviations away from

kperry2215 / generate_anomalies.py

Created August 24, 2019 00:22

	# Hand-picked anomalous data points to add to the series so that the
	# detection algorithm has known anomalies to find.
	# NOTE(review): presumably each key is the row position in the series and
	# each value is the anomalous value written there -- confirm against the
	# code that applies this dictionary.
	anomaly_dictionary={80: 3.1,
	200: 3,
	333: 1,
	600: 2.6,
	710: 2.1,
	890: 2.3,
	1100: 1,
	1211: 2.6,
	1309: 2.3}

kperry2215 / plot_gasoline_prices.py

Created August 24, 2019 00:20

	import pandas as pd
	import matplotlib.pyplot as plt
	import eia

	def retrieve_time_series(api, series_ID):
	"""
	Return the time series dataframe, based on API and unique Series ID
	Arguments:
	api: API that we're connected to
	series_ID: string. Name of the series that we want to pull from the EIA API

kperry2215 / ruptures.py

Last active August 15, 2019 02:12

	# Create the EIA API client from your personal API key.
	# Replace the placeholder string below with your own key.
	api_key = 'YOUR API KEY HERE'
	api = eia.API(api_key)

	# Series ID of the WTI oil price data to pull
	series_ID='PET.RWTC.D'
	# Retrieve the data for that series ID from the API
	series_search = api.data_by_series(series=series_ID)
	# Create a pandas dataframe from the retrieved time series
	price_df = pd.DataFrame(series_search)

kperry2215 / ruptures_code.py

Last active August 15, 2019 02:06

	def retrieve_time_series(api, series_ID):
	"""
	Return the time series dataframe, based on API and unique Series ID
	api: API that we're connected to
	series_ID: string. Name of the series that we want to pull from the EIA API
	"""
	#Retrieve Data By Series ID
	series_search = api.data_by_series(series=series_ID)
	##Create a pandas dataframe from the retrieved time series
	df = pd.DataFrame(series_search)

kperry2215 / ruptures_example.py

Last active August 15, 2019 02:03

	def retrieve_time_series(api, series_ID):
	"""
	Return the time series dataframe, based on API and unique Series ID
	api: API that we're connected to
	series_ID: string. Name of the series that we want to pull from the EIA API
	"""
	#Retrieve Data By Series ID
	series_search = api.data_by_series(series=series_ID)
	##Create a pandas dataframe from the retrieved time series
	df = pd.DataFrame(series_search)

kperry2215 / feature_importances.py

Created July 30, 2019 22:38

	# Tabulate the model's feature importances, one row per entry of
	# feature_list, ordered from largest to smallest importance.
	feature_importances = pd.DataFrame(
	    data=rf.feature_importances_,
	    index=feature_list,
	    columns=['importance'],
	).sort_values(by='importance', ascending=False)
	print(feature_importances)