Kirsten Perry kperry2215

Data scientist, Solar @ National Renewable Energy Lab. Formerly data scientist @ BP

kperry2215 / plot_time_series.py

Created July 20, 2019 02:44

	#Fetch the natural gas series identified by series_ID
	series_ID = 'NG.N3035TX3.M'
	nat_gas_df = retrieve_time_series(api, series_ID)
	#Move index level 0 out of the index and into an ordinary column
	nat_gas_df.reset_index(level=0, inplace=True)
	#Build the old-name -> new-name mapping only after the reset, because
	#columns[1] must refer to the post-reset column layout
	renamed_columns = {
	    'index': 'Date',
	    nat_gas_df.columns[1]: 'Nat_Gas_Price_MCF',
	}
	nat_gas_df.rename(columns=renamed_columns, inplace=True)
	#Parse the Date column into pandas datetime values
	nat_gas_df['Date'] = pd.to_datetime(nat_gas_df['Date'])

kperry2215 / plot_ts.py

Created July 20, 2019 02:46

	#Fetch the natural gas series identified by series_ID
	series_ID = 'NG.N3035TX3.M'
	nat_gas_df = retrieve_time_series(api, series_ID)
	#Move index level 0 out of the index and into an ordinary column
	nat_gas_df.reset_index(level=0, inplace=True)
	#Build the old-name -> new-name mapping only after the reset, because
	#columns[1] must refer to the post-reset column layout
	renamed_columns = {
	    'index': 'Date',
	    nat_gas_df.columns[1]: 'Nat_Gas_Price_MCF',
	}
	nat_gas_df.rename(columns=renamed_columns, inplace=True)
	#Parse the Date column into pandas datetime values
	nat_gas_df['Date'] = pd.to_datetime(nat_gas_df['Date'])

kperry2215 / transform_and_difference.py

Created July 20, 2019 02:47

	#Natural-log transform: (source column, transformed column) pairs,
	#processed in this order so the new columns are appended in the same
	#sequence as before
	log_pairs = (
	    ('Electricity_Price', 'Electricity_Price_Transformed'),
	    ('Nat_Gas_Price_MCF', 'Nat_Gas_Price_MCF_Transformed'),
	)
	for raw_name, logged_name in log_pairs:
	    master_df[logged_name] = np.log(master_df[raw_name])

	#Difference each transformed column against its value n rows earlier
	#(n=1, i.e. one month); the first n rows of each result are NaN
	n = 1
	difference_pairs = (
	    ('Electricity_Price_Transformed', 'Electricity_Price_Transformed_Differenced'),
	    ('Nat_Gas_Price_MCF_Transformed', 'Nat_Gas_Price_MCF_Transformed_Differenced'),
	)
	for logged_name, differenced_name in difference_pairs:
	    master_df[differenced_name] = master_df[logged_name].diff(periods=n)

kperry2215 / augmented_dickey_fuller.py

Created July 20, 2019 02:49

	def augmented_dickey_fuller_statistics(time_series):
	"""
	Run the augmented Dickey-Fuller test on a time series
	to determine if it's stationary.
	Arguments:
	time_series: series. Time series that we want to test
	Outputs:
	Test statistics for the Augmented Dickey Fuller test in
	the console
	"""

kperry2215 / build_VAR_model.py

Created July 20, 2019 02:50

	#Select the two differenced series, drop rows containing NaN, and
	#copy the result into a numpy array
	differenced_columns = ['Electricity_Price_Transformed_Differenced',
	                       'Nat_Gas_Price_MCF_Transformed_Differenced']
	master_array = np.array(master_df[differenced_columns].dropna())

	#95/5 split by row position: leading rows train the model, trailing rows test it
	split_index = int(0.95 * len(master_array))
	training_set = master_array[:split_index]
	test_set = master_array[split_index:]

	#Construct the VAR model on the training rows
	model = VAR(endog=training_set)

kperry2215 / model_accuracy.py

Created July 20, 2019 02:52

	def calculate_model_accuracy_metrics(actual, predicted):
	"""
	Output model accuracy metrics, comparing predicted values
	to actual values.
	Arguments:
	actual: list. Time series of actual values.
	predicted: list. Time series of predicted values
	Outputs:
	Forecast bias metrics, mean absolute error, mean squared error,
	and root mean squared error in the console

kperry2215 / accuracy_script.py

Created July 20, 2019 02:53

	def calculate_model_accuracy_metrics(actual, predicted):
	"""
	Output model accuracy metrics, comparing predicted values
	to actual values.
	Arguments:
	actual: list. Time series of actual values.
	predicted: list. Time series of predicted values
	Outputs:
	Forecast bias metrics, mean absolute error, mean squared error,
	and root mean squared error in the console

kperry2215 / accuracy.py

Created July 20, 2019 02:54

	def calculate_model_accuracy_metrics(actual, predicted):
	"""
	Output model accuracy metrics, comparing predicted values
	to actual values.
	Arguments:
	actual: list. Time series of actual values.
	predicted: list. Time series of predicted values
	Outputs:
	Forecast bias metrics, mean absolute error, mean squared error,
	and root mean squared error in the console

kperry2215 / gist:6ea4380acde06025be0a6fdba82c02ce

Created July 30, 2019 04:28

	import eia
	import pandas as pd

	def retrieve_time_series(api, series_ID):
	"""
	Return the time series dataframe, based on API and unique Series ID
	api: API that we're connected to
	series_ID: string. Name of the series that we want to pull from the EIA API
	"""
	#Retrieve Data By Series ID

kperry2215 / visualize_electricity_demand_data.py

Created July 30, 2019 04:28

	import eia
	import pandas as pd

	def retrieve_time_series(api, series_ID):
	"""
	Return the time series dataframe, based on API and unique Series ID
	api: API that we're connected to
	series_ID: string. Name of the series that we want to pull from the EIA API
	"""
	#Retrieve Data By Series ID