Skanda319’s gists

Skanda319 / tennis_simulation.py

Last active December 26, 2018 05:33

play a tennis match point by point

	#we use a pseudo-random number generator
	#to pull a random number and determine
	#if a server has won a point or not
	#theoretically, we could use a more advanced function
	#that takes in a distribution and generates random numbers
	#based on that
	from random import random

	#############################################################################################
	#define what a big point is and

Skanda319 / cosine_similarty_of_text.py

Created January 12, 2019 23:39

	#cleaned up original code to work with python 3.6
	#results match the output from the python 2.7 version
	#filter and map behave differently in 3.6 than in 2.7 (they return iterators instead of lists)
	import numpy as np
	import pandas as pd
	import os
	import re
	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.metrics.pairwise import euclidean_distances
	from sklearn.metrics.pairwise import cosine_similarity

Skanda319 / IEX_data_collect.py

Last active April 22, 2019 00:03

	#import packages for use later in the HMM code

	import pandas as pd
	import sklearn.mixture as mix

	import numpy as np
	import scipy.stats as scs

	import datetime as dt

Skanda319 / HMM_for_Regime_Detection.py

Created April 22, 2019 00:23

	# Build the train/test feature matrices: the same three columns
	# ("return", "range", "close") from each split, indexed by "date".
	X_train = train[["date", "return", "range", "close"]].set_index("date")
	X_test = test[["date", "return", "range", "close"]].set_index("date")

	# Fit a 3-component Gaussian mixture with full covariance matrices on the
	# training data. n_init=100 repeats the initialisation 100 times and
	# random_state=7 fixes the seed so the fit is reproducible.
	# NOTE(review): assuming `mix` is sklearn.mixture, GaussianMixture treats
	# each row independently (no state-transition model), so this is a mixture
	# model rather than a true HMM -- confirm that is intended.
	model = mix.GaussianMixture(n_components=3,
	covariance_type="full",
	n_init=100,
	random_state=7).fit(X_train)

	# Label every test observation with its most probable mixture component
	hidden_states = model.predict(X_test)

Skanda319 / Plot_Hidden_States.py

Created April 22, 2019 00:45

	# Global seaborn look: scale fonts up, then apply the 'white' style with
	# overrides for tick size, font family and a framed legend.
	sns.set(font_scale=1.25)
	style_kwds = {'xtick.major.size': 3, 'ytick.major.size': 3,
	'font.family':u'courier prime code', 'legend.frameon': True}
	sns.set_style('white', style_kwds)

	# One subplot per model component, sharing both axes, and one colour per
	# component sampled evenly over [0, 1] from the rainbow colormap.
	# NOTE(review): `cm` is presumably matplotlib.cm and `model` the fitted
	# mixture from the regime-detection snippet -- neither is defined here.
	fig, axs = plt.subplots(model.n_components, sharex=True, sharey=True, figsize=(12,9))
	colors = cm.rainbow(np.linspace(0, 1, model.n_components))

	for i, (ax, color) in enumerate(zip(axs, colors)):
	# Use fancy indexing to plot data in each state.

Skanda319 / Plot_Regimes.py

Last active April 22, 2019 00:57

	sns.set(font_scale=1.5)
	# Put the predicted state labels in a one-column frame that shares
	# X_test's index, join the X_test columns back on that index, and then
	# move the index into an ordinary column.
	# NOTE(review): the rename only takes effect when the index is unnamed
	# (reset_index then produces a column called 'index'); if X_test's index
	# carries a name, the column keeps that name and 'Date' is never created
	# -- confirm against how X_test is built.
	states = (pd.DataFrame(hidden_states, columns=['states'], index=X_test.index)
	.join(X_test, how='inner')
	.reset_index(drop=False)
	.rename(columns={'index':'Date'}))
	# Preview of the first rows (only displayed in an interactive session)
	states.head()

	#suppressing warnings because of some issues with the font package;
	#in general, turning off warnings is not recommended.
	import warnings

Skanda319 / coin_metrics_fetch.py

Created October 18, 2019 00:33

	import pandas as pd
	import io
	import requests

	# Download the Coin Metrics CSV for bitcoin and load it into a DataFrame.
	url = "https://coinmetrics.io/newdata/"+"btc"+".csv"
	# A timeout keeps the script from hanging forever on a stalled connection.
	response = requests.get(url, timeout=30)
	# Fail loudly on an HTTP error instead of feeding an error page to the
	# CSV parser, which would produce a garbage frame or a confusing error.
	response.raise_for_status()
	s = response.content
	# Decode the raw bytes as UTF-8 and parse the text as CSV.
	btc_data = pd.read_csv(io.StringIO(s.decode('utf-8')))

	#we add this in to tag a larger data frame if we were
	#pulling data for multiple coins

Skanda319 / coinmetrics_features_prep.py

Created October 18, 2019 00:46

	# Names of the 30 columns selected as model inputs.
	# NOTE(review): these look like Coin Metrics metric identifiers, matching
	# the CSV fetched elsewhere -- verify they exist in the downloaded frame.
	x_cols = ['AdrActCnt', 'BlkCnt', 'BlkSizeByte', 'BlkSizeMeanByte', 'CapMVRVCur',
	'CapMrktCurUSD', 'CapRealUSD', 'DiffMean', 'FeeMeanNtv', 'FeeMeanUSD', 'IssContNtv',
	'IssContPctAnn', 'IssContUSD', 'IssTotNtv', 'IssTotUSD', 'NVTAdj',
	'NVTAdj90','TxCnt', 'TxTfrCnt', 'TxTfrValAdjNtv', 'TxTfrValAdjUSD',
	'TxTfrValMeanNtv', 'TxTfrValMeanUSD', 'TxTfrValMedNtv',
	'TxTfrValMedUSD', 'TxTfrValNtv', 'TxTfrValUSD', 'VtyDayRet180d',
	'VtyDayRet30d', 'VtyDayRet60d']

	# Target column, kept as a one-element list (so selecting with it yields a
	# DataFrame rather than a Series).
	y_col = ['PriceUSD']

Skanda319 / btc_ridge.py

Created October 18, 2019 01:15

	# Set-up for a rolling ridge regression whose training window is
	# `window` rows long.

	window = 250 # train window

	# Empty accumulators, filled by code that is not shown in this preview.
	# NOTE(review): judging by the names these collect, per step, the date,
	# the prediction, the +/- one-standard-deviation bands and the R-squared
	# -- confirm against the loop that appends to them.
	dt_list = []
	pred_list = []
	up1sd =[]
	down1sd = []
	rsq = []