Skip to content

Instantly share code, notes, and snippets.

@Skanda319
Skanda319 / tennis_simulation.py
Last active December 26, 2018 05:33
play a tennis match point by point
#we use a pseudo-random number generator
#to pull a random number and determine
#if a server has won a point or not
#theoretically, we could use a more advanced function
#that takes in a distribution and generates random numbers
#based on that
from random import random
#############################################################################################
#define what a big point is and
#cleaned up original code to work with python 3.6
#results match the output from the python 2.7 version
#filter and map functions have been changed between 3.6 and 2.7
import numpy as np
import pandas as pd
import os
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import cosine_similarity
#import packages for use later in the HMM code
import pandas as pd
import sklearn.mixture as mix
import numpy as np
import scipy.stats as scs
import datetime as dt
# Build the train/test feature matrices the same way: keep the return, range
# and close columns and move "date" into the index.
X_train, X_test = (
    frame[["date", "return", "range", "close"]].set_index("date")
    for frame in (train, test)
)

# Three-component Gaussian mixture with full covariance matrices.
# n_init=100 requests 100 initializations; random_state=7 fixes the seed.
model = mix.GaussianMixture(
    n_components=3,
    covariance_type="full",
    n_init=100,
    random_state=7,
)
model.fit(X_train)

# Label every test row with the mixture component the fitted model assigns it.
# NOTE(review): this is a per-row assignment, not a decoded state sequence.
hidden_states = model.predict(X_test)
# Seaborn defaults with slightly enlarged fonts.
sns.set(font_scale=1.25)

# rc overrides applied on top of the 'white' seaborn style.
style_kwds = {
    'xtick.major.size': 3,
    'ytick.major.size': 3,
    'font.family': u'courier prime code',
    'legend.frameon': True,
}
sns.set_style('white', rc=style_kwds)

# One subplot row per mixture component, with both axes shared.
fig, axs = plt.subplots(
    nrows=model.n_components,
    sharex=True,
    sharey=True,
    figsize=(12, 9),
)

# One rainbow colour per component, sampled evenly over [0, 1].
colors = cm.rainbow(np.linspace(0, 1, num=model.n_components))
for i, (ax, color) in enumerate(zip(axs, colors)):
# Use fancy indexing to plot data in each state.
sns.set(font_scale=1.5)

# Pair each predicted state label with its X_test row, step by step:
# a one-column frame of labels on X_test's index ...
states = pd.DataFrame(hidden_states, columns=['states'], index=X_test.index)
# ... joined back onto the features by index ...
states = states.join(X_test, how='inner')
# ... with the index moved out into an ordinary column.
states = states.reset_index(drop=False)
# NOTE(review): X_test's index is created with set_index("date"), so the
# column produced above is presumably 'date' rather than 'index', which would
# make this rename a no-op -- confirm which column name later code expects.
states = states.rename(columns={'index': 'Date'})
states.head()
#suppressing warnings because of some issues with the font package
#in general, we would not recommend turning off warnings.
import warnings
import pandas as pd
import io
import requests
# Download the Coin Metrics CSV for bitcoin and load it into a DataFrame.
url = "https://coinmetrics.io/newdata/" + "btc" + ".csv"
# requests applies no timeout by default; set one so a stalled connection
# cannot hang the script indefinitely.
response = requests.get(url, timeout=30)
# Stop on a 4xx/5xx response instead of handing an error page to the CSV parser.
response.raise_for_status()
s = response.content
# The payload is bytes; decode as UTF-8 and wrap it so read_csv sees a text stream.
btc_data = pd.read_csv(io.StringIO(s.decode('utf-8')))
#we add this in to tag a larger data frame if we were
#pulling data for multiple coins
# Names of the columns used as regression inputs, one per line.
x_cols = [
    'AdrActCnt',
    'BlkCnt',
    'BlkSizeByte',
    'BlkSizeMeanByte',
    'CapMVRVCur',
    'CapMrktCurUSD',
    'CapRealUSD',
    'DiffMean',
    'FeeMeanNtv',
    'FeeMeanUSD',
    'IssContNtv',
    'IssContPctAnn',
    'IssContUSD',
    'IssTotNtv',
    'IssTotUSD',
    'NVTAdj',
    'NVTAdj90',
    'TxCnt',
    'TxTfrCnt',
    'TxTfrValAdjNtv',
    'TxTfrValAdjUSD',
    'TxTfrValMeanNtv',
    'TxTfrValMeanUSD',
    'TxTfrValMedNtv',
    'TxTfrValMedUSD',
    'TxTfrValNtv',
    'TxTfrValUSD',
    'VtyDayRet180d',
    'VtyDayRet30d',
    'VtyDayRet60d',
]

# Name of the target column, kept as a one-element list.
y_col = ['PriceUSD']
# Setup for a rolling ridge regression whose training window has length `window`.
window = 250  # training-window length

# Result accumulators; each name is bound to its own, initially empty list.
# NOTE(review): presumably dates, predictions, +/-1 standard-deviation bands
# and R-squared values are appended per step -- confirm against the loop that
# fills them.
dt_list, pred_list, up1sd, down1sd, rsq = [], [], [], [], []