Skip to content

Instantly share code, notes, and snippets.

def chooseBestKforKMeans(scaled_data, k_range):
    '''
    Pick the k from ``k_range`` whose scaled inertia is the smallest.

    ``kMeansRes(scaled_data, k)`` is evaluated once for every candidate k and
    the scores are collected into a DataFrame indexed by k.

    Parameters
    ----------
    scaled_data:
        Data forwarded unchanged to ``kMeansRes``
        (presumably already scaled -- confirm against ``kMeansRes``).
    k_range: iterable
        Candidate values of k; iterated exactly once.

    Returns
    -------
    best_k:
        Index label (the k) of the minimum 'Scaled Inertia' value.
    results: pandas.DataFrame
        One row per candidate k, indexed by 'k', with a single
        'Scaled Inertia' column.

    Raises
    ------
    ValueError
        If ``k_range`` yields no candidates.
    '''
    ans = [(k, kMeansRes(scaled_data, k)) for k in k_range]
    if not ans:
        # Fail with a clear message instead of an opaque argmin error.
        raise ValueError('k_range must contain at least one candidate k')

    results = pd.DataFrame(ans, columns=['k', 'Scaled Inertia']).set_index('k')
    # Select the column by label before taking idxmin. The former
    # ``results.idxmin()[0]`` indexed a Series labelled by column name with
    # the integer 0, which only works through pandas' deprecated positional
    # fallback for integer keys.
    best_k = results['Scaled Inertia'].idxmin()
    return best_k, results
# Usage example for TimeBasedCV.
# NOTE: `pd`, `datetime` and `TimeBasedCV` must already be imported/defined
# when these statements run.
data_for_modeling = pd.read_csv('data.csv', parse_dates=['record_date'])

tscv = TimeBasedCV(train_period=30, test_period=7, freq='days')

# Ask the splitter for its (train_index, test_index) pairs, using the
# 'record_date' column and 2019-02-01 as the validation split date.
_splits = tscv.split(
    data_for_modeling,
    validation_split_date=datetime.date(2019, 2, 1),
    date_column='record_date',
)
for train_index, test_index in _splits:
    print(train_index, test_index)
# get number of splits
import pandas as pd
import datetime
from datetime import datetime as dt
from dateutil.relativedelta import *
class TimeBasedCV(object):
'''
Parameters
----------
train_period: int