This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def chooseBestKforKMeans(scaled_data, k_range):
    """Select the k in ``k_range`` with the smallest score from ``kMeansRes``.

    Parameters
    ----------
    scaled_data
        Data handed unchanged to ``kMeansRes`` for every candidate k.
        (presumably already feature-scaled, judging by the name -- confirm
        against the caller)
    k_range : iterable
        Candidate cluster counts; each one is evaluated exactly once.

    Returns
    -------
    best_k
        The value of k whose 'Scaled Inertia' entry is the minimum.
    results : pandas.DataFrame
        One row per candidate, indexed by 'k', with a single
        'Scaled Inertia' column holding the value returned by ``kMeansRes``.
    """
    # Evaluate every candidate k once, keeping (k, score) pairs in input order.
    ans = [(k, kMeansRes(scaled_data, k)) for k in k_range]
    results = pd.DataFrame(ans, columns=['k', 'Scaled Inertia']).set_index('k')
    # Select the column by label and take idxmin on the Series. The previous
    # `results.idxmin()[0]` used an integer key on a label-indexed Series,
    # which depends on deprecated positional fallback in pandas.
    best_k = results['Scaled Inertia'].idxmin()
    return best_k, results
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Usage example for TimeBasedCV: print the train/test indices of every split.
# 'record_date' is parsed as dates on load because it is later passed as the
# date column to split().
data_for_modeling = pd.read_csv('data.csv', parse_dates=['record_date'])

# presumably 30-day training windows followed by 7-day test windows --
# confirm against the TimeBasedCV parameter documentation.
tscv = TimeBasedCV(train_period=30, test_period=7, freq='days')

folds = tscv.split(
    data_for_modeling,
    validation_split_date=datetime.date(2019, 2, 1),
    date_column='record_date',
)
for train_index, test_index in folds:
    print(train_index, test_index)
# get number of splits |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import datetime | |
from datetime import datetime as dt | |
from dateutil.relativedelta import * | |
class TimeBasedCV(object): | |
''' | |
Parameters | |
---------- | |
train_period: int |