This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import packages
# pyplot (not the top-level matplotlib package) is what the `plt` alias
# conventionally refers to; importing the package itself under that name
# leaves plt.figure / plt.plot / plt.show undefined.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# set seaborn style: default figure size (12, 8) and fonts scaled by 1.5
sns.set(rc={'figure.figsize': (12, 8)})
sns.set(font_scale=1.5)
# create UKDS color palette |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.linear_model import LinearRegression | |
from sklearn.preprocessing import PolynomialFeatures | |
from sklearn.metrics import mean_absolute_error | |
def train_polynomial(df, train_upto,train_degree): | |
# build Polynomial features up to degree train_degree | |
p = PolynomialFeatures(degree=train_degree).fit(df[['dv','psi','temp1','temp2']]) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle
import pandas as pd
import numpy as np

# Create sample data: a single row with columns A, B and C
df = pd.DataFrame(columns=['A', 'B', 'C'])
df.loc[0] = [12, 42, 'test']

# load stored model
# SECURITY: unpickling can execute arbitrary code embedded in the file, so
# only load pickles that come from a trusted source.
# The context manager closes the file handle even when unpickling raises.
with open('Pickled_Model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.preprocessing import PolynomialFeatures

# Columns expanded into polynomial terms; named once so the fit, the
# transform and the generated column labels cannot drift apart.
feature_cols = ['feature1', 'feature2']

# Fit a degree-2 polynomial expansion on the selected columns of `df`
# (`df` and `pd` are expected to be defined by the surrounding notebook).
p = PolynomialFeatures(degree=2).fit(df[feature_cols])

# get_feature_names() was removed from scikit-learn in favour of
# get_feature_names_out(); use the new method when it exists and fall back
# to the old one on releases that predate it.
_feature_namer = getattr(p, 'get_feature_names_out', None) or p.get_feature_names
features = pd.DataFrame(
    p.transform(df[feature_cols]),
    columns=_feature_namer(df[feature_cols].columns),
)
features.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob

path = r'path to file'  # use your path
all_files = glob.glob(path + "/*.csv")
li = []
for filename in all_files:
    # The separator ',|;' is longer than one character, so pandas treats it
    # as a regular expression (comma OR semicolon). Only the Python parsing
    # engine supports that; request it explicitly instead of relying on the
    # automatic fallback and the ParserWarning it emits.
    df = pd.read_csv(filename, index_col=None, header=0, sep=',|;', engine='python')
    print(filename)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_multi(data, cols=None, spacing=.1, **kwargs): | |
from pandas import plotting | |
# Get default color style from pandas - can be changed to any other color list | |
if cols is None: cols = data.columns | |
if len(cols) == 0: return | |
colors = getattr(getattr(plotting, '_style'), '_get_standard_colors')(num_colors=len(cols)) | |
# First axis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import plotly as py | |
py.tools.set_credentials_file(username='YOURE_USERNAME', api_key='YOUR API KEY') | |
import cufflinks as cf | |
import pandas as pd | |
df = pd.read_csv('data.csv') | |
py.plotly.iplot([{ | |
'x': df.var0, | |
'y': df[col], |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Visualisation | |
import matplotlib as mpl | |
import matplotlib.pyplot as plt | |
import matplotlib.dates as mdates | |
# Configure visualisations | |
%matplotlib inline | |
mpl.style.use( 'ggplot' ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Random Forest params
# Maps each hyperparameter name to the list of candidate values to try
# (presumably consumed by a grid search -- confirm against the caller).
rf_param_grid = {
    "max_depth": [None],
    "max_features": [1, 3],
    "min_samples_split": [2, 3],
    "min_samples_leaf": [1, 3],
    "bootstrap": [False],
    "n_estimators": [100],
    "criterion": ["gini"],
}
## Extra Trees params |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
import numpy as np | |
from sklearn.model_selection import GridSearchCV, StratifiedKFold, learning_curve | |
from sklearn.ensemble import GradientBoostingClassifier | |
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None, | |
n_jobs=-1, train_sizes=np.linspace(.1, 1.0, 5)): | |
"""Generate a simple plot of the test and training learning curve""" | |
plt.figure() |
Newer Older