This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Notebook-style setup: import the analysis/plotting stack, pick the seaborn
# theme, and load the Boston housing data as features ``X`` and target ``y``.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor

sns.set_style("whitegrid")

# NOTE(review): ``load_boston`` was deprecated in scikit-learn 1.0 and removed
# in 1.2, so this line only runs on scikit-learn < 1.2. Switching to another
# dataset (e.g. ``fetch_california_housing``) changes the data and is left to
# the author.
X, y = load_boston(return_X_y=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Show every row tagged 'POS' next to the tokens immediately before and after
# it within the same document.
# NOTE(review): 'POS' is presumably the Penn Treebank possessive-ending tag;
# confirm against the tagger that produced ``tidy_docs``.
(
    tidy_docs
    .groupby("doc_id")
    # Shift per document so a neighbour is never taken from another doc_id.
    .apply(
        lambda doc_rows: doc_rows.assign(
            prev_token=lambda rows: rows.token.shift(1),
            next_token=lambda rows: rows.token.shift(-1),
        )
    )
    .reset_index(drop=True)
    .query("tag == 'POS'")
    .loc[:, ["doc_id", "prev_token", "token", "next_token"]]
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def tidy_tokens(docs): | |
"""Extract tokens and metadata from list of spaCy docs.""" | |
cols = [ | |
"doc_id", "token", "token_order", "lemma", | |
"ent_type", "tag", "dep", "pos", "is_stop", | |
"is_alpha", "is_digit", "is_punct" | |
] | |
meta_df = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_tokens_plus_meta(doc: "spacy.tokens.doc.Doc"):
    """Extract tokens and metadata from individual spaCy doc.

    Args:
        doc: An iterable of token objects (a spaCy ``Doc``). Each token must
            expose the attributes read below.

    Returns:
        A list with one 11-tuple per token, in iteration order:
        ``(text, i, lemma_, ent_type_, tag_, dep_, pos_, is_stop, is_alpha,
        is_digit, is_punct)``. An empty doc yields an empty list.
    """
    # The annotation is quoted so defining this function does not require
    # ``spacy.tokens.doc`` to be resolvable at definition time.
    return [
        (
            token.text,
            token.i,
            token.lemma_,
            token.ent_type_,
            token.tag_,
            token.dep_,
            token.pos_,
            token.is_stop,
            token.is_alpha,
            token.is_digit,
            token.is_punct,
        )
        for token in doc
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def calcualte_exceedance_probability(exceed_values, posterior): | |
n = posterior.shape[1] | |
ix = [] | |
probs_m = [] | |
probs_05 = [] | |
probs_95 = [] | |
for i in exceed_values: | |
p = ((posterior>i).sum(1)/n) | |
p05 = np.quantile(p, 0.01) | |
p95 = np.quantile(p, 0.99) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Gumbel model for the observed ``block_maxima_month.wind_speed`` series.
# ``loc`` gets a Normal prior and ``scale`` a HalfCauchy prior.
# NOTE(review): the positional prior arguments are presumably (mu, sigma) for
# Normal and beta for HalfCauchy; confirm against the installed PyMC version.
with pm.Model() as ev_model:
    loc = pm.Normal("loc", 5, 10)
    scale = pm.HalfCauchy("scale", 1)
    lik = pm.Gumbel(
        "lik", loc, scale, observed=block_maxima_month.wind_speed
    )
    # Sampling must happen inside the model context.
    ev_trace = pm.sample()
norm_model = pm.Model() | |
with norm_model: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin | |
class CustomClassifier(BaseEstimator, ClassifierMixin):
    """Skeleton for a scikit-learn compatible classifier.

    The estimator takes no hyperparameters and ``fit`` learns nothing yet; it
    only provides the method signatures to fill in.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Fit the classifier (placeholder: no state is learned).

        Args:
            X: Training data; currently unused.
            y: Target values; currently unused.

        Returns:
            The estimator itself, so calls can be chained
            (``clf.fit(X, y).predict(X)``) as scikit-learn expects.
        """
        return self
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, ClassifierMixin | |
from sklearn.ensemble import GradientBoostingClassifier | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.preprocessing import OneHotEncoder | |
class TreeEmbeddingLogisticRegression(BaseEstimator, ClassifierMixin): | |
"""Fits a logistic regression model on tree embeddings. | |
""" | |
def __init__(self, **kwargs): | |
self.kwargs = kwargs |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.optimize import curve_fit | |
from sklearn.base import BaseEstimator, RegressorMixin | |
import statsmodels.api as sm | |
class ExponentialDecayRegressor(BaseEstimator, RegressorMixin): | |
"""Fits an exponential decay curve | |
""" | |
def __init__(self, starting_values=[1.,1.e-5,1.], **kwargs,): | |
self.starting_values = starting_values |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.base import BaseEstimator, RegressorMixin | |
import statsmodels.api as sm | |
class QuantileRegression(BaseEstimator, RegressorMixin): | |
"""Sklearn wrapper for statsmodels Quantile Regression | |
""" | |
def __init__(self, quantile=0.5, **kwargs): | |
self.quantile = quantile | |
self.kwargs = kwargs | |
self.model = None |
NewerOlder