Skip to content

Instantly share code, notes, and snippets.

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_boston
# Apply seaborn's "whitegrid" style to subsequent plots.
sns.set_style("whitegrid")
# Load the Boston housing data; `return_X_y=True` yields a two-element result
# that is unpacked into `X` and `y`.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the pinned scikit-learn version before relying on this line.
X, y = load_boston(return_X_y=True)
# Attach each token's previous and next token (computed per `doc_id` group),
# then keep only the rows whose `tag` equals 'POS' and the four columns needed
# to read the token in context.
(
    tidy_docs
    .groupby("doc_id")
    .apply(
        lambda doc_tokens: doc_tokens.assign(
            # `assign` calls each lambda with the group's frame, so the
            # shifts never cross a document boundary.
            prev_token=lambda frame: frame.token.shift(1),
            next_token=lambda frame: frame.token.shift(-1),
        )
    )
    .reset_index(drop=True)
    .query("tag == 'POS'")
    .loc[:, ["doc_id", "prev_token", "token", "next_token"]]
)
def tidy_tokens(docs):
"""Extract tokens and metadata from list of spaCy docs."""
# Column labels; presumably the header of the resulting table — confirm
# against the part of the function that is not visible here.
cols = [
"doc_id", "token", "token_order", "lemma",
"ent_type", "tag", "dep", "pos", "is_stop",
"is_alpha", "is_digit", "is_punct"
]
# Accumulator initialised empty.
# NOTE(review): this excerpt ends here (and has lost its indentation); the
# code that fills `meta_df`, uses `cols`, and returns a result is not shown.
meta_df = []
def extract_tokens_plus_meta(doc: "spacy.tokens.doc.Doc"):
    """Extract tokens and metadata from individual spaCy doc.

    The annotation is written as a string so that defining this function
    does not require the name ``spacy`` to be bound at definition time.

    Args:
        doc: An iterable of token objects (a spaCy ``Doc``). Each token must
            expose the attributes read below.

    Returns:
        A list with one tuple per token, in iteration order:
        ``(text, i, lemma_, ent_type_, tag_, dep_, pos_, is_stop,
        is_alpha, is_digit, is_punct)``. An empty ``doc`` gives ``[]``.
    """
    return [
        (
            token.text, token.i, token.lemma_, token.ent_type_, token.tag_,
            token.dep_, token.pos_, token.is_stop, token.is_alpha,
            token.is_digit, token.is_punct,
        )
        for token in doc
    ]
def calcualte_exceedance_probability(exceed_values, posterior):
# NOTE(review): "calcualte" is a typo for "calculate"; the name is left as is
# because renaming would change the function's public name.
# Size of the second axis of `posterior`; used as the denominator below.
n = posterior.shape[1]
# Accumulators, initialised empty; the code that fills them is not visible
# in this excerpt.
ix = []
probs_m = []
probs_05 = []
probs_95 = []
for i in exceed_values:
# For each position along axis 0, the fraction of entries along axis 1 of
# `posterior` that are strictly greater than the threshold `i`.
p = ((posterior>i).sum(1)/n)
# NOTE(review): the names suggest the 5% / 95% quantiles, but the levels
# actually requested are 0.01 and 0.99 — confirm which is intended.
p05 = np.quantile(p, 0.01)
p95 = np.quantile(p, 0.99)
# NOTE(review): the excerpt ends here (indentation was also lost); the rest of
# the loop body and the return statement are not shown.
# Create the model object `ev_model`; `pm` is presumably PyMC — confirm the
# import, which is not visible in this excerpt.
ev_model = pm.Model()
with ev_model:
# Priors for the two Gumbel parameters: Normal("loc", 5, 10) and
# HalfCauchy("scale", 1).
loc = pm.Normal("loc", 5, 10)
scale = pm.HalfCauchy("scale", 1)
# Gumbel likelihood with `block_maxima_month.wind_speed` as observed data.
lik = pm.Gumbel("lik", loc, scale, observed=block_maxima_month.wind_speed)
# Sample with default settings.
# NOTE(review): indentation was lost in this excerpt; `pm.sample()` presumably
# belongs inside the `with ev_model:` block — confirm.
ev_trace = pm.sample()
# Second model object, entered as a context manager below.
# NOTE(review): the excerpt is cut off here — the body of the `with` block
# (the model definition) is not shown.
norm_model = pm.Model()
with norm_model:
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
class CustomClassifier(BaseEstimator, ClassifierMixin):
    """Skeleton of a scikit-learn compatible classifier.

    Both methods are placeholders: no hyper-parameters are stored and no
    model is trained yet.
    """

    def __init__(self):
        # No hyper-parameters to record yet.
        pass

    def fit(self, X, y=None):
        """Placeholder fit; performs no training.

        Args:
            X: Training data (currently unused).
            y: Optional targets (currently unused).

        Returns:
            self, following the scikit-learn estimator convention so that
            calls such as ``est.fit(X, y).predict(X)`` can be chained.
        """
        return self
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
class TreeEmbeddingLogisticRegression(BaseEstimator, ClassifierMixin):
"""Fits a logistic regression model on tree embeddings.
"""
# NOTE(review): this excerpt shows only the constructor (and has lost its
# indentation); fit/predict are not visible here.
# NOTE(review): scikit-learn's estimator guidelines ask for explicit keyword
# parameters in `__init__` rather than `**kwargs`; verify that get_params()
# and clone() behave as intended with this signature.
def __init__(self, **kwargs):
# Keep the arbitrary keyword arguments as a dict for later use.
self.kwargs = kwargs
from scipy.optimize import curve_fit
from sklearn.base import BaseEstimator, RegressorMixin
import statsmodels.api as sm
class ExponentialDecayRegressor(BaseEstimator, RegressorMixin):
"""Fits an exponential decay curve
"""
# NOTE(review): the default for `starting_values` is a mutable list shared by
# every instance created without that argument — confirm nothing mutates it
# in place, or consider an immutable default.
# NOTE(review): `**kwargs` is accepted, but its handling is not visible; the
# excerpt ends after the first assignment (indentation was also lost).
def __init__(self, starting_values=[1.,1.e-5,1.], **kwargs,):
# Store the starting values unchanged.
self.starting_values = starting_values
from sklearn.base import BaseEstimator, RegressorMixin
import statsmodels.api as sm
class QuantileRegression(BaseEstimator, RegressorMixin):
"""Sklearn wrapper for statsmodels Quantile Regression
"""
# NOTE(review): this excerpt shows only the constructor (and has lost its
# indentation); fit/predict are not visible here.
# NOTE(review): scikit-learn's estimator guidelines ask for explicit keyword
# parameters in `__init__` rather than `**kwargs` — verify get_params() and
# clone() behave as intended.
def __init__(self, quantile=0.5, **kwargs):
# Quantile level; defaults to 0.5.
self.quantile = quantile
# Extra keyword arguments kept as a dict; how they are used is not visible.
self.kwargs = kwargs
# Placeholder, initially None; presumably replaced by the fitted model
# later — confirm against the rest of the class.
self.model = None