This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def is_state_dict_equal(dict1, dict2):
    """Return True when two state dicts hold the same keys and equal tensors.

    The comparison is symmetric: a key present in only one of the two
    dicts makes them unequal. Values are compared element-wise, and two
    values whose shapes differ are reported as different instead of being
    broadcast against each other (or raising on incompatible shapes).

    A short diagnostic is printed for the first mismatch found.

    :param dict1: first mapping of names to tensors
    :param dict2: second mapping of names to tensors
    :return: True if both mappings are equal, False otherwise
    """
    import torch

    for key in dict1:
        if key not in dict2:
            print(f"Key {key} not in second dict")
            return False

    # The original only walked dict1, so extra entries in dict2 went unnoticed.
    for key in dict2:
        if key not in dict1:
            print(f"Key {key} not in first dict")
            return False

    for key in dict1:
        first, second = dict1[key], dict2[key]
        # torch.eq broadcasts, so e.g. shapes (1,) and (3,) could compare
        # equal; check the shapes explicitly before comparing values.
        if first.shape != second.shape or not torch.all(torch.eq(first, second)):
            print(f"Difference in values for key {key}")
            return False

    return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def custom_dataloader(dataset: Dataset, batch_size=16): | |
random_indices = torch.randperm(len(dataset['tokens']) - context_size) | |
for idx in range(0, len(random_indices), batch_size): | |
x = torch.stack([ | |
dataset['tokens'][i: i+context_size] | |
for i in random_indices[idx: idx+batch_size] | |
]) | |
y = torch.stack([ | |
dataset['tokens'][i+1: i+context_size+1] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def compute_sample_weight(treatment_label, propensity_score, enforce_positivity=True, max_sample_weight=1e3): | |
""" | |
Demystifying Double Robustness | |
https://projecteuclid.org/download/pdfview_1/euclid.ss/1207580167 | |
https://arxiv.org/pdf/1706.10029.pdf | |
weights = [ti/g(Xi) + (1−ti)/(1−g(Xi))] | |
:param treatment_label: known treatment labels | |
:param propensity_score: estimated propensity scores | |
:param enforce_positivity: self explanatory | |
:param max_sample_weight: this is to prevent inf in subsquent calculation. N |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# >>> All aliases >>>
# Short alias for clearing the terminal screen.
alias c='clear'

# ls shortcuts
# ll: long listing of all entries, with a type indicator appended to each name.
alias ll='ls -alF'
# la: all entries except "." and "..".
alias la='ls -A'
# l: multi-column listing with a type indicator appended to each name.
alias l='ls -CF'
# <<< End of aliases <<<
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import matplotlib.pyplot as graph | |
from sklearn.datasets import load_boston | |
from sklearn.metrics import r2_score | |
from sklearn.model_selection import train_test_split | |
from sklearn.tree import DecisionTreeRegressor | |
from tqdm import trange | |
x, y = load_boston(return_X_y=True) # type: np.ndarray, np.ndarray |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

# CREATE DIRECTORY ON DBFS FOR LOGS
# DB_CLUSTER_ID is read from the environment; it is not set in this script.
LOG_DIR=/dbfs/databricks/scripts/logs/$DB_CLUSTER_ID/dask/
HOSTNAME=$(hostname)
# Quoted so the path is passed as a single argument even if the cluster id
# expands to something containing whitespace or glob characters.
mkdir -p "$LOG_DIR"

# INSTALL DASK AND OTHER DEPENDENCIES
# From here on: abort on the first failing command (-e) and echo each
# command before it runs (-x). Note the mkdir above runs before this is set.
set -ex

# Print the version of the interpreter at this path.
/databricks/python/bin/python -V

# Source conda's shell setup so that `conda activate` is available, then
# activate the environment located at /databricks/python.
. /databricks/conda/etc/profile.d/conda.sh
conda activate /databricks/python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow.keras as k | |
import tensorflow.keras.backend as K | |
def _one_hot_layer(num_classes: int): | |
""" | |
One hot encoding layer to save massive amounts of memory in Keras | |
:param num_classes: | |
:return: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import sklearn.linear_model as lm | |
class FastLasso: | |
def __init__(self, verbose=False): | |
self.alphas, self.coefs = 2*[None] | |
self.score_path_ = None | |
self.best_iteration_ = -1 | |
self.best_score_ = -np.inf | |
self.verbose = verbose |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.base import BaseEstimator, RegressorMixin | |
from sklearn.preprocessing import OneHotEncoder | |
from sklearn.exceptions import NotFittedError | |
class StratifiedDummyRegressor(BaseEstimator, RegressorMixin): | |
""" | |
An extremely scalable dummy regression model for computing the mean for each group specified by a column. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xgboost as xgb | |
# Notice the large number of trees and the low learning rate. | |
# There are other important parameters like `subsample`, `min_child_weight` `colsample_bytree` but I'll leave that up | |
# to you and grid searching. | |
gbm = xgb.XGBRFRegressor(n_estimators=10000, learning_rate=0.01, n_jobs=-1) | |
# Training with automatic termination | |
gbm.fit( | |
x_train, y_train, |
NewerOlder