This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install oh-my-zsh via its installer script.
sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"

# Append the oh-my-zsh settings to ~/.zshrc. The theme and the plugin list
# are written BEFORE the `source` line because oh-my-zsh.sh reads them while
# it is being sourced; setting them afterwards has no effect.
echo "ZSH=$HOME/.oh-my-zsh" >> ~/.zshrc
echo "ZSH_THEME='robbyrussell'" >> ~/.zshrc
# zsh array assignment: no spaces around `=`, and the list must be closed.
echo "plugins=(git python osx web-search vi-mode dotenv)" >> ~/.zshrc
# Single quotes keep $ZSH literal so it is expanded when ~/.zshrc runs,
# not at echo time (where ZSH may be unset in the current shell).
echo 'source $ZSH/oh-my-zsh.sh' >> ~/.zshrc

# `config` is git operating on the bare repo in ~/.cfg with $HOME as work tree.
echo "alias config='/usr/bin/git --git-dir=$HOME/.cfg/ --work-tree=$HOME'" >> ~/.zshrc
source ~/.zshrc

# NOTE: the paths below are relative while the alias points at $HOME/.cfg/,
# so these commands are expected to be run from $HOME.
echo ".cfg" >> .gitignore
git clone --bare https://github.com/ben0it8/dotfiles.git .cfg/
config checkout
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import tarfile | |
from tqdm import tqdm | |
# absolute path to the data directory (./data resolved against the current working directory)
DATA_DIR = os.path.abspath('./data')
# path to the IMDB directory ("imdb5k") inside DATA_DIR
IMDB_DIR = os.path.join(DATA_DIR, "imdb5k")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
# text and label column names
TEXT_COL = "text"
LABEL_COL = "label"
def clean_html(text: str): | |
"remove html tags and whitespaces" | |
cleanr = re.compile('<.*?>') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.utils.data import TensorDataset, random_split, DataLoader | |
import numpy as np | |
import warnings | |
from tqdm import tqdm_notebook as tqdm | |
from typing import Tuple | |
# presumably the maximum number of token positions per example — TODO confirm against the processor/model
NUM_MAX_POSITIONS = 256
# presumably the mini-batch size used by the data loaders — TODO confirm against the caller
BATCH_SIZE = 32
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from concurrent.futures import ProcessPoolExecutor | |
from multiprocessing import cpu_count | |
from itertools import repeat | |
# number of CPUs reported by multiprocessing.cpu_count()
num_cores = cpu_count()
def process_row(processor, row):
    """Run ``processor.process_example`` on a single data row.

    Args:
        processor: object exposing ``process_example``; it is called with a
            ``(label, text)`` tuple.
        row: an indexable pair whose second element holds the record;
            presumably an ``(index, Series)`` item as yielded by
            ``DataFrame.iterrows()`` — TODO confirm against the caller.
            Only ``row[1]`` is read, via the LABEL_COL and TEXT_COL keys.

    Returns:
        Whatever ``processor.process_example`` returns.
    """
    return processor.process_example((row[1][LABEL_COL], row[1][TEXT_COL]))
def create_dataloader(df: pd.DataFrame, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
import torch | |
# relative paths; presumably where logs and cached files are written — TODO confirm against their users
LOG_DIR = "./logs/"
CACHE_DIR = "./cache/"
# use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
FineTuningConfig = namedtuple('FineTuningConfig', | |
field_names="num_classes, dropout, init_range, batch_size, lr, max_norm," |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch.nn as nn | |
import torch | |
class Transformer(nn.Module): | |
"Adopted from https://github.com/huggingface/naacl_transfer_learning_tutorial" | |
def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions, | |
num_heads, num_layers, dropout, causal): | |
super().__init__() | |
self.causal = causal |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pytorch_transformers import cached_path | |
# download pre-trained model and config
# NOTE(review): torch.load unpickles the downloaded files, which can execute
# arbitrary code — only load checkpoints from a source you trust.
state_dict = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                    "naacl-2019-tutorial/model_checkpoint.pth"), map_location='cpu')
config = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                "naacl-2019-tutorial/model_training_args.bin"))
# init model: Transformer base + classifier head |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ignite.engine import Engine, Events | |
from ignite.metrics import RunningAverage, Accuracy | |
from ignite.handlers import ModelCheckpoint | |
from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar | |
import torch.nn.functional as F | |
from pytorch_transformers.optimization import AdamW | |
# Bert optimizer: AdamW over all model parameters, with bias correction disabled
optimizer = AdamW(model.parameters(), lr=finetuning_config.lr, correct_bias=False)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# fit the model on `train_dl`
trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)
# save model weights
torch.save(model.state_dict(), os.path.join(finetuning_config.log_dir, "model_weights.pth"))
# evaluate the model on `test_dl`
evaluator.run(test_dl)
# report the evaluator's accuracy metric as a percentage
print(f"Test accuracy: {100*evaluator.state.metrics['accuracy']:.3f}")
OlderNewer