This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Base image: CUDA 10.0 + cuDNN 7 development toolchain on Ubuntu 18.04.
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
# Relative paths in later instructions resolve against /srv.
WORKDIR /srv
# Keep /tmp writable by every user, but preserve the sticky bit (mode 1777)
# so one user cannot delete or rename another user's temporary files.
RUN chmod 1777 /tmp
# Compiler toolchain plus Python 3.7 (interpreter, headers) and virtualenv.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        python3.7 python3.7-dev python3-virtualenv
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Directory layout used by the image, exported as environment variables.
# NOTE(review): LOG_PATH is declared here but no directory is created for it
# in this step -- confirm it is created elsewhere or mounted at runtime.
ENV RESOURCES_PATH="/resources" \
    DATA_PATH="/data" \
    LOG_PATH="/logs"
# Create the resources and data directories in a single call, then open the
# data directory (recursively) to all users.
RUN mkdir -p $RESOURCES_PATH $DATA_PATH && \
    chmod -R a+rwx $DATA_PATH
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install training requirements
COPY docker-res/requirements.txt "/tmp/"
# Use the absolute path: the file is copied to /tmp/, whereas a relative
# "tmp/requirements.txt" would be resolved against the current WORKDIR.
RUN pip install -r "/tmp/requirements.txt"
# Install app requirements
RUN pip install --upgrade uvicorn fastapi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
import logging, sys, os | |
# Route INFO-and-above records to stdout as "<time> : <level> : <message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
    stream=sys.stdout,
)
import torch | |
import torch.nn.functional as F | |
from pytorch_transformers import BertTokenizer | |
from utils import TransformerWithClfHead | |
from types import SimpleNamespace |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit the model on `train_dl` for the configured number of epochs.
trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)

# Save the model weights inside the configured log directory.
weights_path = os.path.join(finetuning_config.log_dir, "model_weights.pth")
torch.save(model.state_dict(), weights_path)

# Evaluate the model on `test_dl` and report accuracy as a percentage.
evaluator.run(test_dl)
test_accuracy = evaluator.state.metrics['accuracy']
print(f"Test accuracy: {100*test_accuracy:.3f}")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ignite.engine import Engine, Events | |
from ignite.metrics import RunningAverage, Accuracy | |
from ignite.handlers import ModelCheckpoint | |
from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar | |
import torch.nn.functional as F | |
from pytorch_transformers.optimization import AdamW | |
# Bert optimizer: AdamW over all model parameters, using the learning rate
# from the fine-tuning config and with bias correction switched off.
optimizer = AdamW(
    model.parameters(),
    lr=finetuning_config.lr,
    correct_bias=False,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pytorch_transformers import cached_path | |
# Fetch the pre-trained model weights and the arguments they were trained
# with, resolving each URL through `cached_path`.
# NOTE(review): torch.load unpickles these files, and they come from a
# remote URL -- keep this only while the source is trusted.
checkpoint_url = ("https://s3.amazonaws.com/models.huggingface.co/"
                  "naacl-2019-tutorial/model_checkpoint.pth")
training_args_url = ("https://s3.amazonaws.com/models.huggingface.co/"
                     "naacl-2019-tutorial/model_training_args.bin")

# The weights are mapped onto the CPU when loaded.
state_dict = torch.load(cached_path(checkpoint_url), map_location='cpu')
config = torch.load(cached_path(training_args_url))
# init model: Transformer base + classifier head |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch.nn as nn | |
import torch | |
class Transformer(nn.Module): | |
"Adopted from https://github.com/huggingface/naacl_transfer_learning_tutorial" | |
def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions, | |
num_heads, num_layers, dropout, causal): | |
super().__init__() | |
self.causal = causal |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
import torch | |
LOG_DIR = "./logs/" | |
CACHE_DIR = "./cache/" | |
device = "cuda" if torch.cuda.is_available() else "cpu" | |
FineTuningConfig = namedtuple('FineTuningConfig', | |
field_names="num_classes, dropout, init_range, batch_size, lr, max_norm," |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from concurrent.futures import ProcessPoolExecutor | |
from multiprocessing import cpu_count | |
from itertools import repeat | |
num_cores = cpu_count() | |
def process_row(processor, row):
    """Run ``processor.process_example`` on the label and text of one row.

    Args:
        processor: Object exposing ``process_example``; it is called with a
            single ``(label, text)`` tuple.
        row: Indexable whose element ``1`` holds the record; that record is
            looked up by ``LABEL_COL`` and ``TEXT_COL`` (presumably an
            ``(index, Series)`` pair from ``DataFrame.iterrows()`` --
            confirm against the caller).

    Returns:
        Whatever ``processor.process_example`` returns for that tuple.
    """
    record = row[1]
    example = (record[LABEL_COL], record[TEXT_COL])
    return processor.process_example(example)
def create_dataloader(df: pd.DataFrame, |
NewerOlder