ben_oght_ah_eight ben0it8

Computational Neuroscience, NLP & Computer Vision

ben0it8 / cfg-init

Last active March 3, 2019 22:01

initialize dotfiles

	# Bootstrap oh-my-zsh and a bare-repository dotfiles checkout.
	# Intended to be run from an interactive shell: the final step relies on the
	# `config` alias that is defined while sourcing ~/.zshrc.

	# Install oh-my-zsh via its upstream installer script.
	sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"

	# Theme and plugins must be set BEFORE oh-my-zsh.sh is sourced, otherwise
	# they have no effect, so the `source` line is appended after them.
	echo "ZSH=$HOME/.oh-my-zsh" >> ~/.zshrc
	echo "ZSH_THEME='robbyrussell'" >> ~/.zshrc
	# zsh array assignment: no spaces around `=` and a closing parenthesis.
	echo "plugins=(git python osx web-search vi-mode dotenv)" >> ~/.zshrc
	# Single quotes keep $ZSH literal so it is expanded when ~/.zshrc is read,
	# not now (where $ZSH may be unset and would produce "source /oh-my-zsh.sh").
	echo 'source $ZSH/oh-my-zsh.sh' >> ~/.zshrc
	# `config` is git bound to the bare repo in ~/.cfg with $HOME as work tree.
	echo "alias config='/usr/bin/git --git-dir=$HOME/.cfg/ --work-tree=$HOME'" >> ~/.zshrc
	source ~/.zshrc

	# Use absolute paths so the ignore entry and the clone land where the
	# `config` alias expects them, regardless of the current directory.
	echo ".cfg" >> "$HOME/.gitignore"
	git clone --bare https://github.com/ben0it8/dotfiles.git "$HOME/.cfg/"
	config checkout

ben0it8 / download_imdb.py

Last active July 18, 2019 13:55

read imdb

	import os
	import requests
	import tarfile
	from tqdm import tqdm

	# absolute path of the `data` directory, resolved against the current working directory
	DATA_DIR = os.path.abspath('./data')

	# path of the IMDB dataset directory: the "imdb5k" subfolder of DATA_DIR
	IMDB_DIR = os.path.join(DATA_DIR, "imdb5k")

ben0it8 / read_clean_imdb_data.py

Last active July 18, 2019 13:56

read and clean imdb data

	import pandas as pd
	import re

	# names of the text column and the label column
	# NOTE(review): presumably columns of the pandas DataFrame holding the IMDB data — confirm
	TEXT_COL = "text"
	LABEL_COL = "label"

	def clean_html(text: str):
	"remove html tags and whitespaces"
	cleanr = re.compile('<.*?>')

ben0it8 / bert_textprocessor.py

Last active July 18, 2019 14:04

Create BERT text processor

	import torch
	from torch.utils.data import TensorDataset, random_split, DataLoader
	import numpy as np
	import warnings
	from tqdm import tqdm_notebook as tqdm
	from typing import Tuple

	# NOTE(review): presumably the maximum number of token positions per example — confirm where it is consumed
	NUM_MAX_POSITIONS = 256
	# NOTE(review): presumably the batch size used for the DataLoaders — confirm where it is consumed
	BATCH_SIZE = 32

ben0it8 / create_dataloders.py

Last active July 25, 2019 11:37

Create dataloaders

	from concurrent.futures import ProcessPoolExecutor
	from multiprocessing import cpu_count
	from itertools import repeat

	# number of CPUs reported by multiprocessing.cpu_count()
	num_cores = cpu_count()

	def process_row(processor, row):
	    """Turn one row into a processed example.

	    The record is taken from ``row[1]``; its ``LABEL_COL`` and ``TEXT_COL``
	    entries are packed into a ``(label, text)`` tuple and handed to
	    ``processor.process_example``, whose result is returned unchanged.

	    NOTE(review): ``row`` looks like an ``(index, record)`` pair as yielded
	    by ``DataFrame.iterrows()`` — confirm against the caller.
	    """
	    record = row[1]
	    example = (record[LABEL_COL], record[TEXT_COL])
	    return processor.process_example(example)

	def create_dataloader(df: pd.DataFrame,

ben0it8 / finetuning_config.py

Last active July 17, 2019 09:06

Fine-tuning config

	from collections import namedtuple
	import torch

	# relative paths of the log directory and the cache directory
	LOG_DIR = "./logs/"
	CACHE_DIR = "./cache/"

	# select "cuda" when torch reports CUDA as available, otherwise "cpu"
	device = "cuda" if torch.cuda.is_available() else "cpu"

	FineTuningConfig = namedtuple('FineTuningConfig',
	field_names="num_classes, dropout, init_range, batch_size, lr, max_norm,"

ben0it8 / transformer_models.py

Last active July 12, 2019 13:27

Transformer models

	import torch.nn as nn
	import torch

	class Transformer(nn.Module):
	"Adopted from https://github.com/huggingface/naacl_transfer_learning_tutorial"

	def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions,
	num_heads, num_layers, dropout, causal):
	super().__init__()
	self.causal = causal

ben0it8 / load_pretrained_transformer.py

Last active July 17, 2019 09:00

load pretrained NAACL Transformer

	from pytorch_transformers import cached_path

	# download pre-trained model and config
	# NOTE(review): cached_path presumably fetches the URL into a local cache and
	# returns the local file path — confirm against pytorch_transformers.
	# NOTE(review): torch.load unpickles the file; only load checkpoints from
	# trusted sources.
	# model weights are mapped onto the CPU regardless of where they were saved
	state_dict = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
	"naacl-2019-tutorial/model_checkpoint.pth"), map_location='cpu')

	# training arguments saved alongside the checkpoint (no map_location given here)
	config = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
	"naacl-2019-tutorial/model_training_args.bin"))

	# init model: Transformer base + classifier head

ben0it8 / prepare_training_eval_loops.py

Last active July 18, 2019 14:05

prepare training and eval loops

	from ignite.engine import Engine, Events
	from ignite.metrics import RunningAverage, Accuracy
	from ignite.handlers import ModelCheckpoint
	from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar
	import torch.nn.functional as F
	from pytorch_transformers.optimization import AdamW

	# Bert optimizer: AdamW over all model parameters with the learning rate
	# taken from finetuning_config; correct_bias=False disables bias correction
	optimizer = AdamW(model.parameters(), lr=finetuning_config.lr, correct_bias=False)

ben0it8 / finetune_and_eval.py

Last active July 25, 2019 15:45

fine-tune and evaluate model

	# fit the model on `train_dl` for finetuning_config.n_epochs epochs
	trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)

	# save model weights (the state dict) as "model_weights.pth" under finetuning_config.log_dir
	torch.save(model.state_dict(), os.path.join(finetuning_config.log_dir, "model_weights.pth"))

	# evaluate the model on `test_dl`
	evaluator.run(test_dl)
	# print the evaluator's 'accuracy' metric multiplied by 100, with three decimals
	print(f"Test accuracy: {100*evaluator.state.metrics['accuracy']:.3f}")