{
  "embeddings": [
    {
      "tensorName": "Intent40",
      "tensorShape": [
        1360,
        768
      ],
      "tensorPath": "https://gist.githubusercontent.com/gaphex/d56f0124a7b7459408d92bbaca373f3e/raw/abb29897ab1015d7e05d9eda90514aeb45181953/embeddings.tsv",
      "metadataPath": "https://gist.githubusercontent.com/gaphex/2793c044577abc73cff7d438a2097ef1/raw/014ab59be93252b9516c0883d21dba6a9b0622ad/metadata.tsv"
    }
  ]
}
# Download the pre-trained BERT-Base (uncased) checkpoint
!wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip uncased_L-12_H-768_A-12.zip

# Download the SNLI and MultiNLI corpora used for triplet mining
!wget https://nlp.stanford.edu/projects/snli/snli_1.0.zip
!unzip snli_1.0.zip
!wget https://www.nyu.edu/projects/bowman/multinli/multinli_1.0.zip
!unzip multinli_1.0.zip

# Clone the STS evaluation datasets and the fine-tuning utilities
!git clone https://github.com/brmson/dataset-sts
!git clone https://github.com/gaphex/bert_experimental
import sys
import json
import numpy as np
import tensorflow as tf
from collections import defaultdict

sys.path.append("dataset-sts/pysts")  # assumed location of loader.py inside the cloned repo
from loader import load_sts, load_sick2014
from bert_experimental.finetuning.text_preprocessing import build_preprocessor
from bert_experimental.finetuning.bert_layer import BertLayer
from bert_experimental.finetuning.modeling import BertConfig, BertModel, build_bert_module
BERT_DIR = "/content/uncased_L-12_H-768_A-12/"
MODULE_PATH = "bert_module"  # assumed export directory for the module

# the trailing arguments are truncated in the gist; the checkpoint path
# and export directory are assumed here
build_bert_module(BERT_DIR + "bert_config.json",
                  BERT_DIR + "vocab.txt",
                  BERT_DIR + "bert_model.ckpt",
                  MODULE_PATH)
def load_snli(fpaths):
    # read one or more *.jsonl NLI files into parallel sentence/label lists
    sa, sb, lb = [], [], []
    fpaths = np.atleast_1d(fpaths)
    for fpath in fpaths:
        with open(fpath) as fi:
            for line in fi:
                sample = json.loads(line)
                sa.append(sample['sentence1'])
                sb.append(sample['sentence2'])
                lb.append(sample['gold_label'])
    return sa, sb, lb
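A quick sanity check of the loader on the SNLI dev split (path per the archive unpacked above):

sa, sb, lb = load_snli("./snli_1.0/snli_1.0_dev.jsonl")
print(len(sa))                          # number of premise/hypothesis pairs
print(sa[0], '->', sb[0], '|', lb[0])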
def prepare_snli(sa, sb, lb):
    # group hypotheses by their premise (the anchor sentence)
    classes = {"entailment", "contradiction"}
    anc_to_pairs = defaultdict(list)
    filtered = {}
    skipped = 0
    anchor_id = 0
    for xa, xb, y in zip(sa, sb, lb):
        if y in classes:
            anc_to_pairs[xa].append((xb, y))
    # truncated in the gist; minimal reconstruction: keep anchors having
    # both an entailment (positive) and a contradiction (negative) pair
    for anchor, pairs in anc_to_pairs.items():
        pos = [x for x, y in pairs if y == "entailment"]
        neg = [x for x, y in pairs if y == "contradiction"]
        if pos and neg:
            filtered[anchor_id] = {"anchor": anchor, "pos": pos, "neg": neg}
            anchor_id += 1
        else:
            skipped += 1
    return filtered
train_data = ["./snli_1.0/snli_1.0_train.jsonl", "./multinli_1.0/multinli_1.0_train.jsonl"]
test_data = ["./snli_1.0/snli_1.0_test.jsonl", "./multinli_1.0/multinli_1.0_dev_matched.jsonl"]

tr_a, tr_b, tr_l = load_snli(train_data)
ts_a, ts_b, ts_l = load_snli(test_data)

fd_tr = prepare_snli(tr_a, tr_b, tr_l)
fd_ts = prepare_snli(ts_a, ts_b, ts_l)
class TripletGenerator:
    def __init__(self, datadict, hard_frac=0.2, batch_size=256):
        self.datadict = datadict
        self._anchor_idx = np.array(list(self.datadict.keys()))
        self._hard_frac = hard_frac
        self._generator = self.generate_batch(batch_size)

    def generate_batch(self, size):
        # truncated in the gist; minimal sketch: sample `size` anchors and draw
        # one positive and one negative sentence for each (hard-negative
        # mining governed by hard_frac is omitted here)
        while True:
            idx = np.random.choice(self._anchor_idx, size)
            anc = [self.datadict[i]["anchor"] for i in idx]
            pos = [np.random.choice(self.datadict[i]["pos"]) for i in idx]
            neg = [np.random.choice(self.datadict[i]["neg"]) for i in idx]
            batch = [np.array(anc), np.array(pos), np.array(neg)]
            yield [b.reshape(-1, 1) for b in batch], np.zeros(size)
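With the dictionary produced by prepare_snli, the generator yields string triplets ready for the three-input model below; a usage sketch consistent with the reconstruction above:

triplet_gen = TripletGenerator(fd_tr, hard_frac=0.2, batch_size=256)
(anc, pos, neg), dummy_y = next(triplet_gen._generator)
print(anc.shape, pos.shape, neg.shape)  # (256, 1) each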
def build_model(module_path, seq_len=24, tune_lr=6, loss=softmax_loss):
    inp_anc = tf.keras.Input(shape=(1,), dtype=tf.string)
    inp_pos = tf.keras.Input(shape=(1,), dtype=tf.string)
    inp_neg = tf.keras.Input(shape=(1,), dtype=tf.string)
    # a single shared BERT encoder embeds all three inputs
    sent_encoder = BertLayer(module_path, seq_len, n_tune_layers=tune_lr, do_preprocessing=True,
                             verbose=False, pooling="mean", trainable=True, tune_embeddings=False)
    anc_enc = sent_encoder(inp_anc)
    pos_enc = sent_encoder(inp_pos)
    neg_enc = sent_encoder(inp_neg)
    # truncated in the gist; the loss is computed directly on the embeddings
    out = tf.keras.layers.Lambda(loss)([anc_enc, pos_enc, neg_enc])
    return tf.keras.Model(inputs=[inp_anc, inp_pos, inp_neg], outputs=out)
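Because the Lambda layer already outputs the loss, the model can be compiled with an identity-style objective that simply averages the layer's output. A hedged training sketch (the optimizer, learning rate, and step counts are assumptions, not the gist's settings):

model = build_model(MODULE_PATH, seq_len=24, tune_lr=6)
model.compile(optimizer=tf.keras.optimizers.Adam(2e-5),
              loss=lambda y_true, y_pred: tf.reduce_mean(y_pred))
model.fit_generator(triplet_gen._generator, steps_per_epoch=1000, epochs=5)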
def softmax_loss(vectors):
    # softmax-margin triplet loss over in-batch negatives: penalizes anchors
    # whose log-sum-exp similarity to the negatives exceeds their similarity
    # to the positive
    anc, pos, neg = vectors
    pos_sim = tf.reduce_sum((anc * pos), axis=-1, keepdims=True)
    neg_mul = tf.matmul(anc, neg, transpose_b=True)
    neg_sim = tf.log(tf.reduce_sum(tf.exp(neg_mul), axis=-1, keepdims=True))  # tf.math.log in TF 2.x
    loss = tf.nn.relu(neg_sim - pos_sim)
    return loss
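In effect this is a smoothed triplet objective: for each anchor a with positive p and in-batch negatives n_i, the loss is max(0, log sum_i exp(a . n_i) - a . p), so it vanishes once the positive outranks the soft maximum over all negatives. A tiny numpy check of that behavior:

a = np.array([[1., 0.]]); p = np.array([[1., 0.]]); n = np.array([[0., 1.]])
pos_sim = (a * p).sum(-1)                   # 1.0
neg_sim = np.log(np.exp(a @ n.T).sum(-1))   # 0.0
print(np.maximum(neg_sim - pos_sim, 0.))    # [0.] -> positive already outranks the negative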
from tensorflow.keras.callbacks import Callback

class PearsonrRankCallback(Callback):
    # evaluates sentence similarity against an STS-style benchmark and tracks
    # Pearson correlation; the on_epoch_end hook is truncated in the gist
    def __init__(self, loader, filepaths, name=None, verbose=False,
                 sim_model=None, savemodel=None, savepath=None):
        super().__init__()
        self.savemodel = savemodel
        self.savepath = savepath
        self.sim_model = sim_model
        self.loader = loader
        self.verbose = verbose
        self.name = name
        self.filepaths = np.atleast_1d(filepaths)
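A hypothetical wiring of the callback into training, using load_sts from dataset-sts; the file path below is illustrative only and depends on how the repo lays out its STS data:

sts_cb = PearsonrRankCallback(load_sts,
                              ["dataset-sts/data/sts/semeval-sts/all/2015.train.tsv"],  # hypothetical path
                              name="STS", verbose=True)
model.fit_generator(triplet_gen._generator, steps_per_epoch=1000,
                    epochs=5, callbacks=[sts_cb])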