(attachment: exported sentence-embedding vectors, one whitespace-separated row of floats per sentence; excerpt omitted as too large to render)
from tensorflow.keras.callbacks import Callback

class PearsonrRankCallback(Callback):

    def __init__(self, loader, filepaths, name=None, verbose=False,
                 sim_model=None, savemodel=None, savepath=None):
        self.filepaths = filepaths
        self.name = name
        self.savemodel = savemodel
        self.savepath = savepath
        self.sim_model = sim_model
        self.loader = loader
        self.verbose = verbose
import tensorflow as tf

def softmax_loss(vectors):
    anc, pos, neg = vectors
    # anchor-positive similarity, one score per row: (B, 1)
    pos_sim = tf.reduce_sum((anc * pos), axis=-1, keepdims=True)
    # anchor vs. every negative in the batch: (B, B)
    neg_mul = tf.matmul(anc, neg, transpose_b=True)
    # log-sum-exp over the negatives: a smooth maximum of the negative scores
    neg_sim = tf.log(tf.reduce_sum(tf.exp(neg_mul), axis=-1, keepdims=True))
    # hinge: the loss is zero once the positive outscores the (soft) hardest negative
    loss = tf.nn.relu(neg_sim - pos_sim)
    return loss
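For intuition, here is the same computation transcribed to NumPy on toy tensors; softmax_loss_np and the toy shapes are illustrative, not part of the gist:

import numpy as np

def softmax_loss_np(anc, pos, neg):
    pos_sim = np.sum(anc * pos, axis=-1, keepdims=True)                # (B, 1)
    neg_sim = np.log(np.exp(anc @ neg.T).sum(axis=-1, keepdims=True))  # (B, 1)
    return np.maximum(neg_sim - pos_sim, 0.0)

rng = np.random.RandomState(0)
anc, pos, neg = (rng.randn(4, 8) for _ in range(3))
print(softmax_loss_np(anc, pos, neg).shape)  # (4, 1)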
def build_model(module_path, seq_len=24, tune_lr=6, loss=softmax_loss):
    inp_anc = tf.keras.Input(shape=(1,), dtype=tf.string)
    inp_pos = tf.keras.Input(shape=(1,), dtype=tf.string)
    inp_neg = tf.keras.Input(shape=(1,), dtype=tf.string)
    # a single BertLayer shared by all three inputs (one set of encoder weights)
    sent_encoder = BertLayer(module_path, seq_len, n_tune_layers=tune_lr,
                             do_preprocessing=True, verbose=False,
                             pooling="mean", trainable=True, tune_embeddings=False)
    anc_enc = sent_encoder(inp_anc)
    pos_enc = sent_encoder(inp_pos)
    neg_enc = sent_encoder(inp_neg)
    # the triplet loss itself is the model output
    out = tf.keras.layers.Lambda(loss)([anc_enc, pos_enc, neg_enc])
    return tf.keras.Model(inputs=[inp_anc, inp_pos, inp_neg], outputs=out)
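Since the Lambda output already is the per-triplet loss, training only needs a pass-through Keras loss. A minimal sketch, assuming MODULE_PATH names the TF Hub module built from the checkpoint below:

model = build_model(MODULE_PATH)  # MODULE_PATH is an assumed variable
model.compile(optimizer=tf.keras.optimizers.Adam(2e-5),
              loss=lambda y_true, y_pred: tf.reduce_mean(y_pred))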
import numpy as np

class TripletGenerator:

    def __init__(self, datadict, hard_frac=0.2, batch_size=256):
        self.datadict = datadict
        # anchors are the keys of the premise -> (hypothesis, label) dict
        self._anchor_idx = np.array(list(self.datadict.keys()))
        self._hard_frac = hard_frac
        self._generator = self.generate_batch(batch_size)

    def generate_batch(self, size):
        while True:
            # endless loop: each pass samples `size` anchors and yields one
            # batch of (anchor, positive, negative) triplets, a `hard_frac`
            # share of which use mined hard negatives
            ...
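A usage sketch under stated assumptions: the generator is fed the dict built by prepare_snli and consumed through its _generator attribute; the triple yield format is assumed, as the gist body is truncated.

gen = TripletGenerator(fd_tr, hard_frac=0.2, batch_size=256)
anc_batch, pos_batch, neg_batch = next(gen._generator)  # assumed yield format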
train_data = ["./snli_1.0/snli_1.0_train.jsonl", "./multinli_1.0/multinli_1.0_train.jsonl"]
test_data = ["./snli_1.0/snli_1.0_test.jsonl", "./multinli_1.0/multinli_1.0_dev_matched.jsonl"]
tr_a, tr_b, tr_l = load_snli(train_data)
ts_a, ts_b, ts_l = load_snli(test_data)
fd_tr = prepare_snli(tr_a, tr_b, tr_l)
fd_ts = prepare_snli(ts_a, ts_b, ts_l)
from collections import defaultdict

def prepare_snli(sa, sb, lb):
    classes = {"entailment", "contradiction"}
    anc_to_pairs = defaultdict(list)
    filtered = {}
    skipped = 0
    anchor_id = 0
    # group every (hypothesis, label) pair under its premise; the premise
    # becomes the anchor sentence of the triplets
    for xa, xb, y in zip(sa, sb, lb):
        anc_to_pairs[xa].append((xb, y))
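To make the intermediate structure concrete, here is the grouping step run on two toy rows; the dict maps each premise to its (hypothesis, label) pairs, so anchors that have both an entailment and a contradiction can later be turned into triplets:

from collections import defaultdict

anc_to_pairs = defaultdict(list)
rows = [("A man rides a horse.", "A person is outdoors.", "entailment"),
        ("A man rides a horse.", "Nobody is riding.", "contradiction")]
for xa, xb, y in rows:
    anc_to_pairs[xa].append((xb, y))
# {'A man rides a horse.': [('A person is outdoors.', 'entailment'),
#                           ('Nobody is riding.', 'contradiction')]}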
import json
import numpy as np

def load_snli(fpaths):
    sa, sb, lb = [], [], []
    # accept a single path or a list of paths
    fpaths = np.atleast_1d(fpaths)
    for fpath in fpaths:
        with open(fpath) as fi:
            for line in fi:
                sample = json.loads(line)
                sa.append(sample['sentence1'])
                sb.append(sample['sentence2'])
                lb.append(sample['gold_label'])
    return sa, sb, lb
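For example, pointing load_snli at the SNLI dev split (shipped in the same zip) yields three parallel lists of premises, hypotheses, and gold labels:

sa, sb, lb = load_snli("./snli_1.0/snli_1.0_dev.jsonl")
print(len(sa), sa[0], lb[0])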
from loader import load_sts, load_sick2014
from bert_experimental.finetuning.text_preprocessing import build_preprocessor
from bert_experimental.finetuning.bert_layer import BertLayer
from bert_experimental.finetuning.modeling import BertConfig, BertModel, build_bert_module
BERT_DIR = "/content/uncased_L-12_H-768_A-12/"

build_bert_module(BERT_DIR + "bert_config.json",
                  BERT_DIR + "vocab.txt",
                  # the checkpoint prefix ships in the zip; this third argument
                  # is an assumption -- check build_bert_module's signature
                  BERT_DIR + "bert_model.ckpt")
!wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip uncased_L-12_H-768_A-12.zip
!wget https://nlp.stanford.edu/projects/snli/snli_1.0.zip
!unzip snli_1.0.zip
!wget https://www.nyu.edu/projects/bowman/multinli/multinli_1.0.zip
!unzip multinli_1.0.zip
!git clone https://github.com/brmson/dataset-sts
!git clone https://github.com/gaphex/bert_experimental