(attachment: exported sentence-embedding vectors, one whitespace-separated row of floats per sentence; excerpt omitted as too large to render)
from tensorflow.keras.callbacks import Callback

class PearsonrRankCallback(Callback):

    def __init__(self, loader, filepaths, name=None, verbose=False,
                 sim_model=None, savemodel=None, savepath=None):
        self.filepaths = filepaths
        self.name = name
        self.savemodel = savemodel
        self.savepath = savepath
        self.sim_model = sim_model
        self.loader = loader
        self.verbose = verbose
import tensorflow as tf

def softmax_loss(vectors):
    anc, pos, neg = vectors
    # anchor-positive similarity, one score per row: (B, 1)
    pos_sim = tf.reduce_sum((anc * pos), axis=-1, keepdims=True)
    # anchor vs. every negative in the batch: (B, B)
    neg_mul = tf.matmul(anc, neg, transpose_b=True)
    # log-sum-exp over the negatives: a smooth maximum of the negative scores
    neg_sim = tf.log(tf.reduce_sum(tf.exp(neg_mul), axis=-1, keepdims=True))
    # hinge: the loss is zero once the positive outscores the (soft) hardest negative
    loss = tf.nn.relu(neg_sim - pos_sim)
    return loss
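For intuition, here is the same computation transcribed to NumPy on toy tensors; softmax_loss_np and the toy shapes are illustrative, not part of the gist:

import numpy as np

def softmax_loss_np(anc, pos, neg):
    pos_sim = np.sum(anc * pos, axis=-1, keepdims=True)                # (B, 1)
    neg_sim = np.log(np.exp(anc @ neg.T).sum(axis=-1, keepdims=True))  # (B, 1)
    return np.maximum(neg_sim - pos_sim, 0.0)

rng = np.random.RandomState(0)
anc, pos, neg = (rng.randn(4, 8) for _ in range(3))
print(softmax_loss_np(anc, pos, neg).shape)  # (4, 1)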
def build_model(module_path, seq_len=24, tune_lr=6, loss=softmax_loss):
    inp_anc = tf.keras.Input(shape=(1,), dtype=tf.string)
    inp_pos = tf.keras.Input(shape=(1,), dtype=tf.string)
    inp_neg = tf.keras.Input(shape=(1,), dtype=tf.string)
    # a single BertLayer shared by all three inputs (one set of encoder weights)
    sent_encoder = BertLayer(module_path, seq_len, n_tune_layers=tune_lr,
                             do_preprocessing=True, verbose=False,
                             pooling="mean", trainable=True, tune_embeddings=False)
    anc_enc = sent_encoder(inp_anc)
    pos_enc = sent_encoder(inp_pos)
    neg_enc = sent_encoder(inp_neg)
    # the triplet loss itself is the model output
    out = tf.keras.layers.Lambda(loss)([anc_enc, pos_enc, neg_enc])
    return tf.keras.Model(inputs=[inp_anc, inp_pos, inp_neg], outputs=out)
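Since the Lambda output already is the per-triplet loss, training only needs a pass-through Keras loss. A minimal sketch, assuming MODULE_PATH names the TF Hub module built from the checkpoint below:

model = build_model(MODULE_PATH)  # MODULE_PATH is an assumed variable
model.compile(optimizer=tf.keras.optimizers.Adam(2e-5),
              loss=lambda y_true, y_pred: tf.reduce_mean(y_pred))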
import numpy as np

class TripletGenerator:

    def __init__(self, datadict, hard_frac=0.2, batch_size=256):
        self.datadict = datadict
        # anchors are the keys of the premise -> (hypothesis, label) dict
        self._anchor_idx = np.array(list(self.datadict.keys()))
        self._hard_frac = hard_frac
        self._generator = self.generate_batch(batch_size)

    def generate_batch(self, size):
        while True:
            # endless loop: each pass samples `size` anchors and yields one
            # batch of (anchor, positive, negative) triplets, a `hard_frac`
            # share of which use mined hard negatives
            ...
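A usage sketch under stated assumptions: the generator is fed the dict built by prepare_snli and consumed through its _generator attribute; the triple yield format is assumed, as the gist body is truncated.

gen = TripletGenerator(fd_tr, hard_frac=0.2, batch_size=256)
anc_batch, pos_batch, neg_batch = next(gen._generator)  # assumed yield format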
train_data = ["./snli_1.0/snli_1.0_train.jsonl", "./multinli_1.0/multinli_1.0_train.jsonl"]
test_data = ["./snli_1.0/snli_1.0_test.jsonl", "./multinli_1.0/multinli_1.0_dev_matched.jsonl"]
tr_a, tr_b, tr_l = load_snli(train_data)
ts_a, ts_b, ts_l = load_snli(test_data)
fd_tr = prepare_snli(tr_a, tr_b, tr_l)
fd_ts = prepare_snli(ts_a, ts_b, ts_l)
from collections import defaultdict

def prepare_snli(sa, sb, lb):
    classes = {"entailment", "contradiction"}
    anc_to_pairs = defaultdict(list)
    filtered = {}
    skipped = 0
    anchor_id = 0
    # group every (hypothesis, label) pair under its premise; the premise
    # becomes the anchor sentence of the triplets
    for xa, xb, y in zip(sa, sb, lb):
        anc_to_pairs[xa].append((xb, y))
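To make the intermediate structure concrete, here is the grouping step run on two toy rows; the dict maps each premise to its (hypothesis, label) pairs, so anchors that have both an entailment and a contradiction can later be turned into triplets:

from collections import defaultdict

anc_to_pairs = defaultdict(list)
rows = [("A man rides a horse.", "A person is outdoors.", "entailment"),
        ("A man rides a horse.", "Nobody is riding.", "contradiction")]
for xa, xb, y in rows:
    anc_to_pairs[xa].append((xb, y))
# {'A man rides a horse.': [('A person is outdoors.', 'entailment'),
#                           ('Nobody is riding.', 'contradiction')]}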
import json
import numpy as np

def load_snli(fpaths):
    sa, sb, lb = [], [], []
    # accept a single path or a list of paths
    fpaths = np.atleast_1d(fpaths)
    for fpath in fpaths:
        with open(fpath) as fi:
            for line in fi:
                sample = json.loads(line)
                sa.append(sample['sentence1'])
                sb.append(sample['sentence2'])
                lb.append(sample['gold_label'])
    return sa, sb, lb
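For example, pointing load_snli at the SNLI dev split (shipped in the same zip) yields three parallel lists of premises, hypotheses, and gold labels:

sa, sb, lb = load_snli("./snli_1.0/snli_1.0_dev.jsonl")
print(len(sa), sa[0], lb[0])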
from loader import load_sts, load_sick2014
from bert_experimental.finetuning.text_preprocessing import build_preprocessor
from bert_experimental.finetuning.bert_layer import BertLayer
from bert_experimental.finetuning.modeling import BertConfig, BertModel, build_bert_module
BERT_DIR = "/content/uncased_L-12_H-768_A-12/"

build_bert_module(BERT_DIR + "bert_config.json",
                  BERT_DIR + "vocab.txt",
                  # the checkpoint prefix ships in the zip; this third argument
                  # is an assumption -- check build_bert_module's signature
                  BERT_DIR + "bert_model.ckpt")
!wget https://storage.googleapis.com/bert_models/2018_10_18/uncased_L-12_H-768_A-12.zip
!unzip uncased_L-12_H-768_A-12.zip
!wget https://nlp.stanford.edu/projects/snli/snli_1.0.zip
!unzip snli_1.0.zip
!wget https://www.nyu.edu/projects/bowman/multinli/multinli_1.0.zip
!unzip multinli_1.0.zip
!git clone https://github.com/brmson/dataset-sts
!git clone https://github.com/gaphex/bert_experimental