Gergely Nemeth negedng

Sentence	BLEU	BLEUw2	BertED	BertED*	BertCS	SpacyS
James Cook was a very good man and a loving husband.	1.0	1.0	0.0	1.0	1.0	1.0
James Cook was a very nice man and a loving husband.	0.735	0.761	1.298	0.789	0.996	0.997
James Cook was a bad man and a terrible husband.	0.342	0.425	3.907	0.490	0.965	0.971
James Cook was a nice person and a good husband.	0.295	0.381	2.258	0.663	0.988	0.982
The sky is blue today and learning history is important.	0.639	0.115	6.831	0.288	0.891	0.801

	Sentence	Type
	Game birds consist of teal and wild duck, snipe, jungle fowl and peacock.	0
	He slid up right behind her before she could duck into a shop.	1
	The cables are wrapped in cotton duck soaked in oxidized oil and varnish, and are sheathed in sheet iron.	2
	Geese, duck and teal are abundant.	0
	The duck, on the other hand, when newly hatched, and for nearly a month after, has the sternum wholly cartilaginous.	0
	I have laughed at the poor duck, with the red rag tied round its leg.	0
	A cannon ball, flying close to him, caused him to duck and bend over his horse.	1
	I just need a place to duck out of the rain for a bit.	1
	Hen and duck house.	0

	import numpy as np


	def prim_np(weights, node1, node2):
	"""Prim's algorithm is a greedy algorithm that
	finds a minimum spanning tree for a weighted undirected graph.
	1. Initialize a tree with a single vertex,
	chosen arbitrarily from the graph.
	2. Grow the tree by one edge: of the edges that connect the tree to
	vertices not yet in the tree, find the minimum-weight edge,

	def random_tree(V):
	    """Build a random weighted path graph on ``V`` vertices.

	    Vertex ``i`` is joined to vertex ``i + 1`` for every ``i`` in
	    ``range(V - 1)``, and each edge gets a weight drawn with
	    ``np.random.rand``.

	    Args:
	        V: number of vertices.

	    Returns:
	        A tuple ``(V, G)`` where ``G`` is a list of ``[head, tail, weight]``
	        edges; ``G`` is empty when ``V`` is 1.
	    """
	    # One draw per edge; a path on V vertices has V - 1 edges.
	    weights = np.random.rand(V - 1)
	    G = [[head, head + 1, weight] for head, weight in enumerate(weights)]
	    return V, G


	def random_complete_graph(V):
	heads, tails, weights = [], [], []

	# Word list
	Wl = ['man', 'woman', 'rich', 'poor', 'queen',
	      'king', 'fisherman', 'teacher', 'actress', 'actor']
	# Embeddings: one word2vec vector per word, in the same order as Wl
	Wv = [word2vec[word] for word in Wl]
	# To-be basis: difference vectors between two word pairs
	b1 = Wv[1] - Wv[0]  # 'woman' - 'man'
	b2 = Wv[3] - Wv[2]  # 'poor' - 'rich'

	def get_visual_embs(sentence):
	"""Get BERT embedding for the sentence,
	project it to a 2D subspace where [CLS] is (1,0) and [SEP] is (0,1)."""
	embs = bert_embedding([sentence], filter_spec_tokens=False)
	tokens = embs[0][0]
	embV = embs[0][1]
	W = np.array(embV)

	B = np.array([embV[0], embV[-1]])
	Bi = np.linalg.pinv(B.T)

	import tensorflow_hub as hub
	import tensorflow as tf
	import bert
	FullTokenizer = bert.bert_tokenization.FullTokenizer
	from tensorflow.keras.models import Model # Keras is the new high level API for TensorFlow
	import math

	# Length of every model input sequence. Your choice here.
	max_seq_length = 128

	# Three int32 Keras inputs of shape (max_seq_length,), created in the order
	# word ids, mask, segment ids; each layer is named after its variable.
	input_word_ids, input_mask, segment_ids = (
	    tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32, name=input_name)
	    for input_name in ("input_word_ids", "input_mask", "segment_ids")
	)

	# BERT layer loaded from TensorFlow Hub with trainable=True.
	bert_layer = hub.KerasLayer(
	    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
	    trainable=True,
	)

	# Calling the layer on the three inputs yields two outputs.
	bert_inputs = [input_word_ids, input_mask, segment_ids]
	pooled_output, sequence_output = bert_layer(bert_inputs)

	def get_masks(tokens, max_seq_length):
	    """Mask for padding.

	    Args:
	        tokens: sequence of tokens; only its length is used.
	        max_seq_length: total length of the returned mask.

	    Returns:
	        A list of ``max_seq_length`` ints: 1 for each token position,
	        followed by 0 for each padding position.

	    Raises:
	        IndexError: if there are more tokens than ``max_seq_length``.
	    """
	    if len(tokens) > max_seq_length:
	        raise IndexError("Token length more than max seq length!")
	    # List repetition: the "*" operators were missing from this expression.
	    return [1] * len(tokens) + [0] * (max_seq_length - len(tokens))


	def get_segments(tokens, max_seq_length):
	"""Segments: 0 for the first sequence, 1 for the second"""
	if len(tokens)>max_seq_length: