ranking
# === Sentence embeddings: TF SavedModel export and PyTorch inference with msmarco-distilroberta-base-v2 ===
import os

import torch
import tensorflow as tf
from transformers import AutoModel, AutoTokenizer, TFAutoModel

os.environ["TOKENIZERS_PARALLELISM"] = "false"

tf_model = TFAutoModel.from_pretrained('sentence-transformers/msmarco-distilroberta-base-v2', from_pt=True)
model = AutoModel.from_pretrained('sentence-transformers/msmarco-distilroberta-base-v2')
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/msmarco-distilroberta-base-v2')

texts = ['I love to sleep', 'Sleeping is me']

# PyTorch path: pad to the longest sequence in the batch
dataset = tokenizer.batch_encode_plus(texts, padding='longest')
# TensorFlow path: pad to a fixed length of 128 tokens so it matches the export signature below
tf_dataset = tokenizer.batch_encode_plus(texts, padding='max_length', return_tensors='tf', max_length=128)

tf_iter = tf.data.Dataset.from_tensor_slices(dict(tf_dataset)).batch(32)
for batch in tf_iter:
    print(tf_model(batch))
class ModelFn(tf.Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.max_seq_length = 512

    @tf.function(input_signature=[
        tf.TensorSpec(shape=(None, 128), dtype=tf.int64),
        tf.TensorSpec(shape=(None, 128), dtype=tf.int64),
    ])
    def predict_export(self, input_ids, attention_mask):
        results = self.model(
            {'input_ids': input_ids, 'attention_mask': attention_mask},
            training=False).pooler_output
        return results

    @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
    def predict_export_serialised(self, serialized):
        input_features = {
            'input_ids': tf.io.FixedLenFeature([self.max_seq_length], tf.int64),
            'attention_mask': tf.io.FixedLenFeature(
                [self.max_seq_length], tf.int64),
        }
        example = tf.io.parse_example(serialized=serialized, features=input_features)
        # Pass the parsed feature dict straight to the Keras model
        results = self.model(example, training=False).pooler_output
        return results

    def export_save_model(self, export_dir):
        export_dir = "%s/1/" % export_dir
        tf.saved_model.save(self.model, export_dir,
                            signatures={'predict_b64': self.predict_export_serialised,
                                        'predict': self.predict_export})


ModelFn(tf_model).export_save_model('model')
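
# Optional sanity check (a sketch, assuming the export above succeeded): load the
# SavedModel back and call the 'predict' signature. Signature functions take keyword
# arguments and return a dict of named output tensors; the inputs must be int64 and
# padded to length 128 to match the TensorSpecs above.
loaded = tf.saved_model.load('model/1/')
predict_fn = loaded.signatures['predict']
check = tokenizer.batch_encode_plus(texts, padding='max_length', max_length=128, return_tensors='tf')
print(predict_fn(input_ids=tf.cast(check['input_ids'], tf.int64),
                 attention_mask=tf.cast(check['attention_mask'], tf.int64)))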
class TransformerDataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        return item

    def __len__(self):
        return len(self.encodings['input_ids'])


ds = TransformerDataset(dataset)
trainloader = torch.utils.data.DataLoader(ds, batch_size=32, shuffle=False, num_workers=8)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)

for batch in trainloader:
    batch = {k: v.to(device) for k, v in batch.items()}
    outputs = model(**batch)
    print(outputs.pooler_output)
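
# Note (a hedged aside): the loop above prints the raw pooler_output, but
# sentence-transformers checkpoints such as this one typically form the sentence
# embedding by mean pooling the token embeddings under the attention mask.
# A minimal sketch, reusing the last `batch`/`outputs` from the loop above;
# the `mean_pool` helper name is introduced here for illustration.
def mean_pool(last_hidden_state, attention_mask):
    mask = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
    summed = torch.sum(last_hidden_state * mask, dim=1)
    counts = torch.clamp(mask.sum(dim=1), min=1e-9)
    return summed / counts

sentence_embeddings = mean_pool(outputs.last_hidden_state, batch['attention_mask'])
print(sentence_embeddings.shape)  # (batch_size, hidden_size)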
# === monoT5 re-ranking: score query-passage pairs with castorini/monot5-base-msmarco ===
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration
passages = [
    ['7744105', 'For Earth-centered it was Geocentric Theory proposed by greeks under the guidance of Ptolemy and Sun-centered was Heliocentric theory proposed by Nicolas Copernicus in 16th century A.D. In short, Your Answers are: 1st blank - Geo-Centric Theory. 2nd blank - Heliocentric Theory.'],
    ['2593796', 'Copernicus proposed a heliocentric model of the solar system – a model where everything orbited around the Sun. Today, with advancements in science and technology, the geocentric model seems preposterous.he geocentric model, also known as the Ptolemaic system, is a theory that was developed by philosophers in Ancient Greece and was named after the philosopher Claudius Ptolemy who lived circa 90 to 168 A.D. It was developed to explain how the planets, the Sun, and even the stars orbit around the Earth.'],
    ['6217200', 'The geocentric model, also known as the Ptolemaic system, is a theory that was developed by philosophers in Ancient Greece and was named after the philosopher Claudius Ptolemy who lived circa 90 to 168 A.D. It was developed to explain how the planets, the Sun, and even the stars orbit around the Earth.opernicus proposed a heliocentric model of the solar system – a model where everything orbited around the Sun. Today, with advancements in science and technology, the geocentric model seems preposterous.'],
    ['3276925', 'Copernicus proposed a heliocentric model of the solar system – a model where everything orbited around the Sun. Today, with advancements in science and technology, the geocentric model seems preposterous.Simple tools, such as the telescope – which helped convince Galileo that the Earth was not the center of the universe – can prove that ancient theory incorrect.ou might want to check out one article on the history of the geocentric model and one regarding the geocentric theory. Here are links to two other articles from Universe Today on what the center of the universe is and Galileo one of the advocates of the heliocentric model.'],
    ['6217208', 'Copernicus proposed a heliocentric model of the solar system – a model where everything orbited around the Sun. Today, with advancements in science and technology, the geocentric model seems preposterous.Simple tools, such as the telescope – which helped convince Galileo that the Earth was not the center of the universe – can prove that ancient theory incorrect.opernicus proposed a heliocentric model of the solar system – a model where everything orbited around the Sun. Today, with advancements in science and technology, the geocentric model seems preposterous.'],
    ['4280557', 'The geocentric model, also known as the Ptolemaic system, is a theory that was developed by philosophers in Ancient Greece and was named after the philosopher Claudius Ptolemy who lived circa 90 to 168 A.D. It was developed to explain how the planets, the Sun, and even the stars orbit around the Earth.imple tools, such as the telescope – which helped convince Galileo that the Earth was not the center of the universe – can prove that ancient theory incorrect. You might want to check out one article on the history of the geocentric model and one regarding the geocentric theory.'],
    ['264181', 'Nicolaus Copernicus (b. 1473–d. 1543) was the first modern author to propose a heliocentric theory of the universe. From the time that Ptolemy of Alexandria (c. 150 CE) constructed a mathematically competent version of geocentric astronomy to Copernicus’s mature heliocentric version (1543), experts knew that the Ptolemaic system diverged from the geocentric concentric-sphere conception of Aristotle.'],
    ['4280558', 'A Geocentric theory is an astronomical theory which describes the universe as a Geocentric system, i.e., a system which puts the Earth in the center of the universe, and describes other objects from the point of view of the Earth. Geocentric theory is an astronomical theory which describes the universe as a Geocentric system, i.e., a system which puts the Earth in the center of the universe, and describes other objects from the point of view of the Earth.'],
    ['3276926', 'The geocentric model, also known as the Ptolemaic system, is a theory that was developed by philosophers in Ancient Greece and was named after the philosopher Claudius Ptolemy who lived circa 91 to 168 A.D. It was developed to explain how the planets, the Sun, and even the stars orbit around the Earth.ou might want to check out one article on the history of the geocentric model and one regarding the geocentric theory. Here are links to two other articles from Universe Today on what the center of the universe is and Galileo one of the advocates of the heliocentric model.'],
    ['5183032', "After 1,400 years, Copernicus was the first to propose a theory which differed from Ptolemy's geocentric system, according to which the earth is at rest in the center with the rest of the planets revolving around it."],
]
query = 'who proposed the geocentric theory'
pattern = "Query: {query} Document: {document} Relevant:"
texts = [p[1] for p in passages]

model_name = 'castorini/monot5-base-msmarco'
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained('t5-base', use_fast=False)
EOS = tokenizer.eos_token
MAX_LENGTH = 512

tokenizer_kwargs = {'return_attention_mask': True,
                    'padding': 'longest',
                    'truncation': True,
                    'max_length': MAX_LENGTH}
dataset = tokenizer.batch_encode_plus(
    [pattern.format(query=query, document=document) for document in texts],
    **tokenizer_kwargs)
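
# Quick check (a sketch): decode the first encoded example to confirm the filled-in
# monoT5 prompt, i.e. "Query: ... Document: ... Relevant:" followed by the tokenizer's EOS token.
print(tokenizer.decode(dataset['input_ids'][0]))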
@torch.no_grad()
def greedy_decode(model,
                  input_ids: torch.Tensor,
                  length: int,
                  attention_mask: torch.Tensor = None,
                  return_last_logits: bool = True):
    decode_ids = torch.full((input_ids.size(0), 1),
                            model.config.decoder_start_token_id,
                            dtype=torch.long).to(input_ids.device)
    encoder_outputs = model.get_encoder()(input_ids, attention_mask=attention_mask)
    next_token_logits = None
    for _ in range(length):
        model_inputs = model.prepare_inputs_for_generation(
            decode_ids,
            encoder_outputs=encoder_outputs,
            past=None,
            attention_mask=attention_mask,
            use_cache=True)
        outputs = model(**model_inputs)  # (batch_size, cur_len, vocab_size)
        next_token_logits = outputs[0][:, -1, :]  # (batch_size, vocab_size)
        decode_ids = torch.cat([decode_ids,
                                next_token_logits.max(1)[1].unsqueeze(-1)],
                               dim=-1)
    if return_last_logits:
        return decode_ids, next_token_logits
    return decode_ids
class MonoT5Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        return item

    def __len__(self):
        return len(self.encodings['input_ids'])


ds = MonoT5Dataset(dataset)
trainloader = torch.utils.data.DataLoader(ds, batch_size=32, shuffle=False, num_workers=8)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
for batch in trainloader:
    input_ids = batch['input_ids'].to(device)
    attn_mask = batch['attention_mask'].to(device)
    _, batch_scores = greedy_decode(model,
                                    input_ids,
                                    length=1,
                                    attention_mask=attn_mask,
                                    return_last_logits=True)
    # 6136 and 1176 are the vocabulary ids of the tokens "false" and "true" in T5.
    batch_scores = batch_scores[:, [6136, 1176]]
    batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
    batch_log_probs = batch_scores[:, 1].tolist()
    for score in batch_log_probs:
        print(score)
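
# To turn the per-batch scores printed above into an actual ranking, collect the
# log-probability of "true" for every passage and sort. A minimal sketch; the
# `all_scores` list and the variable names below are introduced here for illustration.
all_scores = []
for batch in trainloader:
    input_ids = batch['input_ids'].to(device)
    attn_mask = batch['attention_mask'].to(device)
    _, logits = greedy_decode(model, input_ids, length=1,
                              attention_mask=attn_mask, return_last_logits=True)
    scores = torch.nn.functional.log_softmax(logits[:, [6136, 1176]], dim=1)
    all_scores.extend(scores[:, 1].tolist())

# Higher log-probability of "true" means the passage is judged more relevant to the query.
ranked = sorted(zip(passages, all_scores), key=lambda x: x[1], reverse=True)
for (doc_id, _), score in ranked[:3]:
    print(doc_id, round(score, 4))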
# === Retrieve & re-rank: bi-encoder semantic search over Simple English Wikipedia with cross-encoder re-ranking ===
""" | |
This examples demonstrates the setup for Query / Question-Answer-Retrieval. | |
You can input a query or a question. The script then uses semantic search | |
to find relevant passages in Simple English Wikipedia (as it is smaller and fits better in RAM). | |
For semantic search, we use SentenceTransformer('msmarco-distilbert-base-v2') and retrieve | |
100 potentially passages that answer the input query. | |
Next, we use a more powerful CrossEncoder (cross_encoder = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-6')) that | |
scores the query and all retrieved passages for their relevancy. The cross-encoder is neccessary to filter out certain noise | |
that might be retrieved from the semantic search step. | |
Google Colab Example: https://colab.research.google.com/drive/1l6stpYdRMmeDBK_vw0L5NitdiAuhdsAr?usp=sharing | |
""" | |
import json
import gzip
import os
import time
from pathlib import Path

import torch
from sentence_transformers import SentenceTransformer, CrossEncoder, util

# We use the bi-encoder to encode all passages, so that we can use them with semantic search
model_name = 'msmarco-distilbert-base-v2'
bi_encoder = SentenceTransformer(model_name)
top_k = 100  # Number of passages we want to retrieve with the bi-encoder

# The bi-encoder will retrieve 100 documents. We use a cross-encoder to re-rank the result list and improve quality
cross_encoder = CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-6')

# As dataset, we use Simple English Wikipedia. Compared to the full English Wikipedia, it has only
# about 170k articles. We split these articles into paragraphs and encode them with the bi-encoder
wikipedia_filepath = 'data/simplewiki-2020-11-01.jsonl.gz'
if not os.path.exists(wikipedia_filepath):
    util.http_get('http://sbert.net/datasets/simplewiki-2020-11-01.jsonl.gz', wikipedia_filepath)
passages = []
with gzip.open(wikipedia_filepath, 'rt', encoding='utf8') as fIn:
    for line in fIn:
        data = json.loads(line.strip())
        passages.extend(data['paragraphs'])

# If you like, you can also limit the number of passages you want to use
print("Passages:", len(passages))

# To speed things up, we load cached embeddings from disk if they exist.
# The cached file contains the passages encoded with the model 'msmarco-distilbert-base-v2'
embeddings_filepath = f'{Path(wikipedia_filepath).stem}-{model_name}.pt'
if os.path.exists(embeddings_filepath):
    corpus_embeddings = torch.load(embeddings_filepath)
    corpus_embeddings = corpus_embeddings.float()  # Convert embeddings to float32
    if torch.cuda.is_available():
        corpus_embeddings = corpus_embeddings.to('cuda')
else:
    # Compute the corpus embeddings from scratch (this can take a while depending on the GPU) and cache them
    corpus_embeddings = bi_encoder.encode(passages, convert_to_tensor=True, show_progress_bar=True)
    torch.save(corpus_embeddings, embeddings_filepath)
while True:
    query = input("Please enter a question: ")

    # Encode the query using the bi-encoder and find potentially relevant passages
    start_time = time.time()
    question_embedding = bi_encoder.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=top_k)
    hits = hits[0]  # Get the hits for the first query

    # Now, score all retrieved passages with the cross-encoder
    cross_inp = [[query, passages[hit['corpus_id']]] for hit in hits]
    cross_scores = cross_encoder.predict(cross_inp)

    # Sort results by the cross-encoder scores
    for idx in range(len(cross_scores)):
        hits[idx]['cross-score'] = cross_scores[idx]
    hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
    end_time = time.time()

    # Output the top-5 hits
    print("Input question:", query)
    print("Results (after {:.3f} seconds):".format(end_time - start_time))
    for hit in hits[0:5]:
        print("\t{:.3f}\t{}".format(hit['cross-score'], passages[hit['corpus_id']]))
    print("\n\n========\n")