This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from typing import List, Tuple | |
| import itertools | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import numpy as np | |
| def mmr(doc_embedding: np.ndarray, | |
| word_embeddings: np.ndarray, | |
| words: List[str], | |
| top_n: int = 5, | |
| diversity: float = 0.9) -> List[Tuple[str, float]]: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| !pip install --quiet sense2vec==1.0.3 | |
| !wget https://github.com/explosion/sense2vec/releases/download/v1.0.0/s2v_reddit_2015_md.tar.gz | |
| !tar -xvf s2v_reddit_2015_md.tar.gz | |
| !ls s2v_old | |
| # load sense2vec vectors | |
| from sense2vec import Sense2Vec | |
| s2v = Sense2Vec().from_disk('s2v_old') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| !pip install --quiet sentence_transformers==2.2.0 | |
| from sentence_transformers import SentenceTransformer | |
| model= SentenceTransformer('all-MiniLM-L12-v2') | |
def get_answer_and_distractor_embeddings(answer,candidate_distractors):
    """Embed the answer and the candidate distractors with the shared encoder.

    Both inputs go through the module-level ``model.encode``. The answer is
    wrapped in a one-element list before encoding, so it is embedded as a
    batch of one, while ``candidate_distractors`` is passed through as given.

    Returns:
        A ``(answer_embedding, distractor_embeddings)`` pair, in that order.
        NOTE(review): presumably each is a 2-D array with one row per input
        text -- confirm against the encoder's return type.
    """
    # Tuple elements evaluate left to right, so the answer is still encoded
    # before the distractors.
    return model.encode([answer]), model.encode(candidate_distractors)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: look up the sense2vec neighbours of a sample phrase.
originalword = "Barack Obama"

# Build the lookup key: lower-case the phrase and join its words with "_".
word = originalword.lower().replace(" ", "_")
print ("word ",word)

sense = s2v.get_best_sense(word)
print ("Best sense ",sense)

# get_best_sense yields None when the phrase has no entry in the loaded
# vectors; querying most_similar with None would raise, so fall back to an
# empty neighbour list instead of crashing the cell.
if sense is None:
    most_similar = []
else:
    most_similar = s2v.most_similar(sense, n=20)
print (most_similar)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from fastapi import Request,FastAPI | |
| from pydantic import BaseModel | |
| import uvicorn | |
| from transformers.pipelines import pipeline | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import torch | |
| app = FastAPI() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from transformers.pipelines import pipeline | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import torch | |
# Run on the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'
print ("Device ", torch_device)

# Load the tokenizer and the seq2seq weights from the same checkpoint, then
# move the weights onto the selected device.
tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
model = model.to(torch_device)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import gradio as gr | |
def greet(sen):
    """Run ``getMCQs`` on the sentence and format its result for display.

    ``getMCQs(sen)`` is unpacked into question, answer, distractors and
    meaning. The distractors are joined into one comma-separated string and
    the answer is capitalised; question and meaning pass through unchanged.

    Returns:
        ``(question, capitalised answer, joined distractors, meaning)``.
    """
    mcq_fields = getMCQs(sen)
    question, answer, distractors, meaning = mcq_fields

    # Join first, then capitalise -- same evaluation order as before.
    wrong_choices = ', '.join(distractors)
    correct_answer = answer.capitalize()
    return question, correct_answer, wrong_choices, meaning
| textbox1 = gr.outputs.Textbox( type="auto", label="Question") | |
| textbox2 = gr.outputs.Textbox(type="auto", label="Correct Answer") | |
| textbox3 = gr.outputs.Textbox( type="auto", label="Distractors (wrong choices)") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def getMCQs(sent): | |
| sentence_for_bert = sent.replace("**"," [TGT] ") | |
| sentence_for_bert = " ".join(sentence_for_bert.split()) | |
| # try: | |
| sense,meaning,answer = get_sense(sentence_for_bert) | |
| if sense is not None: | |
| distractors = get_distractors_wordnet(sense,answer) | |
| else: | |
| distractors = ["Word not found in Wordnet. So unable to extract distractors."] | |
| sentence_for_T5 = sent.replace("**"," ") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| import os | |
| from collections import namedtuple | |
| import re | |
| import torch | |
| from tabulate import tabulate | |
| from torch.nn.functional import softmax | |
| from tqdm import tqdm | |
| from transformers import BertTokenizer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import torch | |
| import math | |
| from transformers import BertModel, BertConfig, BertPreTrainedModel, BertTokenizer | |
| class BertWSD(BertPreTrainedModel): | |
| def __init__(self, config): | |
| super().__init__(config) | |
| self.bert = BertModel(config) | |
| self.dropout = torch.nn.Dropout(config.hidden_dropout_prob) |
NewerOlder