This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import urllib.request | |
| from collections import defaultdict | |
# Download the corpus text. The context manager closes the HTTP response
# as soon as the body has been read, instead of leaving it open.
with urllib.request.urlopen(
        'http://www.sls.hawaii.edu/bley-vroman/brown.txt') as raw_bytes:
    # Decode as UTF-8 and normalise Windows line endings to '\n'.
    brown_corpus = raw_bytes.read().decode('utf8').replace('\r\n', '\n')

# First 250 characters of the downloaded text.
B = brown_corpus[:250]
| def find_pairs(text): | |
| pairs = defaultdict(int) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pickle | |
| import numpy as np | |
| import pandas as pd | |
| import torch | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from tqdm.auto import tqdm | |
# Load the pickled object stored at the path below.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this file if it comes from a trusted source.
with open('sts_attributions/shelf_approx_attr_l-9_N-100.pkl', 'rb') as inp:
    shelf_approx = pickle.load(inp)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import sys | |
| import pickle | |
| import requests | |
| import torch | |
| from sentence_transformers import SentenceTransformer | |
| from sentence_transformers.models import Pooling | |
| from sentence_transformers import util | |
| from xsbert import models |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| from http.server import BaseHTTPRequestHandler, HTTPServer | |
| import pandas as pd | |
# NOTE(review): presumably the host and port for the HTTPServer imported
# above; the code that uses them is outside this excerpt, so confirm.
hostName = "localhost"
serverPort = 20000

# A global variable to store the queue elements.
queue = []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from collections import defaultdict | |
| from itertools import combinations | |
| import pandas as pd | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer, util | |
| def compute_kernel_bias(vecs, k=None): | |
| """ | |
| Code taken from: https://github.com/bojone/BERT-whitening |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import sys | |
| import shutil | |
| def copy_tree(src, dst): | |
| ''' | |
| Copy a directory tree from src to dst ignoring dangling | |
| symlinks, retrieving files symlinks point to, and | |
| breaking the cycles, i.e. never copying the same |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| from math import ceil | |
| from random import shuffle | |
| import torch | |
| import torch.nn as nn | |
| from transformers import AutoTokenizer, AutoModel | |
| from transformers import AdamW, get_scheduler |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import numpy as np | |
# We're given an n-by-n distance matrix *D* holding the transfer
# probability for each pair of nodes (for any feature), a feature
# matrix *M*, and a dropout probability p_d.
# Convert the transfer probabilities to no-transfer probabilities
# (1 - D) and take their logs.
# NOTE: any entry of D equal to 1 yields log(0) = -inf here.
L = np.log(1 - D)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Collect every possible punctuation character | |
| import sys | |
| from unicodedata import category | |
# Lazily enumerate every Unicode code point as a one-character string.
chrs = (chr(i) for i in range(sys.maxunicode + 1))
# Keep the characters whose Unicode general category starts with "P".
punctuation = {c for c in chrs if category(c).startswith("P")}
# A hyphen can occur inside words, so it is not treated as punctuation.
punctuation.remove('-')
| def tokenize(s, lower_case=False): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def rm_whitespace(s):
    """Return *s* without a single leading 'Ġ' character.

    Only the first character is examined: a string that does not start
    with 'Ġ' (including the empty string) is returned unchanged, and any
    further 'Ġ' characters after the first are kept.
    """
    if s.startswith('Ġ'):
        return s[1:]
    return s
| def get_tokens_with_ranges(input_string, tokenizer): | |
| ''' | |
| RoBERTa prepends 'Ġ' to the beginning of what it |