This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm import tqdm | |
import requests, grequests | |
class ProgressSession:
    """Show a tqdm progress bar for a batch of URL requests.

    The bar is sized to the number of URLs given at construction time.
    ``update`` takes ``(r, *args, **kwargs)``, which matches the signature of
    a ``requests`` response hook -- presumably it is registered as one, but
    the registration is not visible in this snippet.
    """

    def __init__(self, urls):
        """Create the progress bar and remember the URLs.

        Args:
            urls: Sized collection of URLs; ``len(urls)`` becomes the bar's
                total.
        """
        self.pbar = tqdm(total=len(urls), desc='Making async requests')
        self.urls = urls

    def update(self, r, *args, **kwargs):
        """Advance the bar by one step unless ``r`` is a redirect.

        Args:
            r: Response-like object exposing an ``is_redirect`` attribute.
            *args: Ignored; accepted so the method can be used as a callback.
            **kwargs: Ignored; accepted so the method can be used as a callback.
        """
        # Redirect responses are skipped -- presumably so that each requested
        # URL is counted once, when its final response arrives.
        if not r.is_redirect:
            self.pbar.update()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy | |
# Load spaCy's English pipeline once, at module import time.
# NOTE(review): the 'en' shortcut name only resolves on spaCy 2.x; newer
# releases need the full package name (e.g. 'en_core_web_sm') -- confirm the
# installed version before changing it.
nlp = spacy.load('en')
def set_custom_boundaries(doc):
    """Mark the token after every single-newline token as a sentence start.

    Args:
        doc: Sequence of tokens supporting slicing and integer indexing.
            Each token exposes ``text``, its index ``i`` and a writable
            ``is_sent_start`` attribute.

    Returns:
        The same ``doc`` object, modified in place.
    """
    # The last token is excluded because there is no following token to flag.
    for token in doc[:-1]:
        if token.text == "\n":
            doc[token.i + 1].is_sent_start = True
    return doc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow.keras.layers import Input, Dense, Lambda, Reshape, Activation, Layer, LayerNormalization, Add | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras import Model | |
import tensorflow as tf | |
class SelfAttention(Layer):
    """Keras layer configured with a number of attention heads.

    Only the constructor is visible in this snippet; the rest of the layer
    is not shown here.
    """

    def __init__(self, heads=8, **kwargs):
        """Store the head count and initialise the base ``Layer``.

        Args:
            heads: Number of attention heads (default 8).
            **kwargs: Standard ``Layer`` options such as ``name`` or
                ``dtype``, forwarded unchanged to the base class.
        """
        super().__init__(**kwargs)
        self.heads = heads
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def read_conll(filename):
    """Read a space-separated, four-column CoNLL file into a DataFrame.

    Args:
        filename: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A DataFrame with columns ``TOKEN``, ``POS``, ``CHUNK``, ``NE`` and
        ``SENTENCE``. ``SENTENCE`` is a running count of the blank lines seen
        so far, so all tokens between two blank lines share the same value.
        The blank separator rows themselves are dropped.
    """
    import csv

    df = pd.read_csv(
        filename,
        sep=' ',
        header=None,
        names=['TOKEN', 'POS', 'CHUNK', 'NE'],
        # Keep empty fields as '' (not NaN) so blank lines can be detected.
        keep_default_na=False,
        # Quote characters are ordinary token text, never field delimiters.
        quoting=csv.QUOTE_NONE,
        # Blank lines separate sentences, so they must survive parsing.
        skip_blank_lines=False,
    )
    is_separator = df.TOKEN == ''
    df['SENTENCE'] = is_separator.cumsum()
    return df[~is_separator]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
" Use UTF-8 as Vim's internal character encoding.
set encoding=utf-8

" Indentation: carry the current indent to new lines, insert spaces instead
" of tab characters, and use 4 columns per tab stop / indent step.
set autoindent
set expandtab
set tabstop=4
set shiftwidth=4

" Show line numbers.
set number

" Highlight search matches, including incrementally while typing.
set hlsearch incsearch

" Enhanced command-line completion menu.
set wildmenu

" Show the partially typed command in the last line of the screen.
set showcmd

" Enable syntax highlighting.
syntax on
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def pages_in_wiktionary_category(category_name, language='en'):
    """Yield the title of every page in a Wiktionary category.

    Pages are fetched from the MediaWiki ``categorymembers`` list in batches
    of 500, following the API's continuation data until none is returned.

    Args:
        category_name: Category name without the ``Category:`` prefix.
        language: Wiktionary language subdomain (default ``'en'``).

    Yields:
        The ``title`` of each category member, in the order the API returns
        them.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    endpoint = f'https://{language}.wiktionary.org/w/api.php'
    # Passing values via ``params`` lets requests URL-encode them, so category
    # names containing spaces or '&' and continuation tokens containing '|'
    # are sent intact.
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': f'Category:{category_name}',
        'cmlimit': 500,
        'format': 'json',
    }
    while True:
        response = requests.get(endpoint, params=params)
        response.raise_for_status()
        payload = response.json()
        for member in payload['query']['categorymembers']:
            yield member['title']
        if 'continue' not in payload:
            break
        # Merge the whole continuation object (it carries ``cmcontinue``) into
        # the next request.
        params.update(payload['continue'])
NewerOlder