This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def pages_in_wiktionary_category(category_name, language='en'):
    """Yield the title of every member of a Wiktionary category.

    Queries the MediaWiki ``categorymembers`` list of
    ``https://{language}.wiktionary.org`` in batches of 500 and follows the
    API's continuation data until the listing is exhausted.

    Args:
        category_name: Category name without the ``Category:`` prefix.
        language: Subdomain of wiktionary.org to query (default ``'en'``).

    Yields:
        The ``title`` field of each returned category member.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    url = f'https://{language}.wiktionary.org/w/api.php'
    # Passing the query as ``params`` lets requests percent-encode the values,
    # so category names containing spaces, '&', '#' or non-ASCII characters
    # (and the '|' characters in continuation tokens) are sent intact.
    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': f'Category:{category_name}',
        'cmlimit': 500,
        'format': 'json',
    }
    while True:
        response = requests.get(url, params=params)
        response.raise_for_status()
        obj = response.json()
        for member in obj['query']['categorymembers']:
            yield member['title']
        if 'continue' not in obj:
            break
        # Merge every continuation key (cmcontinue, continue, ...) into the
        # next request, as the MediaWiki API asks clients to do.
        params.update(obj['continue'])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
" Minimal editor configuration: UTF-8 text, 4-column soft tabs, line numbers,
" and search / command-line conveniences.

" Text encoding used inside Vim.
set encoding=utf-8

" Indentation: keep the previous line's indent, insert spaces instead of
" tabs, and use a width of 4 for both tab stops and shift operations.
set autoindent
set expandtab
set tabstop=4
set shiftwidth=4

" Show line numbers.
set number

" Highlight search matches and search incrementally while typing.
set hlsearch incsearch

" Enhanced command-line completion menu and display of partial commands.
set wildmenu
set showcmd

" Enable syntax highlighting.
syntax on
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def read_conll(filename):
    """Read a space-separated, four-column CoNLL-style file into a DataFrame.

    Each non-blank line becomes one row with the columns ``TOKEN``, ``POS``,
    ``CHUNK`` and ``NE``. Blank lines are treated as separators: a
    ``SENTENCE`` column holds the number of blank lines seen before each row,
    and the blank rows themselves are dropped from the result.

    Args:
        filename: Path or file-like object, passed straight to
            ``pandas.read_csv``.

    Returns:
        A DataFrame with columns ``TOKEN``, ``POS``, ``CHUNK``, ``NE`` and
        ``SENTENCE``, containing only the rows whose token is non-empty.
    """
    df = pd.read_csv(
        filename,
        sep=' ',
        header=None,
        names=['TOKEN', 'POS', 'CHUNK', 'NE'],
        # Keep strings such as "NA" or "null" as literal tokens and read
        # empty fields as '' rather than NaN.
        keep_default_na=False,
        # 3 == csv.QUOTE_NONE: quote characters are ordinary token text.
        quoting=3,
        # Blank lines must survive parsing; they mark the boundaries below.
        skip_blank_lines=False,
    )
    # Running count of blank rows, i.e. how many separators precede each row.
    df['SENTENCE'] = (df.TOKEN == '').cumsum()
    return df[df.TOKEN != '']
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tensorflow.keras.layers import Input, Dense, Lambda, Reshape, Activation, Layer, LayerNormalization, Add | |
from tensorflow.keras.models import Sequential | |
from tensorflow.keras import Model | |
import tensorflow as tf | |
class SelfAttention(Layer):
    """Keras layer parameterised by a number of attention heads.

    NOTE(review): only the constructor is visible in this file; the
    attention computation itself (build/call) is not shown here.
    """

    def __init__(self, heads=8, **kwargs):
        """Store the head count and initialise the base ``Layer``.

        Args:
            heads: Number of attention heads (default 8).
            **kwargs: Standard Keras ``Layer`` options (e.g. ``name``,
                ``dtype``, ``trainable``), forwarded to the base class.
        """
        super().__init__(**kwargs)
        self.heads = heads
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy | |
# Shared spaCy pipeline, loaded once at import time.
# NOTE(review): the 'en' shortcut name only resolves on spaCy versions that
# support shortcut links; newer releases need a full package name such as
# 'en_core_web_sm' - confirm against the installed spaCy version.
nlp = spacy.load('en')
def set_custom_boundaries(doc):
    """Mark the token after every newline token as the start of a sentence.

    Args:
        doc: Indexable, sliceable sequence of tokens exposing ``text``, ``i``
            and a writable ``is_sent_start`` (presumably a spaCy ``Doc``).

    Returns:
        The same ``doc`` object, modified in place.
    """
    # The last token is skipped because nothing follows it to be marked.
    for token in doc[:-1]:
        if token.text == "\n":
            doc[token.i + 1].is_sent_start = True
    return doc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm import tqdm | |
import requests, grequests | |
class ProgressSession:
    """Track a batch of URL requests with a tqdm progress bar.

    NOTE(review): only the constructor and the ``update`` callback are
    visible in this file; how the requests are issued is not shown here.
    """

    def __init__(self, urls):
        """Create a progress bar sized to the number of URLs.

        Args:
            urls: Sized collection of URLs to be requested.
        """
        self.pbar = tqdm(total=len(urls), desc='Making async requests')
        self.urls = urls

    def update(self, r, *args, **kwargs):
        """Advance the progress bar by one unless ``r`` is a redirect.

        The extra positional/keyword arguments are accepted and ignored,
        presumably so the method can be used as a response hook.

        Args:
            r: Response-like object exposing ``is_redirect``.
        """
        # Redirect responses are not counted as progress.
        if not r.is_redirect:
            self.pbar.update()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup as bs | |
def html2text(html):
    """Extract the visible text from an HTML document.

    ``<script>`` and ``<style>`` elements are removed, every ``<br>`` is
    replaced by a newline, and the remaining text nodes are joined with a
    single space.

    Args:
        html: HTML markup to parse (with the ``lxml`` parser).

    Returns:
        The extracted text with leading and trailing whitespace stripped.
    """
    soup = bs(html, features='lxml')
    # Script and style contents are not visible text; remove them entirely.
    for tag in soup(["script", "style"]):
        tag.decompose()
    # Keep explicit line breaks as newlines in the output.
    for br in soup.find_all("br"):
        br.replace_with("\n")
    return soup.get_text(separator=' ').strip()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import tensorflow_hub as hub | |
from tokenizers import BertWordPieceTokenizer | |
from tensorflow.keras.layers import Input | |
from tensorflow.keras.models import Model | |
import numpy as np | |
class BERTPreprocessor:
    """Holder for BERT preprocessing constants.

    NOTE(review): only the class header and this one constant are visible in
    this file; the rest of the preprocessor is not shown here.
    """

    # Literal separator token string.
    SEP_TOKEN = '[SEP]'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def top_k_categorical_accuracy(y_true, y_pred_proba, k=1):
    """Return the fraction of samples whose true class is among the top k.

    Args:
        y_true: 1-D array-like of integer class indices, one per sample.
        y_pred_proba: 2-D array-like of scores, one row per sample and one
            column per class.
        k: Number of highest-scoring classes considered per sample; must be
            at least 1.

    Returns:
        The mean, over samples, of whether the true class index is one of
        the ``k`` highest-scoring columns of that sample's row.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # Without this guard, k == 0 would slice ``[:, -0:]`` (i.e. every column)
    # and silently report a perfect score.
    if k < 1:
        raise ValueError("k must be a positive integer")
    # Accept plain lists as well as arrays.
    y_true = np.asarray(y_true)
    y_pred_proba = np.asarray(y_pred_proba)
    # argsort is ascending, so the last k columns hold the top-k class indices.
    top_k = np.argsort(y_pred_proba)[:, -k:]
    return np.equal(top_k, y_true[:, None]).any(axis=1).mean()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask | |
from flask_caching import Cache | |
# Create the Flask application and attach a Flask-Caching cache to it.
app = Flask(__name__)
# NOTE(review): newer Flask-Caching releases name this backend "SimpleCache"
# and treat "simple" as a deprecated alias - confirm against the installed
# version before changing the value.
app.config.from_mapping({"CACHE_TYPE": "simple"})
cache = Cache(app)
def approximate_pi(n): | |
output = 0 | |
for i in range(1, n): |
OlderNewer