This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from functools import reduce | |
def compose(*functions):
    """Return the right-to-left composition of ``functions``.

    ``compose(f, g, h)(x)`` evaluates ``f(g(h(x)))``. With no functions the
    result is the identity function.

    The functions are applied in a loop instead of through nested closures,
    so the call depth stays constant no matter how many functions are
    composed (a chain of nested lambdas needs one stack frame per function
    and can exceed the interpreter's recursion limit).
    """
    def composed(x):
        # The last function listed is the innermost one, so it runs first.
        for function in reversed(functions):
            x = function(x)
        return x

    return composed
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def calculate_cosine(query, X, k=10, threshold=0.5): | |
""" | |
Arguments: | |
query: (dim,) or (dim, 1)-array | |
X: (dim, N)-array of N-vectors | |
Returns: | |
topk_score_indices: sorted top-k-scored indices of N-vectors | |
topk_score: and its scores |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import tee, zip_longest | |
def ngrams(iterable, n=3): | |
""" | |
>>> list(ngrams(range(5), 3)) | |
[(0, 1, 2), (1, 2, 3), (2, 3, 4)] | |
""" | |
ts = tee(iterable, n) | |
for i, t in enumerate(ts[1:]): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
def dict_of_list(keys, values):
    """Group ``values`` by their positionally paired ``keys``.

    Arguments:
        keys: sized sequence of hashable keys
        values: sized sequence with the same length as ``keys``
    Returns:
        key2values: ``defaultdict(list)`` mapping each key to the list of
            values paired with it, in input order
    Raises:
        ValueError: if ``keys`` and ``values`` differ in length
    """
    if len(keys) != len(values):
        # Checked explicitly rather than with `assert`: asserts are removed
        # under `python -O`, after which zip() would silently drop the
        # unmatched tail of the longer input.
        raise ValueError(
            f"keys and values must have the same length "
            f"({len(keys)} != {len(values)})"
        )
    key2values = defaultdict(list)
    for k, v in zip(keys, values):
        key2values[k].append(v)
    return key2values
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Maps every code point in the katakana range 'ァ'..'ヶ' to the hiragana code
# point at the same offset from 'ぁ'. Built once at import time.
_KATAKANA_TO_HIRAGANA = {
    code: code - ord('ァ') + ord('ぁ')
    for code in range(ord('ァ'), ord('ヶ') + 1)
}


def katakana_to_hiragana(string):
    """Return ``string`` with katakana in 'ァ'..'ヶ' replaced by hiragana.

    Characters outside that range (including the long-vowel mark 'ー',
    Latin letters, kanji, ...) are passed through unchanged.
    """
    # A single translate() pass avoids repeated string concatenation.
    return string.translate(_KATAKANA_TO_HIRAGANA)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
'NAME': | |
{ | |
'Name_Other': {}, | |
'Person': {}, | |
'God': {}, | |
'Organization': | |
{ | |
'Organization_Other': {}, | |
'International_Organization': {}, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Usage: python remove_output.py notebook.ipynb [ > without_output.ipynb ] | |
Modified from remove_output by Minrk | |
""" | |
import sys | |
import io | |
import os | |
from IPython.nbformat.current import read, write |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import contextlib | |
import time | |
@contextlib.contextmanager
def time_measure(ident):
    """Context manager that prints how long the ``with`` body took.

    Arguments:
        ident: label printed in front of the elapsed time
    Prints:
        ``"<ident>: <elapsed> sec"`` when the body finishes. The line is
        printed even if the body raises; the exception then propagates.
    """
    # perf_counter() is monotonic, so the measurement is not distorted by
    # system clock adjustments the way time.time() differences can be.
    tstart = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - tstart
        print(f"{ident}: {elapsed} sec")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pickle | |
import subprocess | |
from pathlib import Path | |
import sys | |
from lxml import etree | |
def extract_textbox_slide(page): | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Move the 'date' column into the index so rows can be grouped on it.
df = df.set_index('date')
# level=0 groups on the index set above; each iteration yields the index
# value and the sub-frame of rows that share it.
for date, new_df in df.groupby(level=0):
    print(new_df)