Skip to content

Instantly share code, notes, and snippets.

@anna-hope
anna-hope / dynamic_k_max.py
Last active October 9, 2019 06:06
Dynamic K-Max pooling in PyTorch (Kalchbrenner et al. 2014)
# Based on https://arxiv.org/pdf/1404.2188.pdf
# Anton Melnikov
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np
class DynamicKMaxPooling(nn.Module):
def __init__(self, k_top, L):
@anna-hope
anna-hope / word_attention.py
Last active March 22, 2018 03:43
Word-level attention from Yang et al. 2016 in PyTorch
import torch
from torch import nn
import torch.nn.functional as F
class WordLevelAttention(nn.Module):
# this follows the word-level attention from Yang et al. 2016
# https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf
def __init__(self, n_hidden, *, batch_first=False):
@anna-hope
anna-hope / find_threshold.py
Last active March 3, 2018 03:03
Find a threshold maximising the score of a given function
import math
from operator import add, sub
import random
# Draw a random float in [0.0, 1.0) and print it up front.
# NOTE(review): judging by the name, this is the reference value the threshold
# search is expected to recover; the code that uses it is not visible here.
true_threshold = random.random()
print('true threshold:', true_threshold)
def get_score(threshold):
# simulate some scoring function
@anna-hope
anna-hope / nums_to_words.py
Created December 27, 2016 03:48
Converts all numbers in a string to words (relies on the num2words library)
from num2words import num2words
import re
def nums_to_words(string):
while True:
m = re.search('\d+', string)
if not m:
break
@anna-hope
anna-hope / wer.py
Last active December 15, 2016 02:55
Word Error Rate
# Anton Melnikov
# used https://martin-thoma.com/word-error-rate-calculation/
# and http://progfruits.blogspot.com/2014/02/word-error-rate-wer-and-word.html
# as reference
import numpy as np
def get_wer(reference, hypothesis):
# create the matrices
@anna-hope
anna-hope / make_chunks.py
Last active November 12, 2015 20:37
make chunks from an iterable for parallel processing
def make_parallel_chunks(sequence, num_chunks, chunk_size):
    """Lazily split *sequence* into ``num_chunks`` consecutive chunks.

    Args:
        sequence: The items to split. Every chunk is sliced from this same
            object, so it should be re-iterable (list, tuple, str, ...);
            a one-shot iterator would be advanced by each slice in turn.
        num_chunks: How many chunks to yield.
        chunk_size: Maximum number of items per chunk; must be positive.

    Yields:
        One ``itertools.islice`` iterator per chunk. The k-th chunk covers
        items ``[k * chunk_size, (k + 1) * chunk_size)``; chunks that start
        past the end of *sequence* are simply empty.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    # Imported locally so the function does not depend on a module-level
    # ``import itertools`` being present in the file.
    from itertools import islice

    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    # The previous loop was ``range(0, num_chunks, chunk_size)``, which treats
    # the chunk *count* as an item offset bound and therefore produced only
    # about ``num_chunks / chunk_size`` chunks. Iterate over chunk indices and
    # derive each chunk's item offsets from the index instead.
    for chunk_index in range(num_chunks):
        start = chunk_index * chunk_size
        yield islice(sequence, start, start + chunk_size)
@anna-hope
anna-hope / get_ngrams.py
Last active November 5, 2015 23:12
get all possible n-grams from an iterable
def get_ngrams(iterable):
length = len(iterable)
if length == 1:
yield iterable[0]
return
# the 'starting position' loop
for n in range(length):
@anna-hope
anna-hope / readable_ilga.user.js
Last active August 29, 2015 14:27
make Illinois law database website readable
// ==UserScript==
// @name Readable ILGA
// @namespace http://ostensible.me
// @version 0.1
// @description make ILGA.gov readable
// @author Anton Osten
// @match http://*.ilga.gov/*
// @grant none
// ==/UserScript==
#!/usr/bin/env python3
from enum import Enum
from itertools import chain
import re
import pprint
# Relative path to a text file, resolved against the current working directory.
# NOTE(review): presumably the corpus this script reads; the code that opens it
# is not visible here — confirm.
filepath = "chukchi_texts.txt"
#pprint.pprint(words)
@anna-hope
anna-hope / finnish_like_harmony.json
Created August 12, 2015 04:36
a corpus of gibberish which follows the rules of Finnish vowel harmony
This file has been truncated, but you can view the full file.
[
[
"lyfätösivözi",
"änydezäqepez",
"päwidefyföjä",
"evepägäniwyk",
"xylyxäzöbewe",
"ixäbörevögeh",
"öwygyvywäwyg",
"zexirilähiqö",