This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on https://arxiv.org/pdf/1404.2188.pdf | |
# Anton Melnikov | |
import torch | |
from torch import nn | |
import torch.nn.functional as F | |
import numpy as np | |
class DynamicKMaxPooling(nn.Module): | |
def __init__(self, k_top, L): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch import nn | |
import torch.nn.functional as F | |
class WordLevelAttention(nn.Module): | |
# this follows the word-level attention from Yang et al. 2016 | |
# https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf | |
def __init__(self, n_hidden, *, batch_first=False): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math | |
from operator import add, sub | |
import random | |
true_threshold = random.random() | |
print('true threshold:', true_threshold) | |
def get_score(threshold): | |
# simulate some scoring function |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from num2words import num2words | |
import re | |
def nums_to_words(string): | |
while True: | |
m = re.search('\d+', string) | |
if not m: | |
break |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Anton Melnikov | |
# used https://martin-thoma.com/word-error-rate-calculation/ | |
# and http://progfruits.blogspot.com/2014/02/word-error-rate-wer-and-word.html | |
# as reference | |
import numpy as np | |
def get_wer(reference, hypothesis): | |
# create the matrices |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_parallel_chunks(sequence, num_chunks, chunk_size):
    """Lazily yield consecutive ``chunk_size``-wide slices of *sequence*.

    Chunk start offsets are ``range(0, num_chunks, chunk_size)``; each chunk
    is ``itertools.islice(sequence, start, start + chunk_size)``, so the last
    chunk may be shorter if *sequence* runs out.

    NOTE(review): despite its name, ``num_chunks`` acts as the exclusive
    upper bound on the start offsets, not as a count of chunks (e.g.
    ``num_chunks=4, chunk_size=2`` yields two chunks). Confirm against the
    callers whether the total item count is what is meant to be passed here.

    NOTE(review): presumably *sequence* is a re-iterable container such as a
    list; with a one-shot iterator every ``islice`` would share and advance
    the same underlying iterator -- confirm.

    Args:
        sequence: iterable to cut into chunks.
        num_chunks: exclusive upper bound for the chunk start offsets.
        chunk_size: width of each chunk and step between start offsets.

    Yields:
        ``itertools.islice`` objects, one per start offset.
    """
    # Imported locally: the function uses itertools, but no import of it is
    # visible alongside this definition.
    import itertools

    for start in range(0, num_chunks, chunk_size):
        yield itertools.islice(sequence, start, start + chunk_size)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_ngrams(iterable): | |
length = len(iterable) | |
if length == 1: | |
yield iterable[0] | |
return | |
# the 'starting position' loop | |
for n in range(length): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript== | |
// @name Readable ILGA | |
// @namespace http://ostensible.me | |
// @version 0.1 | |
// @description make ILGA.gov readable | |
// @author Anton Osten | |
// @match http://*.ilga.gov/* | |
// @grant none | |
// ==/UserScript== |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from enum import Enum | |
from itertools import chain | |
import re | |
import pprint | |
filepath = "chukchi_texts.txt" | |
#pprint.pprint(words) |
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
[ | |
"lyfätösivözi", | |
"änydezäqepez", | |
"päwidefyföjä", | |
"evepägäniwyk", | |
"xylyxäzöbewe", | |
"ixäbörevögeh", | |
"öwygyvywäwyg", | |
"zexirilähiqö", |