I hereby claim:
- I am jkff on github.
- I am jkff (https://keybase.io/jkff) on keybase.
- I have a public key whose fingerprint is CE4D BD42 5FCD 7379 CE9A 7BD8 0AE0 2A9A AE99 C3CB
To claim this, I am signing this object:
def expected_fraction_of_corpus_understood(phrases):
    """Build an estimator for the fraction of the corpus that is understood.

    The phrase-length model (``fit_phrase_length``) and the Heaps' law
    parameters (``fit_heaps_law``) are fitted on ``phrases`` once, up front;
    the returned closure reuses them on every call.

    Args:
        phrases: Sized collection of phrases. ``len(phrase)`` is summed to get
            the total word count, so each phrase is presumably a sequence of
            words -- TODO confirm against callers.

    Returns:
        A function ``result(n, s)`` that returns a float.
        NOTE(review): ``n`` looks like the number of phrases examined so far
        and ``s`` the number of those that were understood -- confirm.
        ``n`` and ``len(phrases)`` are both used as divisors, so neither may
        be zero.
    """
    N = len(phrases)
    num_total_words = sum(len(phrase) for phrase in phrases)
    phrase_length_fit = fit_phrase_length(phrases)
    # Only the first two fitted parameters are needed here; the rest of the
    # fit result is ignored.
    ((k, beta, _), _) = fit_heaps_law(phrases)

    def result(n, s):
        # Word count scaled down to the share of the corpus covered by n of
        # the N phrases.
        w = 1.0 * n / N * num_total_words
        # Derivative of k * w**beta with respect to w. Assuming fit_heaps_law
        # fits vocabulary size as k * w**beta, this is the rate at which new
        # words appear after w words -- TODO confirm.
        omega = k * beta * w ** (beta - 1)
        # s phrases counted directly, plus the remaining (N - n) phrases
        # weighted by the observed ratio s / n and by the probability that a
        # phrase contains no new word; the sum is normalised by N.
        return 1.0 / N * (s + 1.0 * s / n * (N - n) * (1 - p_phrase_has_new_word(omega, phrase_length_fit)))

    return result
| import pickle | |
| import random | |
| from scipy.optimize import curve_fit | |
def shuffled(x):
    """Return a new list with the items of ``x`` in random order.

    ``x`` is copied with ``list(x)`` first, so any iterable is accepted and
    the argument itself is never reordered.
    """
    res = list(x)
    # random.shuffle works in place, so it is applied to the copy only.
    random.shuffle(res)
    return res
| def fit_heaps_law(phrases): |
| import nltk | |
| import pickle | |
| import random | |
| # I cleaned up the data manually in Vim. | |
| lines = list(open('movie_lines.tsv').readlines()) | |
| random.shuffle(lines) | |
| tagged = [ | |
| # Split lines into sentences; split sentences into words; tag words with | |
| # part of speech (POS). |
I hereby claim:
To claim this, I am signing this object:
| {-# LANGUAGE BangPatterns #-} | |
| import Control.Applicative | |
| import Prelude hiding (minimum, sum) | |
| import Data.List (permutations) | |
| import Control.Monad.ST | |
| import Data.Bifunctor (second) | |
| import Data.Vector.Unboxed (Vector) | |
| import qualified Data.Vector.Unboxed as U | |
| import Data.Vector.Unboxed.Mutable (STVector) | |
| import qualified Data.Vector.Unboxed.Mutable as UM |
| #include <stdio.h> | |
| void printTree(int *idx, int *post, int rootIdx, int n) { | |
| int root = post[rootIdx]; | |
| printf("%d", root); | |
| if(root == n-1 || idx[root+1] > rootIdx) return; | |
| printf(" { "); |
| module Main where | |
| import qualified Data.ByteString.Lazy.Char8 as B | |
| import System.Environment (getArgs) | |
| import Data.Word | |
| import Data.List | |
| import Data.Array.Base (unsafeAt) | |
| import Data.Array.Unboxed | |
| import Data.Monoid | |
| import Blaze.ByteString.Builder |
| public class Guard : IDisposable | |
| { | |
| private List<IDisposable> toDispose = new List<IDisposable>(); | |
| public void Add(IDisposable d) { toDispose.Add(d); } | |
| public void Dispose() { foreach(var x in toDispose) x.Dispose(); } | |
| public void Discharge() { toDispose.Clear(); } | |
| public static void Do(Action<Guard> a) | |
| { |
| import java.util.*; | |
| public class StringBinarySearch { | |
| private static class ListDictionary implements Dictionary { | |
| private List<String> ss; | |
| public ListDictionary(List<String> ss) { this.ss = ss; } | |
| public String getWordAt(int i) throws IndexOutOfBoundsException { | |
| return i < 0 || i >= ss.size() ? null : ss.get(i); | |
| } | |
| } |
| module ProblemK (Numeric, Table, showTable, evalTable, parseTable) | |
| where | |
| import Data.Char (isNumber, isSpace, isAlpha, toUpper) | |
| import Control.Monad | |
| import Data.Maybe (Maybe (..), isJust, maybeToList) | |
| import qualified Data.Map as M (Map(), unions, fromList, lookup, findWithDefault, insert, toList) | |
| import qualified Data.Set as S (Set (..), member, empty, insert) | |
| import Data.List (intercalate) |