kowey’s gists

kowey / gist:5908427

Created July 2, 2013 10:58

	import itertools

	xs = range(1,10)
	evens = itertools.ifilter(lambda x:x % 2 == 0, xs)
	for x in evens: print 'once' , x
	for x in evens: print 'again!', x
	# nope, already consumed
	# should consume first with evens = list(...)
	# or just avoid the generator with plain old filter

kowey / gist:5890034

Last active December 19, 2015 03:28

kowey / gist:5846632

Created June 23, 2013 21:36


	import qualified Data.AttoLisp as L
	import qualified Data.Attoparsec as P


	-- | Parse a single s-expr followed by optional whitespace
	-- and end of file.
	-- Try to give a slightly more helpful error message than
	-- what attoparsec/attolisp can offer.
	parseLispOnly :: B.ByteString -> Either String L.Lisp

kowey / gist:5684879

Created May 31, 2013 13:09

	<HTML>
	<HEAD>
	<!-- NEW STUFF! more stylesheets and css for sliders -->
	<!-- (the stray "link" attribute was dropped: it is not a valid attribute of the link element) -->
	<LINK rel="stylesheet" type="text/css" href="css/evaluation.css"/>
	<LINK rel="stylesheet" type="text/css" href="css/swing/swing.css"/>
	<SCRIPT type="text/javascript" src="js/range.js"></SCRIPT>
	<SCRIPT type="text/javascript" src="js/timer.js"></SCRIPT>
	<SCRIPT type="text/javascript" src="js/slider.js"></SCRIPT>
	</HEAD>
	<BODY>

kowey / gist:5684388

Created May 31, 2013 11:27

	# Normalise and tokenise $norm_text in place. The substitutions below are
	# order-dependent: each one works on the output of the previous one.

	# Pad with one space on each side; presumably so that the patterns below,
	# which need a neighbouring character, also apply at the very start and end
	# of the text (the padding is stripped again by the last two substitutions).
	$norm_text = " $norm_text ";
	# Lower-case ASCII letters unless $preserve_case is set. Note that tr/// does
	# not use regex character classes: the brackets are literal characters that
	# simply map onto themselves.
	$norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case;
	# Surround with spaces every character in the ranges {-~, [-`, space-&, (-+,
	# :-@, and the slash. Apostrophe, comma, dash and period are not in the class.
	$norm_text =~ s/([\{-\~\[-\` -\&\(-\+\:-\@\/])/ $1 /g; # tokenize punctuation
	$norm_text =~ s/([^0-9])([\.,])/$1 $2 /g; # tokenize period and comma unless preceded by a digit
	$norm_text =~ s/([\.,])([^0-9])/ $1 $2/g; # tokenize period and comma unless followed by a digit
	$norm_text =~ s/([0-9])(-)/$1 $2 /g; # tokenize dash when preceded by a digit
	# Clean up the whitespace introduced above.
	$norm_text =~ s/\s+/ /g; # one space only between words
	$norm_text =~ s/^\s+//; # no leading space
	$norm_text =~ s/\s+$//; # no trailing space

kowey / gist:5517578

Created May 4, 2013 13:53

	--------------------------------------------------------------------------------
	{-# LANGUAGE OverloadedStrings #-}
	{-# LANGUAGE TupleSections #-}

	import Control.Applicative ((<$>))
	import Control.Arrow
	import Control.Monad
	import Data.Char (toLower)
	import Data.Function (on)
	import Data.List (groupBy, nub, sortBy)

kowey / gist:5516966

Last active December 16, 2015 23:50

	-- 1. Data live in directories
	-- systems/{foo,bar,baz}/{development,test}-sentences.csv
	-- henceforth: $sys = {foo,bar,baz}, $dataset={development,test}
	-- 2. Convert them to dist/systems/$sys/${dataset}-sentences.xml
	-- 3. Convert a distinguished system, called the GOLD, to dist/reference/${dataset}-sentences.xml
	-- 4. Run a Perl script called mt-eval to compare each dist/systems/$sys/${dataset}-sentences.xml
	-- against the gold standard dist/reference/${dataset}-sentences.xml
	-- 5. Collect all the scores and summarise them in a single table

kowey / gist:5272976

Created March 29, 2013 19:20

	import nltk.data

	# Sample text to be split into sentences.
	text = "Hello, I am a bit of corpus. Why don't you segment me?"
	# Load the Punkt tokenizer stored at this NLTK resource path.
	tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

	# span_tokenize yields (start, end) pairs; print each pair together with the
	# slice of the text it delimits, tab-separated, one span per line.
	for start, end in tokenizer.span_tokenize(text):
	    # The body has to be indented under the `for`. The parenthesised
	    # single-argument print works on both Python 2 and Python 3.
	    print("%d\t%d\t%s" % (start, end, text[start:end]))


	# 0 28 Hello, I am a bit of corpus.

kowey / gist:5272552

Created March 29, 2013 18:17

	#!/bin/bash
	# Launch glozz-platform.jar from the directory that contains this script,
	# regardless of the directory the script is invoked from.

	# Resolve the script's own directory. Everything is quoted so that paths
	# containing spaces work; `--` guards against names starting with a dash;
	# cd's output is discarded (it can print when CDPATH is set); and we bail
	# out instead of running java with a bogus path if the directory cannot
	# be entered.
	SCRIPT_DIR=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd) || exit 1

	# The -D flags set Apple-specific Java system properties (screen menu bar
	# and the application name shown in the menu). Note the space before each
	# trailing backslash: without it the continuation would rely on the next
	# line's leading whitespace to separate the arguments.
	java \
	    -Dapple.laf.useScreenMenuBar=true \
	    -Dcom.apple.mrj.application.apple.menu.about.name=Glozz \
	    -jar "$SCRIPT_DIR/glozz-platform.jar"

kowey / gist:5182135

Last active December 15, 2015 01:48

	import Control.Arrow
	import Data.Function
	import Data.List

	-- | Group the elements of a list into buckets keyed by the result of @f@.
	--
	-- Buckets come out in ascending key order. Within a bucket the elements
	-- keep their original relative order, because 'sortBy' is a stable sort.
	--
	-- >>> buckets even [1,2,3,4,5]
	-- [(False,[1,3,5]),(True,[2,4])]
	buckets :: Ord b => (a -> b) -> [a] -> [ (b,[a]) ]
	buckets f xs =
	    -- 'groupBy' never produces an empty group, so the pattern below always
	    -- matches; using a pattern instead of 'head' keeps the function total.
	    [ (k, y : map snd rest)
	    | (k, y) : rest <- groupBy ((==) `on` fst) keyed
	    ]
	  where
	    -- Pair every element with its key (computed once), then sort by key.
	    keyed = sortBy (compare `on` fst) [ (f x, x) | x <- xs ]

Eric Kow kowey