Skip to content

Instantly share code, notes, and snippets.

import itertools
# Demonstrates a pitfall (Python 2): the result of itertools.ifilter can only
# be iterated over once.
xs = range(1,10)
# evens is a one-shot lazy iterator over the even members of xs, not a list
evens = itertools.ifilter(lambda x:x % 2 == 0, xs)
# The first loop walks the iterator to its end, printing every even number
for x in evens: print 'once' , x
# The second loop prints nothing: the first loop already exhausted the iterator
for x in evens: print 'again!', x
# Ways around it: materialise the results up front with evens = list(...),
# or avoid the lazy iterator altogether and use the plain built-in filter
# (which returns a list in Python 2)
#!/usr/bin/perl
use strict;
binmode(STDOUT, ":utf8");
open (MYFILE, "<:utf8", $ARGV[0]);
while (<MYFILE>) {
chomp;
my @arr = split /[\t\x{00B7}]+/;
import qualified Data.AttoLisp as L
import qualified Data.Attoparsec as P
-- | Parse a single s-expr followed by optional whitespace
-- and end of file
-- Try to give a slightly more helpful error message than
-- what attoparsec/attolisp can offer
parseLispOnly :: B.ByteString -> Either String L.Lisp
<HTML>
<HEAD>
<!-- NEW STUFF! more stylesheets and css for sliders -->
<LINK link rel="stylesheet" type="text/css" href="css/evaluation.css"/>
<LINK link rel="stylesheet" type="text/css" href="css/swing/swing.css"/>
<SCRIPT type="text/javascript" src="js/range.js"></SCRIPT>
<SCRIPT type="text/javascript" src="js/timer.js"></SCRIPT>
<SCRIPT type="text/javascript" src="js/slider.js"></SCRIPT>
</HEAD>
<BODY>
# Normalize $norm_text for tokenization: optionally lowercase it, put spaces
# around punctuation, then collapse and trim whitespace. The substitutions are
# applied in sequence, each one working on the output of the previous one.
# Pad both ends with a space so that a period or comma at either end of the
# text still has a neighbouring character for the patterns below to match;
# the padding is stripped again by the last two substitutions.
$norm_text = " $norm_text ";
# NOTE(review): in tr/// the brackets are literal characters (mapped to
# themselves here), not character-class syntax; tr/A-Z/a-z/ would produce the
# same string.
$norm_text =~ tr/[A-Z]/[a-z]/ unless $preserve_case;
# The class below covers the ASCII ranges {-~, [-`, space-&, (-+, :-@ and "/".
# It leaves out the apostrophe, and also comma, period and dash, which get
# their own digit-aware rules in the next three substitutions.
$norm_text =~ s/([\{-\~\[-\` -\&\(-\+\:-\@\/])/ $1 /g; # tokenize punctuation
$norm_text =~ s/([^0-9])([\.,])/$1 $2 /g; # tokenize period and comma unless preceded by a digit
$norm_text =~ s/([\.,])([^0-9])/ $1 $2/g; # tokenize period and comma unless followed by a digit
$norm_text =~ s/([0-9])(-)/$1 $2 /g; # tokenize dash when preceded by a digit
$norm_text =~ s/\s+/ /g; # one space only between words
$norm_text =~ s/^\s+//; # no leading space
$norm_text =~ s/\s+$//; # no trailing space
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
import Control.Applicative ((<$>))
import Control.Arrow
import Control.Monad
import Data.Char (toLower)
import Data.Function (on)
import Data.List (groupBy, nub, sortBy)
@kowey
kowey / gist:5516966
Last active December 16, 2015 23:50
-- 1. Data live in directories
-- systems/{foo,bar,baz}/{development,test}-sentences.csv
-- henceforth: $sys = {foo,bar,baz}, $dataset={development,test}
-- 2. Convert them to dist/systems/$sys/${dataset}-sentences.xml
-- 3. Convert a distinguished system, called the GOLD, to dist/reference/${dataset}-sentences.xml
-- 4. Run a Perl script called mt-eval to compare each dist/systems/$sys/${dataset}-sentences.xml
-- against the gold standard dist/reference/${dataset}-sentences.xml
-- 5. Collect all the scores and summarise them in a single table
import nltk.data
# Split a sample text into sentences with the Punkt tokenizer loaded from
# NLTK's data directory, and print each sentence's character offsets next to
# the sentence itself (tab-separated).
text = "Hello, I am a bit of corpus. Why don't you segment me?"
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
# span_tokenize yields (start, end) offsets into text, one pair per sentence
for start, end in tokenizer.span_tokenize(text):
    # A single parenthesised argument behaves the same as a Python 2 print
    # statement and as a Python 3 print() call.
    print("%d\t%d\t%s" % (start, end, text[start:end]))
# 0 28 Hello, I am a bit of corpus.
#!/bin/bash
# Launch glozz-platform.jar from the directory this script lives in,
# regardless of the caller's current working directory.

# Resolve the script's directory to an absolute path. Quoting keeps paths that
# contain spaces intact; bail out if the directory cannot be entered, rather
# than silently using the caller's directory instead.
pushd "$(dirname "$0")" > /dev/null || exit 1
SCRIPT_DIR=$PWD
popd > /dev/null || exit 1

# Each continuation backslash needs a space before it: backslash-newline is
# removed without leaving any whitespace, so "java\" followed by "-D..." would
# otherwise be read as the single word "java-D...".
# The two -D options are Apple-specific Java system properties (screen menu
# bar, application name shown in the menu).
java \
    -Dapple.laf.useScreenMenuBar=true \
    -Dcom.apple.mrj.application.apple.menu.about.name=Glozz \
    -jar "$SCRIPT_DIR"/glozz-platform.jar
@kowey
kowey / gist:5182135
Last active December 15, 2015 01:48
import Control.Arrow
import Data.Function
import Data.List
-- | Group the elements of a list by a key function.
--
-- Each result pair holds a key together with every element that maps to it.
-- Pairs come out in ascending key order, and the elements inside a bucket keep
-- their relative input order because 'sortBy' is a stable sort.
--
-- >>> buckets (`mod` 3) [1 .. 7]
-- [(0,[3,6]),(1,[1,4,7]),(2,[2,5])]
buckets :: Ord b => (a -> b) -> [a] -> [ (b,[a]) ]
buckets f xs =
    -- 'groupBy' never yields an empty group, so the pattern below matches
    -- every group; binding the first pair directly avoids the partial 'head'.
    [ (key, x : map snd rest) | (key, x) : rest <- keyedGroups ]
  where
    -- Tag every element with its key, sort on the key, then gather runs of
    -- equal keys.
    keyedGroups = groupBy ((==) `on` fst)
                . sortBy (compare `on` fst)
                . map (\y -> (f y, y))
                $ xs