Skip to content

Instantly share code, notes, and snippets.

View neubig's full-sized avatar

Graham Neubig neubig

View GitHub Profile
@neubig
neubig / find-tweets.py
Created September 20, 2012 06:04
A Python program to find tweets from a tab-separated file with a regular expression
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import re
import datetime
pattern = ur'電力.*供給'
@neubig
neubig / string-rewriting-kernel.py
Created September 10, 2012 02:41
An example implementation of the String Rewriting Kernel in Python
#!/usr/bin/python
# A python implementation of the string rewriting kernel
# by Graham Neubig
#
# Reference:
# Fan Bu, Hang Li, Xiaoyan Zhu. "String Rewriting Kernel". ACL 2012
# http://aclweb.org/anthology-new/P/P12/P12-1047.pdf
from math import factorial
@neubig
neubig / kyteapos2en.pl
Created April 30, 2012 04:08
A program to change KyTea's Japanese POS tags to English tags
#!/usr/bin/perl
# This is a script to change KyTea's POS tags in Japanese to English
# abbreviations
use strict;
use utf8;
use Getopt::Long;
use List::Util qw(sum min max shuffle);
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
@neubig
neubig / test-sample.cc
Created February 3, 2012 02:24
A program to test a multinomial sampler
#include <vector>
#include <iostream>
#include <cstdlib>
#include <cmath>
using namespace std;
int SampleMultinomial(const vector<double> & distribution) {
double value = (double)rand()/RAND_MAX;
for(int i = 0; i < distribution.size(); i++)
@neubig
neubig / print-trees.py
Created October 18, 2011 07:15
A file to print parse trees from standard input using NLTK
#!/usr/bin/python
from nltk.tree import Tree
import sys
# A program to display parse trees (in Penn treebank format) with NLTK
#
# To install NLTK on ubuntu: sudo apt-get install python-nltk
for line in sys.stdin:
@neubig
neubig / margins.R
Created November 18, 2010 13:04
A file to compute and plot LR and SVM margins
set.seed(123141)
fcount <- 20
tcount <- 1
alpha <- 3
xf <- rnorm(fcount,mean=-1, sd=0.7)
xt <- rnorm(tcount,mean=1, sd=0.7)
yf <- mat.or.vec(fcount,1)
yt <- mat.or.vec(tcount,1)
#!/usr/bin/perl
use strict;
use utf8;
use List::Util qw(max min);
binmode STDIN, ":utf8";
binmode STDOUT, ":utf8";
if(@ARGV != 2) {
print STDERR "Usage: counterrors.pl REFERENCE SYSTEM\n";