Skip to content

Instantly share code, notes, and snippets.

View josephcc's full-sized avatar
💭

Joseph Chang josephcc

💭
View GitHub Profile
from random import shuffle
from operator import attrgetter
from sqlalchemy.engine import Engine, create_engine
from sqlalchemy import MetaData
from sqlalchemy.orm import sessionmaker
from lib import Wikipedia
# Tuple of three integer IDs.
# NOTE(review): what these IDs identify is not visible in this excerpt — confirm
# against the code that consumes QID.
QID = (92,93,94)
#!/usr/bin/env ruby
require 'optparse'
class EM
def initialize(files, no_log)
@logf = no_log or File.open('output.log', 'w')
@streams = files.map { |fn| File.open(fn).map(&:to_f) }
@lbds = [1.0/@streams.size] * @streams.size
#!/usr/bin/env ruby
class EM
def initialize(files, prefix='', init=nil, interpolate=[], uni=false, threshold=0.001/100, log=true)
@threshold = threshold
@logf = File.open("#{prefix}output.log", 'w') if log
@streams = files.map { |fn| File.open(fn).map(&:to_f) }
@streams << [1.0/@streams[0].size] * @streams[0].size if uni
@lbds = init ? init : [1.0/@streams.size] * @streams.size
#!/usr/bin/env ruby
class EM
attr_reader :criterion
def initialize(files, prefix: '', init: nil, weight: nil, uni: false, threshold: 0.001/100, log: true)
@threshold = threshold
@logf = File.open("#{prefix}output.log", 'w') if log
@streams = files.map { |fn| File.open(fn).map(&:to_f) }
#!/usr/bin/env ruby
#requires ruby 2.0+
class EM
attr_reader :criterion
def initialize(files, prefix: '', init: nil, weight: nil, uni: false, \
threshold: 0.001/100, log: true)
@threshold = threshold
import gensim
# Report which gensim installation was actually imported (path of the package).
print 'Using gensim from:', gensim.__file__
from gensim.models import word2vec
# Report the value of word2vec.FAST_VERSION for this gensim build.
# NOTE(review): presumably indicates whether the compiled fast path is in use —
# confirm the meaning of the value against the gensim documentation.
print 'Using word2vec optimization level:', word2vec.FAST_VERSION
from gensim.corpora import MmCorpus, Dictionary
# Python 2 only modules: cPickle and commands do not exist in Python 3.
import cPickle as pickle
from commands import getoutput as _CMD
# Relative path to the bin directory of the CMU-Cam toolkit.
BINDIR = './CMU-Cam_Toolkit_v2/bin/'
# Lambda: returns the path of executable +exe+ joined onto BINDIR.
cmd = -> (exe) { File.join BINDIR, exe }
# Relative path to the data directory.
DATADIR = './data/'
# Lambda: returns the path of file +fn+ joined onto DATADIR.
data = -> (fn) { File.join DATADIR, fn }
# Relative path to the bin directory of the CMU-Cam toolkit.
BINDIR = './CMU-Cam_Toolkit_v2/bin/'
# Relative path to the data directory.
DATADIR = './data/'
# Default for the +verbose:+ keyword of LM#initialize.
VERBOSE = false
class LM
def initialize(input, n: 3, discount: 'good_turing', verbose: VERBOSE)
@n = n
# Relative path to the bin directory of the CMU-Cam toolkit.
BINDIR = './CMU-Cam_Toolkit_v2/bin/'
# Relative path to the data directory.
DATADIR = './data/'
class LM
attr_accessor :vocab, :fprops, :devmode
def initialize(input, n: 3, discount: 'good_turing', verbose: false, cutoff: nil, vocab: nil, override: false, devmode: false)
@n = n
require 'zlib'
# Returns the running (cumulative) sums of +pdf+ as a new array of the same
# length: element i is the sum of pdf[0..i]. An empty +pdf+ yields [].
#
# pdf:: enumerable of numeric weights; defaults to the PDF constant.
def get_cdf(pdf: PDF)
  running_total = 0
  # Each block value is the updated running total, so map collects the sums.
  pdf.map { |mass| running_total += mass }
end
def sample(n, cdf: CDF, words: WORDS)
out = []