Skip to content

Instantly share code, notes, and snippets.

View minhlab's full-sized avatar

Minh Le minhlab

  • FPT Software
  • Vietnam
View GitHub Profile
from collections import Counter
import numpy as np
from scipy.stats import pearsonr
# Relative file paths used by this script.
# NOTE(review): judging by the names, `train`/`ref` are the training and
# validation dependency files and `sys` is the parser ("system") output on
# the validation set — confirm against the code that reads them.
train = 'output/dep/penntree.jk.sd/train.mrg.dep'
ref = 'output/dep/penntree.jk.sd/valid.mrg.dep'
# NOTE(review): the name `sys` would shadow the standard-library `sys` module
# if that module were ever imported in this script.
sys = 'output/dep/sd_parse-published-model_valid.conll'
# NOTE(review): presumably the destinations of NumPy arrays saved later (.npy) — confirm.
out_path = 'output/occurrence-performance.npy'
count_path = 'output/occurrence-counts.npy'
import numpy as np
# from deep-coref project: https://github.com/clarkkev/deep-coref
# download w2v_50d.txt from here: https://drive.google.com/file/d/0B5Y5rz_RUKRmdEFPcGIwZ2xLRW8/view
with open('w2v_50d.txt') as f:
word2id = {}
vectors = []
words = []
for line in f:
parts = line.strip().split()
import numpy as np
import sys
def zero_safe_normalization(a):
    """Scale every row of ``a`` to unit Euclidean length, in place.

    Rows whose norm is zero are left as all-zero rows instead of turning
    into NaNs; their indices are reported on stderr.

    Args:
        a: 2-D floating-point NumPy array. It is modified in place.

    Returns:
        The same array object ``a``, for convenience.
    """
    # Euclidean norm of each row. The square root is required: dividing by
    # the plain sum of squares would not produce unit-length rows.
    norms = np.sqrt(np.sum(a * a, axis=1))
    zero_indices = np.nonzero(norms == 0)[0].tolist()
    if zero_indices:
        sys.stderr.write('Detect %d zero norms: %s\n' % (len(zero_indices), str(zero_indices)))
        norms[zero_indices] = 1.0  # don't worry, 0/1 = 0
    # Broadcast the per-row norms across the columns.
    a /= norms[:, np.newaxis]
    return a
@minhlab
minhlab / count-wiki-cat.py
Created April 14, 2017 09:43
An attempt to count the number of pages (recursively) in a Wikipedia category. It wasn't successful because the category graph of the Vietnamese Wikipedia is cyclic and confusing.
# -*- coding: utf-8 -*-
import pywikibot
import sys
from collections import Counter
from random import randint
# NOTE(review): presumably the site family and language code handed to
# pywikibot later in the script ('vi' = Vietnamese) — confirm against the caller.
wiki = 'wikipedia'
lang = 'vi'
{
"loss":{
"y":"summed_cross_entropy",
"anaphoricities":"summed_cross_entropy"
},
"theano_mode":null,
"name":"Graph",
"output_config":[
{
"inputs":[
@minhlab
minhlab / theano-test-gpu.py
Last active July 17, 2017 07:18
A wget-able copy of the test script from Theano. For the most recent version, please see http://deeplearning.net/software/theano/tutorial/using_gpu.html#testing-theano-with-gpu
from theano import function, config, shared, tensor
import numpy
import time
# Number of elements in the shared vector built below.
vlen = 10 * 30 * 768 # 10 x #cores x # threads per core
# NOTE(review): `iters` is not used in the lines visible here; presumably it is
# the repetition count of a timing loop further down — confirm.
iters = 1000
# Fixed seed (22) so the random input is reproducible.
rng = numpy.random.RandomState(22)
# Shared variable holding `vlen` random values cast to Theano's configured float type.
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))
# Compiled Theano function that takes no inputs and computes the elementwise exp of `x`.
f = function([], tensor.exp(x))
@minhlab
minhlab / ecb_stats.py
Created December 9, 2016 13:42
Compute some statistics on ECB, given the ECB+ directory, which includes the ECB files (Cybulska and Vossen, 2014)
import os
import re
count = 0
# Walk the 'ECB+' directory tree and, for every file whose name does not
# contain 'plus', print the list of '<token' matches found in its content.
for root, dir_names, file_names in os.walk('ECB+'):
    for fname in file_names:
        if 'plus' not in fname:
            path = os.path.join(root, fname)
            with open(path) as f:
                content = f.read()
            # A single-argument print(...) behaves the same on Python 2 and 3;
            # the pattern has no groups, so findall returns the matched strings.
            print(re.findall('<token', content))
@minhlab
minhlab / ecb_plus_stats.py
Created December 9, 2016 13:36
Print some statistics of ECB+ (Cybulska and Vossen, 2014)
import os
import re
count = 0
# Walk the 'ECB+' directory tree and, for every file whose name contains
# 'plus', print the list of '<token' matches found in its content.
for root, dir_names, file_names in os.walk('ECB+'):
    for fname in file_names:
        if 'plus' in fname:
            path = os.path.join(root, fname)
            with open(path) as f:
                content = f.read()
            # A single-argument print(...) behaves the same on Python 2 and 3;
            # the pattern has no groups, so findall returns the matched strings.
            print(re.findall('<token', content))
@minhlab
minhlab / cube.java
Last active April 7, 2016 13:37
Multiplication is 100 times faster than Math.pow()
import java.util.*;
import java.lang.*;
import java.io.*;
/* Name of the class has to be "Main" only if the class is public. */
class Ideone
{
public static void main (String[] args) throws java.lang.Exception
{
double a = Math.random();
local MaskedLogSoftMax, Parent = torch.class('nn.MaskedLogSoftMax', 'nn.Module')
function MaskedLogSoftMax:__init(masks, filler)
Parent.__init(self)
self.masks = masks
self.minvals = torch.Tensor()
self.mininds = torch.LongTensor()
self.temp1 = torch.Tensor()
self.temp2 = torch.Tensor()