This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
An implementation of the Bradley-Terry ranking aggregation algorithm from the paper | |
MM algorithms for generalized Bradley-Terry models | |
<https://doi.org/10.1214/aos/1079120141>. | |
""" | |
__author__ = 'Dmitry Ustalov' | |
__copyright__ = 'Copyright 2021 Dmitry Ustalov' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh -eu | |
CWD=$(basename "$PWD") | |
XZ_OPT="-T 0" exec tar --exclude '*~' -C ../ -cJvf "../$CWD.tar.xz" "$CWD" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WATSET ?= ../watset-java/target/watset.jar | |
LCC ?= ../lcc | |
export LANG:=en_US.UTF-8 | |
export LC_COLLATE:=C | |
export CLASSPATH := $(WATSET) | |
nodes: | |
cut -f1,2 $(LCC)/eng_news_2016_10K/eng_news_2016_10K-co_s.txt | sed -re 's/\t/\n/g' | sort -u | wc -l | |
cut -f1,2 $(LCC)/eng_news_2016_30K/eng_news_2016_30K-co_s.txt | sed -re 's/\t/\n/g' | sort -u | wc -l |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
__author__ = 'Dmitry Ustalov' | |
__credits__ = 'Sebastian Padó' | |
__license__ = 'MIT' | |
# This is an MIT-licensed implementation of the sigf toolkit for randomization tests: | |
# https://nlpado.de/~sebastian/software/sigf.shtml | |
import random |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env groovy | |
import org.apache.commons.math3.stat.descriptive.moment.Mean | |
import org.apache.commons.math3.stat.descriptive.moment.StandardDeviation | |
import org.jgrapht.graph.SimpleWeightedGraph | |
import org.jgrapht.util.SupplierUtil | |
import org.nlpub.watset.graph.ChineseWhispers | |
import org.nlpub.watset.graph.NodeWeighting | |
import org.nlpub.watset.graph.MaxMax | |
import org.nlpub.watset.eval.Measurer | |
import org.nlpub.watset.graph.Watset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
LC_COLLATE = C | |
SEED = 1337 | |
WCL_WRAPPER = /srv/definitions/wcl-extract | |
measure: | |
./measure.py | |
kfold: wiki_really_all.txt | |
./kfold.py --seed=$(SEED) $< |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This script computes the normalized modified purity and inverse purity | |
# as according to this paper: https://aclweb.org/anthology/P14-1097. | |
# In fact, this program is currently quite a rough translation of | |
# the evaluation-verb-classes.perl script provided by Daisuke Kawahara. | |
import argparse | |
import re | |
import sys |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f | |
BEGIN { | |
# significance level | |
if (length(ALPHA) == 0) ALPHA = 0.05; | |
# standard error estimation method: "basic" or "pooled" | |
if (length(SE) == 0) SE = "basic"; | |
# one-tailed or two-tailed? | |
if (TAILS != 2) TAILS = 1; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import de.tudarmstadt.ukp.jwktl.JWKTL; | |
import de.tudarmstadt.ukp.jwktl.api.filter.WiktionaryEntryFilter; | |
import de.tudarmstadt.ukp.jwktl.api.util.Language; | |
import java.io.File; | |
import java.util.Locale; | |
public class ExtractRelations { | |
public static void main(String[] args) { | |
if (args.length != 1) { | |
System.err.println("Usage: java ExtractRelations.java database [filter]"); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash -e | |
S=$(head -1) | |
CHARSETS=(utf8 cp1251 cp1252 koi8r koi8u iso-8859-5 maccyrillic) | |
for c1 in ${CHARSETS[*]}; do | |
for c2 in ${CHARSETS[*]}; do | |
for c3 in ${CHARSETS[*]}; do | |
for c4 in ${CHARSETS[*]}; do | |
echo -ne "$c1\t$c2\t$c3\t$c4\t" | |
<<<$S iconv -f=$c1 -t=$c2 -c | iconv -f=$c3 -t=$c4 -c | |
done |
NewerOlder