Skip to content

Instantly share code, notes, and snippets.

"""Interactive scatter plot using MPLD3 with API inspired by seaborn."""
import mpld3
import numpy as np
import pandas
import matplotlib
import matplotlib.cm as cm
import matplotlib.lines as mlines
import matplotlib.pyplot as plt
@andreasvc
andreasvc / aclrename.py
Created January 31, 2016 17:37
Script to rename papers from ACL Anthology to 'author year title.pdf'
"""Script to rename papers from ACL Anthology to 'author year title.pdf'
Given PDF files from the ACL anthology http://aclweb.org/anthology/
downloads bibtex file and extracts author, year, title
to suggest more descriptive names.
Before: N04-1016.pdf
After: Lapata & Keller 2004 The Web as a Baseline: Evaluating the Perform[...]
Usage:
"""Tool to check if function/class definitions in Python files match with
their __all__ attribute. Rudimentary support for Cython.
"""
import sys
import re
from collections import Counter
for filename in sys.argv[1:]:
with open(filename, 'rt') as inp:
@andreasvc
andreasvc / README.md
Last active February 6, 2018 21:59
Word lists for extraction of physical descriptions

Word lists for extraction of physical descriptions

These are XPath macros used in our DSH paper on physical descriptions of appearance.

English translation of macro names

  • uiterlijkN = looksN
  • uiterlijkA = looksA
  • persoon = person
  • kleding = clothing
"""Advent of Code 2017. http://adventofcode.com/2017 """
import sys
import array
from collections import Counter, defaultdict
from operator import xor
from functools import reduce
from itertools import count
from binascii import hexlify
import numpy as np
"""Run with python -c 'import pyximport; pyximport.install(); import cellbench; cellbench.main()'
"""
from libc.stdint cimport uint32_t
from libc.math cimport sqrt, modf
from libc.math cimport round as c_round
ctypedef uint32_t Label
cdef inline size_t cellidx(short start, short end, short lensent,
Label nonterminals):
@andreasvc
andreasvc / preprocess.py
Created February 10, 2019 15:05
Preprocess movie review polarity dataset v2.0
"""Preprocess movie review polarity dataset v2.0.
http://www.cs.cornell.edu/people/pabo/movie-review-data/
"""
import os
import re
import glob
import random
from syntok.tokenizer import Tokenizer
def process(path, pattern, out):
@andreasvc
andreasvc / xmientityrename.py
Last active April 1, 2019 13:41
Rename numeric entity labels in .xmi file to text of first mention
"""Rename numeric entity labels in .xmi file to text of first mention.
Usage: python3 xmientityrename.py <FILE>...
Original file is modified in-place.
Only non-empty entities with numeric names are changed.
See https://github.com/nilsreiter/CorefAnnotator/issues/173"""
import os
import sys
from lxml import etree
"""Compute complexity metrics from Universal Dependencies.
Usage: python3 udstyle.py [OPTIONS] FILE...
--parse=LANG parse texts with Stanza; provide 2 letter language code
--output=FILENAME write result to a tab-separated file.
--persentence report per sentence results, not mean per document
Reported metrics:
- LEN: mean sentence length in words (excluding punctuation).
- MDD: mean dependency distance (Gibson, 1998).
- NDD: normalized dependency distance (Lei & Jockers, 2018).
@andreasvc
andreasvc / iwpt2013.xml
Last active April 14, 2019 09:47
IWPT 2013
<?xml version='1.0' encoding='UTF-8'?>
<volume id="W13">
<paper id="5700">
<title>Proceedings of The 13th International Conference on Parsing Technologies (IWPT 2013)</title>
<editor><first>Harry</first><last>Bunt</last></editor>
<editor><first>Khalil</first><last>Sima'an</last></editor>
<editor><first>Liang</first><last>Huang</last></editor>
<month>November</month>
<year>2013</year>
<address>Nara, Japan</address>