These are XPath macros used in our DSH paper on physical descriptions of appearance.
- uiterlijkN = looksN
- uiterlijkA = looksA
- persoon = person
- kleding = clothing
"""Interactive scatter plot using MPLD3 with API inspired by seaborn.""" | |
import mpld3 | |
import numpy as np | |
import pandas | |
import matplotlib | |
import matplotlib.cm as cm | |
import matplotlib.lines as mlines | |
import matplotlib.pyplot as plt | |
"""Script to rename papers from ACL Anthology to 'author year title.pdf' | |
Given PDF files from the ACL Anthology http://aclweb.org/anthology/
downloads BibTeX file and extracts author, year, title
to suggest more descriptive names.
Before: N04-1016.pdf
After: Lapata & Keller 2004 The Web as a Baseline: Evaluating the Perform[...]
Usage: |
"""Tool to check if function/class definitions in Python files match with | |
their __all__ attribute. Rudimentary support for Cython.
""" | |
import sys | |
import re | |
from collections import Counter | |
for filename in sys.argv[1:]: | |
with open(filename, 'rt') as inp: |
"""Advent of Code 2017. http://adventofcode.com/2017 """ | |
import sys | |
import array | |
from collections import Counter, defaultdict | |
from operator import xor | |
from functools import reduce | |
from itertools import count | |
from binascii import hexlify | |
import numpy as np |
"""Run with python -c 'import pyximport; pyximport.install(); import cellbench; cellbench.main()' | |
""" | |
from libc.stdint cimport uint32_t | |
from libc.math cimport sqrt, modf | |
from libc.math cimport round as c_round | |
ctypedef uint32_t Label | |
cdef inline size_t cellidx(short start, short end, short lensent, | |
Label nonterminals): |
"""Preprocess movie review polarity dataset v2.0. | |
http://www.cs.cornell.edu/people/pabo/movie-review-data/
""" | |
import os | |
import re | |
import glob | |
import random | |
from syntok.tokenizer import Tokenizer | |
def process(path, pattern, out): |
"""Rename numeric entity labels in .xmi file to text of first mention. | |
Usage: python3 xmientityrename.py <FILE>...
Original file is modified in-place.
Only non-empty entities with numeric names are changed.
See https://github.com/nilsreiter/CorefAnnotator/issues/173"""
import os | |
import sys | |
from lxml import etree |
"""Compute complexity metrics from Universal Dependencies. | |
Usage: python3 udstyle.py [OPTIONS] FILE...
--parse=LANG parse texts with Stanza; provide 2 letter language code
--output=FILENAME write result to a tab-separated file.
--persentence report per sentence results, not mean per document
Reported metrics:
- LEN: mean sentence length in words (excluding punctuation).
- MDD: mean dependency distance (Gibson, 1998).
- NDD: normalized dependency distance (Lei & Jockers, 2018).
<?xml version='1.0' encoding='UTF-8'?> | |
<volume id="W13"> | |
<paper id="5700"> | |
<title>Proceedings of The 13th International Conference on Parsing Technologies (IWPT 2013)</title> | |
<editor><first>Harry</first><last>Bunt</last></editor> | |
<editor><first>Khalil</first><last>Sima'an</last></editor> | |
<editor><first>Liang</first><last>Huang</last></editor> | |
<month>November</month> | |
<year>2013</year> | |
<address>Nara, Japan</address> |