Skip to content

Instantly share code, notes, and snippets.

@xflr6
xflr6 / MakeElanSentences.ipynb
Last active October 19, 2022 13:51
Make a skeleton ELAN document from (text, translation) pairs
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / Pandas_read_sparql_query.ipynb
Last active May 22, 2022 09:57
Read pandas.DataFrame from SPARQL query
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / ControlCharacters.ipynb
Last active May 22, 2022 09:56
Drop Glottolog bibfiles for control characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@xflr6
xflr6 / Wikidata.ipynb
Last active May 22, 2022 09:56
Check Glottolog -> Wikidata mapping
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
"""Benchmark FCA concept lattice generation with bob_ross.csv."""
from collections.abc import Iterable, Iterator, Sequence
import csv
import os
import pathlib
import time
from typing import NamedTuple, Optional
import urllib.request
@xflr6
xflr6 / fca_variants.py
Last active June 4, 2022 14:16
Compare different methods to brute-force FCA concept generation (sets, longs, gmpy2, NumPy bools, NumPy uint64)
"""Compare different brute-force FCA concept generation methods."""
from collections.abc import Iterator, Sequence
from itertools import combinations, compress
import time
import gmpy2
import numpy as np
OBJECTS = ('1s', '1de', '1pe', '1di', '1pi',
@xflr6
xflr6 / walk_subdirs.py
Last active June 4, 2022 13:51
Compare subdirectory generator using os.walk() with one using scandir.scandir()
"""Compare two ways to iterate over subdirectories of a tree."""
from collections.abc import Iterator
import os
import platform
import time
START_DIR = 'c:\\Users' if platform.system() == 'Windows' else '/usr'
@xflr6
xflr6 / shasum_chunked.py
Last active June 4, 2022 18:02
Compare while-loop with break to for-loop with two-argument iter() for iterating over a large file in chunks
"""Compare three ways to iterate over a large file in chunks."""
import functools
import hashlib
import mmap
import os
import pathlib
import shutil
import time
import types
@xflr6
xflr6 / sa_sqlite3_regex.py
Last active June 4, 2022 13:37
Register a Python stdlib re handler with sqlite3 create_function() to use the SQLite REGEXP operator under SQLAlchemy
"""Use Python re for sqlite3 REGEXP operator with SQLAlchemy.
added in https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#support-for-sql-regular-expression-operators
"""
import re
from typing import Optional
import sqlalchemy as sa
import sqlalchemy.orm
@xflr6
xflr6 / autotyp_nlevels.py
Last active June 4, 2022 13:32
Add missing autotyp variable N.levels information from metadata_overview.csv to metadata/*.yaml files
"""Insert missing https://www.autotyp.uzh.ch N.levels from overview into metadata files
see https://github.com/autotyp/autotyp-data/pull/7
"""
import csv
import operator
import pathlib
import regex