Skip to content

Instantly share code, notes, and snippets.

@xflr6
xflr6 / autotyp_nlevels.py
Last active October 4, 2025 16:03
Add missing autotyp variable N.levels information from metadata_overview.csv to metadata/*.yaml files
"""Insert missing https://www.autotyp.uzh.ch N.levels from overview into metadata files
see https://github.com/autotyp/autotyp-data/pull/7
"""
import csv
import operator
import pathlib
import regex
@xflr6
xflr6 / unique_null.py
Last active June 4, 2022 13:19
Compare different ways to have unique columns with nulls under SQLite and PostgreSQL
"""Compare ways to have unique columns with NULLs."""
import os
import subprocess
import time
import uuid
import sqlalchemy as sa
import sqlalchemy.orm
@xflr6
xflr6 / pl_pgsql.py
Last active October 11, 2025 13:21
SQL injection safe dynamic query execution via PL/pgSQL quote_ident() and format('%I')
"""SQL-injection safe dynamic query with pl/pgsql."""
import sqlalchemy as sa
UNIQUE_NULL = [('contributioncontributor', ['contribution_pk', 'contributor_pk'], []),
('contributionreference', ['contribution_pk', 'source_pk', 'description'], []),
('editor', ['dataset_pk', 'contributor_pk'], []),
('languageidentifier', ['language_pk', 'identifier_pk'], []),
('languagesource', ['language_pk', 'source_pk'], []),
('sentencereference', ['sentence_pk', 'source_pk', 'description'], []),
@xflr6
xflr6 / decorator.py
Last active October 11, 2025 13:43
Decorator with an optional parameter
"""Decorator with an optional parameter.
See also https://mypy.readthedocs.io/en/stable/generics.html#decorator-factories
"""
from collections.abc import Callable
import functools
from typing import Any, overload
FUNCS = {}
@xflr6
xflr6 / itersplit.py
Last active October 4, 2025 16:17
Split a string into chunks by a pattern matching at the start of each item
r"""Split a string into chunks by a pattern matching at the start of each item.
>>> list(itersplit(r'!', 'spam !eggs !ham'))
['spam ', '!eggs ', '!ham']
>>> list(itersplit(r'X', 'spam !eggs !ham'))
['spam !eggs !ham']
>>> list(itersplit(r'!', '!spam !eggs !ham'))
['', '!spam ', '!eggs ', '!ham']
@xflr6
xflr6 / walk_gdrive.py
Last active October 11, 2025 16:03
Recursively traverse the directory tree of a Google Drive folder as a variation of os.walk()
"""os.walk() variation with Google Drive API v3."""
from collections.abc import Iterator, Sequence
import os
import pathlib
from typing import TypedDict
# $ pip install google-api-python-client google-auth-oauthlib
from apiclient import discovery
from google.oauth2 import credentials
@xflr6
xflr6 / gsheets.py
Last active October 11, 2025 21:25
Download all sheets of a Google Docs spreadsheet and export to individual CSV files
"""Download all sheets of a Google Docs spreadsheet as CSV."""
from collections.abc import Sequence
import contextlib
import csv
import itertools
import os
import pathlib
# $ pip install google-api-python-client google-auth-oauthlib
@xflr6
xflr6 / iceportal.py
Last active November 5, 2024 21:33
Download all available audio books from the ICE Portal
"""Download all available audio books from DB ICE Portal."""
import json
import os
import urllib.parse
import urllib.request
BASE = 'http://iceportal.de/api1/rs/'
@xflr6
xflr6 / feedsizes.py
Last active October 4, 2025 16:39
Compare the RSS feed enclosure length with the Content-Length header of the file when downloading the URL
"""Compare feed enclosure length with content-length of file url."""
import urllib.request
import xml.etree.ElementTree as etree
URL = 'https://feeds.feedburner.com/thebuglefeed?format=xml'
with urllib.request.urlopen(URL) as f:
tree = etree.parse(f)
@xflr6
xflr6 / xpath.py
Last active October 12, 2025 09:40
Use some advanced XPath features of lxml for scraping html/xml
"""Use advanced XPath features of lxml (see also scrapy parsel)."""
from collections.abc import Callable
import functools
from typing import Any, Self, overload
import urllib.request
import lxml.etree
import lxml.html