Skip to content

Instantly share code, notes, and snippets.

@xflr6
Last active June 6, 2022 08:52
Show Gist options
  • Save xflr6/5e8b1e72ef6951e203d8069fdb741b52 to your computer and use it in GitHub Desktop.
Save xflr6/5e8b1e72ef6951e203d8069fdb741b52 to your computer and use it in GitHub Desktop.
"""Benchmark FCA concepts lattice generation with bob_ross.csv."""
from collections.abc import Iterable, Iterator, Sequence
import csv
import os
import pathlib
import time
from typing import NamedTuple, Optional
import urllib.request
import concepts
# Download location of the raw data set (two literals keep the line short).
URL = ('https://raw.githubusercontent.com/fivethirtyeight/data'
       '/master/bob-ross/elements-by-episode.csv')

# Local copy of the download, named after the last path segment of URL.
CSV = pathlib.Path(URL.rsplit('/', 1)[-1])

# Keyword arguments shared by the open() calls of the helpers in this file.
OPEN_KWARGS = dict(encoding='ascii', newline='\n')

# Derived context files: '<stem>-cxt<suffix>' and the CSV name with '.cxt'.
CSV_CONTEXT = CSV.with_name(CSV.stem + '-cxt' + CSV.suffix)

CXT_CONTEXT = CSV.with_suffix('.cxt')
def read_episodes(path: os.PathLike | str, *,
                  dialect: csv.Dialect | type[csv.Dialect] | str = csv.excel):
    """Yield one ``Episode`` named tuple per data row of the CSV file at *path*.

    The first row is taken as the header. Its first two columns become ``str``
    fields; every remaining column becomes a ``bool`` field whose cells are
    decoded from ``'0'`` (``False``) and ``'1'`` (``True``).

    Args:
        path: Location of the CSV file; opened with the module ``OPEN_KWARGS``.
        dialect: CSV dialect handed to ``csv.reader``.

    Raises:
        KeyError: If a cell after the second column is neither ``'0'``
            nor ``'1'``.
    """
    to_bool = {'0': False, '1': True}
    with open(path, **OPEN_KWARGS) as csvfile:
        rows = csv.reader(csvfile, dialect=dialect)
        names = next(rows)

        # Map column name -> field type; the two leading columns are text.
        field_types = {name: str for name in names[:2]}
        field_types.update((name, bool) for name in names[2:])
        Episode = NamedTuple('Episode', list(field_types.items()))

        for number, title, *cells in rows:
            yield Episode._make([number, title, *(to_bool[c] for c in cells)])
def write_csv(path: os.PathLike | str, rows, *,
              header: Optional[Sequence[str]] = None,
              dialect: csv.Dialect | type[csv.Dialect] | str = csv.excel) -> None:
    """Write *rows* to *path* as CSV, preceded by *header* when one is given.

    Args:
        path: Destination file; opened for writing with the module
            ``OPEN_KWARGS``.
        rows: Iterable of rows handed to the ``csv`` writer.
        header: Optional column names written as the first row.
        dialect: CSV dialect handed to ``csv.writer``.
    """
    # Zero or one leading rows, so the header needs no separate branch below.
    leading_rows = [] if header is None else [header]
    with open(path, 'w', **OPEN_KWARGS) as csvfile:
        out = csv.writer(csvfile, dialect=dialect)
        out.writerows(leading_rows)
        out.writerows(rows)
def iter_cxt_lines(objects: Sequence[str],
                   attributes: Sequence[str],
                   bools: Sequence[Sequence[bool]]) -> Iterator[str]:
    """Yield the lines of a ``.cxt`` context file, without line terminators.

    The lines are, in order: ``'B'``, an empty line, the number of objects,
    the number of attributes, an empty line, every object name, every
    attribute name, and finally one line per row of *bools* using ``'X'``
    for ``True`` and ``'.'`` for ``False``.

    Args:
        objects: Object names, one per row of *bools*.
        attributes: Attribute names, one per column of *bools*.
        bools: Incidence table; ``bools[i][j]`` tells whether object ``i``
            has attribute ``j``.

    Raises:
        AssertionError: If *bools* does not have one row per object or a row
            does not have one value per attribute. As this is a generator,
            the check runs when the first line is requested.
        KeyError: If a cell of *bools* equals neither ``True`` nor ``False``.
    """
    assert len(objects) == len(bools), 'need one row of bools per object'
    # Check each row on its own: comparing against the *set* of row lengths
    # wrongly rejected a consistent context without any objects (no rows).
    assert all(len(row) == len(attributes) for row in bools), (
        'every row of bools needs one value per attribute')

    yield 'B'
    yield ''
    yield f'{len(objects):d}'
    yield f'{len(attributes):d}'
    yield ''

    yield from objects
    yield from attributes

    flags = {False: '.', True: 'X'}
    for row in bools:
        yield ''.join(flags[value] for value in row)
def write_lines(path: os.PathLike | str, lines: Iterable[str]) -> None:
    """Write every item of *lines* to *path*, each followed by a newline.

    Args:
        path: Destination file; opened for writing with the module
            ``OPEN_KWARGS``.
        lines: Lines to write, without trailing line terminators.
    """
    # Use the built-in open() instead of ``path.open()``: the signature
    # accepts plain ``str`` paths (and any ``os.PathLike``), which do not
    # necessarily provide an ``.open()`` method.
    with open(path, 'w', **OPEN_KWARGS) as f:
        for line in lines:
            print(line, file=f)
# Download the data set only when there is no local copy yet.
if not CSV.exists():
    urllib.request.urlretrieve(URL, CSV)

# Stop right here if the local CSV file is empty.
assert CSV.stat().st_size

# Generate both derived context files unless both of them already exist.
if not (CSV_CONTEXT.exists() and CXT_CONTEXT.exists()):
    episodes = list(read_episodes(CSV))

    # Column names without the second one (omit TITLE column).
    header = list(episodes[0]._fields[:1] + episodes[0]._fields[2:])

    # CSV flavour of the context: empty cell for False, 'X' for True.
    flags = {False: '', True: 'X'}
    write_csv(CSV_CONTEXT,
              ([number, *(flags[b] for b in bools)]
               for number, _, *bools in episodes),
              header=header)

    # .cxt flavour of the same context.
    lines = iter_cxt_lines(objects=[number for number, *_ in episodes],
                           attributes=header[1:],
                           bools=[list(episode[2:]) for episode in episodes])
    write_lines(CXT_CONTEXT, lines)
# Benchmark: time loading the .cxt file and obtaining the lattice of its context.
start = time.perf_counter_ns()
context = concepts.load_cxt(CXT_CONTEXT)
# Sanity checks: the loaded context must have the expected dimensions.
assert len(context.objects) == 403
assert len(context.properties) == 67
# NOTE(review): presumably accessing ``lattice`` is what triggers the lattice
# computation being measured -- confirm against the concepts documentation.
lattice = context.lattice
assert len(lattice) == 3_463
# Elapsed time since ``start``, converted from nanoseconds to seconds.
duration = (time.perf_counter_ns() - start) / 1_000_000_000
print(duration)
# Previously recorded results:
# concepts 0.9.2, 2.2 GHz Intel i3-2330M CPU, 4GB RAM: 189s (PY2), 132s (PY3)
# concepts 0.10.dev0, 2.2 GHz Intel i3-2330M CPU, 4GB RAM: 32s
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment