Compare https://github.com/xflr6/concepts with https://formal-concepts.readthedocs.io/en/latest/benchmarks.html#calculation-of-concept-lattice
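
The script below downloads FiveThirtyEight's Bob Ross elements-by-episode.csv, rewrites it as a CSV-style and a Burmeister .cxt formal context (403 episodes as objects, 67 scene elements as attributes), and times how long concepts takes to build the resulting lattice of 3,463 concepts.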
"""Benchmark FCA concepts lattice generation with bob_ross.csv.""" | |
from collections.abc import Iterable, Iterator, Sequence | |
import csv | |
import os | |
import pathlib | |
import time | |
from typing import NamedTuple, Optional | |
import urllib.request | |
import concepts | |

URL = ('https://raw.githubusercontent.com/fivethirtyeight/data'
       '/master/bob-ross/elements-by-episode.csv')

CSV = pathlib.Path(URL.rpartition('/')[2])
OPEN_KWARGS = {'encoding': 'ascii', 'newline': '\n'}

# Derived paths for the two formal context files written below.
CSV_CONTEXT = CSV.with_name(f'{CSV.stem}-cxt{CSV.suffix}')
CXT_CONTEXT = CSV.with_suffix('.cxt')


def read_episodes(path: os.PathLike | str, *,
                  dialect: csv.Dialect | type[csv.Dialect] | str = csv.excel):
    """Yield one 'Episode' namedtuple per CSV row, converting element flags to bool."""
    flags = {'0': False, '1': True}
    with open(path, **OPEN_KWARGS) as f:
        reader = csv.reader(f, dialect=dialect)
        header = next(reader)
        fields = dict.fromkeys(header[:2], str) | dict.fromkeys(header[2:], bool)
        make_episode = NamedTuple('Episode', fields.items())._make
        for episode, title, *elements in reader:
            yield make_episode([episode, title] + [flags[e] for e in elements])
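
# Each yielded Episode is a NamedTuple built from the CSV header: the first
# two fields (episode code and title) stay strings, the remaining 67 scene
# elements become booleans, e.g. (illustrative values, names taken from the
# FiveThirtyEight file):
#   Episode(EPISODE='S01E01', TITLE=..., APPLE_FRAME=False, ...)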


def write_csv(path: os.PathLike | str, rows, *,
              header: Optional[Sequence[str]] = None,
              dialect: csv.Dialect | type[csv.Dialect] | str = csv.excel) -> None:
    with open(path, 'w', **OPEN_KWARGS) as f:
        writer = csv.writer(f, dialect=dialect)
        if header is not None:
            writer.writerow(header)
        writer.writerows(rows)


def iter_cxt_lines(objects: Sequence[str],
                   attributes: Sequence[str],
                   bools: Sequence[Sequence[bool]]) -> Iterator[str]:
    """Yield the lines of a Burmeister .cxt context file."""
    assert len(objects) == len(bools)
    assert {len(attributes)} == set(map(len, bools))
    yield 'B'
    yield ''
    yield f'{len(objects):d}'
    yield f'{len(attributes):d}'
    yield ''
    yield from objects
    yield from attributes
    flags = {False: '.', True: 'X'}
    for row in bools:
        yield ''.join(flags[value] for value in row)
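
# The emitted layout, for this data set:
#   B
#   <blank line>
#   403              (number of objects)
#   67               (number of attributes)
#   <blank line>
#   403 object names, one per line
#   67 attribute names, one per line
#   403 incidence rows, '.' = element absent, 'X' = element present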


def write_lines(path: os.PathLike | str, lines: Iterable[str]) -> None:
    with open(path, 'w', **OPEN_KWARGS) as f:
        for line in lines:
            print(line, file=f)


# Download the raw CSV once and derive both context files from it.
if not CSV.exists():
    urllib.request.urlretrieve(URL, CSV)

assert CSV.stat().st_size

if not all(path.exists() for path in (CSV_CONTEXT, CXT_CONTEXT)):
    episodes = list(read_episodes(CSV))

    header = list(episodes[0]._fields)
    header.pop(1)  # omit TITLE column

    flags = {False: '', True: 'X'}
    write_csv(CSV_CONTEXT,
              ([episode] + [flags[b] for b in bools]
               for episode, _, *bools in episodes),
              header=header)

    lines = iter_cxt_lines(objects=[e[0] for e in episodes],
                           attributes=header[1:],
                           bools=[bools for _, _, *bools in episodes])
    write_lines(CXT_CONTEXT, lines)

# Time loading the .cxt context and building the full concept lattice.
start = time.perf_counter_ns()

context = concepts.load_cxt(CXT_CONTEXT)
assert len(context.objects) == 403
assert len(context.properties) == 67

lattice = context.lattice
assert len(lattice) == 3_463

duration = (time.perf_counter_ns() - start) / 1_000_000_000
print(duration)  # seconds

# concepts 0.9.2, 2.2 GHz Intel i3-2330M CPU, 4GB RAM: 189s (PY2), 132s (PY3)
# concepts 0.10.dev0, 2.2 GHz Intel i3-2330M CPU, 4GB RAM: 32s
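
# A hedged sketch of inspecting the result afterwards (attribute names follow
# the documented concepts API; verify against the installed version):
#
#   for extent, intent in lattice:   # concepts unpack into (extent, intent)
#       ...
#   print(lattice.supremum)          # top concept: all 403 episodes
#   print(lattice.infimum)           # bottom concept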