Created
February 17, 2023 15:54
-
-
Save Dutcho/6be62b7c0e473f4ccbabb95687921a58 to your computer and use it in GitHub Desktop.
Minimal proof of concept for atomic and iterable string types
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Minimal proof of concept for atomic and iterable string types, 17 Feb 2023. | |
Illustration for a Python Ideas discussion;
see https://discuss.python.org/t/an-interesting-pytype-experiment-and-a-possible-extension-to-strings/23749/51
""" | |
from collections.abc import Container, Sequence, Sized | |
from typing import Final, overload, Self, TYPE_CHECKING | |
class Char: | |
"""Hypothetical char type. | |
>>> Char(65), Char('B'), int(Char('C')) | |
('A', 'B', 67) | |
""" | |
if TYPE_CHECKING: | |
@staticmethod | |
def _typing_checks() -> None: # because mypy doesn't check doctests in docstrings | |
print(Char(65), Char('B'), int(Char('C'))) | |
def __init__(self, codepoint: int | str) -> None: | |
self.codepoint: Final = codepoint if isinstance(codepoint, int) else ord(codepoint) | |
def __eq__(self, other: object) -> bool: | |
return isinstance(other, Char) and self.codepoint == other.codepoint | |
def __int__(self) -> int: | |
return self.codepoint | |
def __str__(self) -> str: | |
return chr(self.codepoint) | |
def __repr__(self) -> str: | |
return repr(str(self)) | |
class AtomicText(Sized, Container[Char]):
    """Atomic non-iterable string (named 'text' to differentiate).

    Supports ``len()`` and ``Char in text`` but deliberately offers neither
    indexing nor iteration.
    Various equivalents of all string methods were omitted for brevity.

    >>> a = AtomicText('hello')
    >>> a, len(a), Char('e') in a
    ('hello', 5, True)
    >>> a[0]  # below runtime error and mypy error: Value of type "AtomicText" is not indexable  [index]
    Traceback (most recent call last):
    TypeError: 'AtomicText' object is not subscriptable
    """

    if TYPE_CHECKING:
        @staticmethod
        def _typing_checks() -> None:  # because mypy doesn't check doctests in docstrings
            a = AtomicText('hello')
            print(a, len(a), Char('e') in a)
            print(a[0])

    def __init__(self, plain_old_string: str = '', /) -> None:
        """Convert each character of *plain_old_string* into a ``Char``."""
        codepoints = map(ord, plain_old_string)
        self.chars: Final[tuple[Char, ...]] = tuple(map(Char, codepoints))

    def __len__(self) -> int:
        """Return the number of characters held."""
        return len(self.chars)

    def __contains__(self, ch: object, /) -> bool:  # semantics differ from str.__contains__
        """Return True when *ch* is a ``Char`` equal to one of the stored characters."""
        if not isinstance(ch, Char):
            return False
        return ch in self.chars

    def __str__(self) -> str:
        """Return the stored characters joined into a plain string."""
        return ''.join(map(str, self.chars))

    def __repr__(self) -> str:
        """Return the repr of the plain-string form."""
        as_plain = str(self)
        return repr(as_plain)
class IterableText(AtomicText, Sequence[Char]):
    """Iterable version of Text, which closely mimics plain old string.

    Adds indexing and slicing on top of ``AtomicText``: an integer index
    yields a single ``Char``, a slice yields a new instance of the same class.

    >>> for t in IterableText('hello'), 'hello':  # IterableText is indistinguishable from plain old string
    ...     t, t[0], t[-4:], *t
    ('hello', 'h', 'ello', 'h', 'e', 'l', 'l', 'o')
    ('hello', 'h', 'ello', 'h', 'e', 'l', 'l', 'o')
    """

    if TYPE_CHECKING:
        @staticmethod
        def _typing_checks() -> None:  # because mypy doesn't check doctests in docstrings
            for t in IterableText('hello'), 'hello':
                print(t, t[0], t[-4:], *t)

    @overload
    def __getitem__(self, index: int, /) -> Char:
        ...

    @overload
    def __getitem__(self, sl: slice, /) -> Self:
        ...

    def __getitem__(self, item: int | slice, /) -> Char | Self:
        """Return the ``Char`` at an int index, or a new text for a slice."""
        if not isinstance(item, int):
            # Slice path: rebuild through a plain string so the result has
            # the same (sub)class as the receiver.
            selected = self.chars[item]
            return type(self)(''.join(map(str, selected)))
        return self.chars[item]
if __name__ == '__main__':
    # Executed as a script: run the doctests embedded in this module's docstrings.
    from doctest import testmod

    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment