Last active
May 6, 2022 22:06
-
-
Save ItsDrike/dd3439af3fbd8404dc0c126740702eed to your computer and use it in GitHub Desktop.
Python auto_init
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This attempts to abstract away the standard way of using `__init__`.
The problem it tries to solve is the repetitiveness of using init purely
to store its parameters into the instance under exactly the same name, i.e.:

    class Stock:
        def __init__(self, name, shares, price):
            self.name = name
            self.shares = shares
            self.price = price
""" | |
class Structure(object):
    """
    Automatically generate `__init__` which stores
    passed arguments according to the names in
    `Structure._fields`, which will be specified by
    every class which uses this.

    Arguments are paired with `_fields` using `zip`, so a surplus of
    arguments is silently ignored and a shortage leaves the remaining
    attributes unset.
    """

    # Ordered attribute names; subclasses override this.
    _fields = []

    def __init__(self, *args):
        # `zip` stops at the shorter of the two sequences
        for name, val in zip(self._fields, args):
            setattr(self, name, val)

    def __repr__(self):
        """Return `ClassName(v1, v2, ...)` built from the `_fields` values."""
        args = ", ".join(
            repr(getattr(self, name))
            for name in self._fields
        )
        return f"{type(self).__name__}({args})"
class Stock(Structure):
    """Example structure with `name`, `shares` and `price` fields."""

    _fields = ["name", "shares", "price"]
""" | |
The problem that we encounter here is that our `__init__` is now very fragile, | |
we can't pass these parameters as keyword arguments because they won't be accepted | |
as there is no implemented way to resolve them into the positional arguments. | |
There's also no guarantee that the correct number of arguments was passed in:
because we're using `zip`, it will simply truncate the rest of those fields if
we didn't pass all of them and only work with what was passed. This especially
becomes a problem when we're passing too many arguments, because any arguments after
the 3rd one will simply be accepted but completely ignored.
I attempted to solve this problem with the use of signatures in 2_signatures.py | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Note that this is a continuation from 1_basic.py | |
As described there, we had a problem with not checking the arguments | |
we're getting with *args in the generated __init__ function. | |
This implementation solves this with the use of signatures which are | |
generated automatically from _fields. We can then simply bind this signature | |
to our *args and **kwargs, which will automatically do all of the work of | |
checking the correctness of those attributes accordingly to our signature. | |
""" | |
from inspect import Parameter, Signature | |
def make_signature(fields):
    """
    Generate `inspect.Signature` from given `fields` list.

    Every name in `fields` becomes a positional-or-keyword parameter
    without a default, in the given order. Binding the resulting
    signature allows the arguments to be passed as kwargs and checks
    that the correct amount of arguments was passed in.
    """
    return Signature(
        Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
        for name in fields
    )
class Structure(object):
    """
    Automatically generate `__init__` which stores
    passed arguments according to the names in
    `Structure.__signature__`, which will be specified by
    every class which uses this. This signature is bound to the
    passed *args and **kwargs in `__init__`, which performs the
    argument checks and maps every value to its attribute name.
    """

    # Empty by default; subclasses provide their own signature.
    __signature__ = make_signature([])

    def __init__(self, *args, **kwargs):
        # `bind` raises TypeError when the arguments don't fit the signature
        bound = self.__signature__.bind(*args, **kwargs)
        for name, val in bound.arguments.items():
            setattr(self, name, val)
class Stock(Structure):
    """Example structure with `name`, `shares` and `price` fields."""

    # Must be named `__signature__`: that is the attribute
    # `Structure.__init__` binds the arguments against.
    __signature__ = make_signature(["name", "shares", "price"])
""" | |
As we can see here, this method allows for a much better
specification of our fields, but it has a downside: we have
to specify the whole signature now, using the `make_signature`
function, rather than only passing in the `_fields`. This
can be fixed, which is shown in the 3rd part (3_metasignatures.py)
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Note that this is a continuation from 2_signatures.py | |
As described there, we had a problem with repetitiveness in defining
the signatures manually. This can be automated using metaclasses,
which will be shown below.
Note that this could also be done with class decorators, but we use metaclasses | |
because we can further expand on them later and add certain things, which wouldn't | |
be easy to do with decorators. Using decorators also breaks static type checking | |
of affected classes, because the class is altered in runtime. Using metaclasses can | |
often fix this problem, because many type-checkers can at least somewhat resolve them. | |
""" | |
from inspect import Parameter, Signature | |
def make_signature(fields):
    """
    Generate `inspect.Signature` from given `fields` list.

    Each field name is turned into a positional-or-keyword parameter,
    keeping the order of `fields`. The signature can then be bound to
    *args/**kwargs, which checks that the correct amount of arguments
    was passed in.
    """
    return Signature(
        Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
        for name in fields
    )
class StructMeta(type):
    """Metaclass which derives `__signature__` from the class's `_fields`."""

    def __new__(cls, name, bases, clsdict):
        clsobj = super().__new__(cls, name, bases, clsdict)
        # `_fields` is looked up on the finished class, so an inherited
        # `_fields` is used when the class body doesn't define its own.
        sig = make_signature(clsobj._fields)
        setattr(clsobj, "__signature__", sig)
        return clsobj
class Structure(metaclass=StructMeta):
    """
    Automatically generate `__init__` which stores
    passed arguments according to the names in
    `_fields`, which will automatically be used to generate
    a `__signature__` in the `StructMeta` metaclass.
    """

    _fields = []

    def __init__(self, *args, **kwargs):
        # `bind` raises TypeError when the arguments don't fit the signature
        bound = self.__signature__.bind(*args, **kwargs)
        for name, val in bound.arguments.items():
            setattr(self, name, val)
class Stock(Structure):
    """Example structure with `name`, `shares` and `price` fields."""

    _fields = ["name", "shares", "price"]
""" | |
This will work flawlessly and it's very clear, but our issue now might | |
be to check the correctness of our parameters, users often make mistakes | |
and we can't just assume that the values we'll be getting will be correct. | |
Usually this error checking would happen within our `__init__`, problem is | |
that this is now automatically generated and we don't have a clear way | |
of imposing these restrictions. | |
This is done using property setters, which will be described in
the continuation: 4_correctness.py
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Note that this is a continuation from 3_metasignatures.py | |
As described there, we had a problem with ensuring that our parameters in | |
init will follow some imposed restrictions, perhaps for type checking, or | |
even ensuring things like positive integer values, etc. | |
This can be handled by multiple approaches, one of which would be to override | |
the `__init__` method itself, call the super().__init__ and continue from there | |
to impose our restrictions. While this would work, it's quite limiting because | |
the user would still be able to change these variables to invalid things over time, | |
simply by doing: `stock.shares = "hello"`, if we wanted to truly protect these | |
we could achieve that with the use of property setters, as solved here. | |
""" | |
from inspect import Parameter, Signature | |
def make_signature(fields):
    """
    Generate `inspect.Signature` from given `fields` list.

    The names in `fields` become positional-or-keyword parameters in
    the same order. Binding against this signature accepts kwargs and
    checks that the correct amount of arguments was passed in.
    """
    return Signature(
        Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
        for name in fields
    )
class StructMeta(type):
    """Metaclass which derives `__signature__` from the class's `_fields`."""

    def __new__(cls, name, bases, clsdict):
        clsobj = super().__new__(cls, name, bases, clsdict)
        # Read `_fields` from the created class so inheritance applies
        sig = make_signature(clsobj._fields)
        setattr(clsobj, "__signature__", sig)
        return clsobj
class Structure(metaclass=StructMeta):
    """
    Automatically generate `__init__` which stores
    passed arguments according to the names in
    `_fields`, which will automatically be used to generate
    a `__signature__` in the `StructMeta` metaclass.
    """

    _fields = []

    def __init__(self, *args, **kwargs):
        bound = self.__signature__.bind(*args, **kwargs)
        # `setattr` goes through any property defined for `name`
        for name, val in bound.arguments.items():
            setattr(self, name, val)
class Stock(Structure):
    """
    Stock whose `shares` and `price` are validated by property setters.

    `shares` must be a non-negative `int` and `price` a non-negative
    `float`; the validated values are kept in `_shares` and `_price`.
    """

    _fields = ["name", "shares", "price"]

    @property
    def shares(self):
        return self._shares

    @shares.setter
    def shares(self, value):
        if not isinstance(value, int):
            raise TypeError("value must be an integer")
        if value < 0:
            raise ValueError("value must be positive (>= 0)")
        self._shares = value

    @property
    def price(self):
        return self._price

    @price.setter
    def price(self, value):
        if not isinstance(value, float):
            raise TypeError("value must be a float")
        if value < 0:
            raise ValueError("value must be positive (>= 0)")
        self._price = value
""" | |
This will work flawlessly and it's clear as to what's being done but there's | |
another issue now. We can see that the setter for price isn't very different | |
from setter for shares. And the getter is pretty much the same. This would be | |
even more obvious with more similar attributes, that require these restrictions. | |
To avoid this repetition, we can basically implement `@property` manually, | |
with what's known as the Descriptor Protocol. | |
This is shown in the continuation: 5_descriptors.py | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Note that this is a continuation from 4_correctness.py | |
As described there, even though we managed to impose some restriction on the | |
parameters using property setters, it brought a lot of repetition with it, | |
which would become especially obvious with more attributes that require the | |
same restrictions. | |
This problem is solved here, with the use of the Descriptor Protocol. | |
""" | |
from inspect import Parameter, Signature | |
import re | |
class Descriptor:
    """
    This is a default descriptor implementation, it doesn't really do much
    but it provides us with a basis for all other descriptors, which can
    then override these methods (most notably the setter, to impose some
    restrictions).

    The value is kept in the instance's `__dict__` under the attribute name.
    """

    def __set_name__(self, owner_cls, name):
        """
        This method was implemented in python 3.6 (PEP 487), for any older versions
        you'll have to use meta-classes, to handle this automatically, or
        simply require the user to pass in the `name` in `__init__`.
        This implementation is shown in `extra_manual_name.py` file.
        """
        self.name = name

    def __get__(self, instance, owner_cls):
        # Accessed on the class itself: hand back the descriptor object
        if instance is None:
            return self
        # A value that was never set reads as None
        return instance.__dict__.get(self.name)

    def __set__(self, instance, value):
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        del instance.__dict__[self.name]
class Typed(Descriptor):
    """This is a general descriptor for enforcing types, it's expected to be subclassed."""

    ty = object  # Expected type

    def __set__(self, instance, value):
        if not isinstance(value, self.ty):
            raise TypeError(f"Expected {self.ty.__name__}, got {type(value).__name__}.")
        return super().__set__(instance, value)
class Integer(Typed):
    """Typed descriptor accepting `int` values."""

    ty = int
class Float(Typed):
    """Typed descriptor accepting `float` values; ints are converted to float."""

    ty = float

    def __set__(self, instance, value):
        # Convert ints up front so they pass the type check in `Typed`
        if isinstance(value, int):
            value = float(value)
        return super().__set__(instance, value)
class String(Typed):
    """Typed descriptor accepting `str` values."""

    ty = str
class Positive(Descriptor):
    """Descriptor rejecting negative values (zero is accepted)."""

    def __set__(self, instance, value):
        if value < 0:
            raise ValueError("Value must be positive (>= 0)")
        return super().__set__(instance, value)
class PositiveInteger(Integer, Positive):
    """Combines the `Integer` type check with the `Positive` range check."""
class PositiveFloat(Float, Positive):
    """Combines the `Float` type check with the `Positive` range check."""
class Regex(String):
    """String descriptor whose value has to match a regular expression."""

    def __init__(self, *args, pattern, **kwargs):
        # `pattern` is keyword-only; remaining arguments are passed
        # along the MRO so this class can be combined with others.
        self.pattern = re.compile(pattern)
        super().__init__(*args, **kwargs)

    def __set__(self, instance, value):
        # `match` anchors at the start of the string only
        if not self.pattern.match(value):
            raise ValueError("String doesn't match the expected pattern")
        return super().__set__(instance, value)
class Sized(Descriptor):
    """Descriptor limiting the length of the value to `maxlen`."""

    def __init__(self, *args, maxlen, **kwargs):
        # `maxlen` is keyword-only; remaining arguments are passed
        # along the MRO so this class can be combined with others.
        self.maxlen = maxlen
        super().__init__(*args, **kwargs)

    def __set__(self, instance, value):
        if len(value) > self.maxlen:
            raise ValueError(f"Maximum expected length is {self.maxlen}, got {len(value)}")
        return super().__set__(instance, value)
class SizedRegex(Regex, Sized):
    """Combines the `Regex` pattern check with the `Sized` length limit."""
def make_signature(fields):
    """
    Generate `inspect.Signature` from given `fields` list.

    This will allow us to pass in the arguments as kwargs
    and will automatically check that the correct amount
    of arguments was passed in when the signature is bound.
    """
    return Signature(
        Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
        for name in fields
    )
class StructMeta(type):
    """Metaclass which derives `__signature__` from the class's `_fields`."""

    def __new__(cls, name, bases, clsdict):
        clsobj = super().__new__(cls, name, bases, clsdict)
        # Read `_fields` from the created class so inheritance applies
        sig = make_signature(clsobj._fields)
        setattr(clsobj, "__signature__", sig)
        return clsobj
class Structure(metaclass=StructMeta):
    """
    Automatically generate `__init__` which stores
    passed arguments according to the names in
    `_fields`, which will automatically be used to generate
    a `__signature__` in the `StructMeta` metaclass.
    """

    _fields = []

    def __init__(self, *args, **kwargs):
        bound = self.__signature__.bind(*args, **kwargs)
        # `setattr` triggers the `__set__` of a descriptor defined for `name`
        for name, val in bound.arguments.items():
            setattr(self, name, val)
class Stock(Structure):
    """Stock whose fields are validated by descriptors."""

    _fields = ["name", "shares", "price"]

    name = SizedRegex(maxlen=8, pattern=r"[A-Z]+$")
    shares = PositiveInteger()
    price = PositiveFloat()
""" | |
Here, we managed to impose restrictions directly, using the descriptor protocol. | |
This implementation is pretty decent and helped us avoid a lot of code and repetition, | |
but it's still not perfect. You might already see, that we define the variables | |
twice, in `_fields` and when we impose the restrictions with instantiating the descriptors. | |
Even this could therefore be automated and abstracted away, as shown in 6_metafields.py | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Note that this is a continuation from 5_descriptors.py | |
As described there, we still have a bit of repetitive code within our definition
of the child classes of the Structure class. We can automate this by generating
the fields in a metaclass directly, by accessing the namespace of the class being
created (the dictionary which becomes its `__dict__`). We can therefore simply
iterate through these variables and check for those which are instances of the
`Descriptor` class, in which case we simply add them into some `fields` list,
which will then be used to construct our signature, just like we previously
used the `_fields` defined directly in that class.
""" | |
from collections import OrderedDict | |
from inspect import Parameter, Signature | |
import re | |
class Descriptor:
    """
    This is a default descriptor implementation, it doesn't really do much
    but it provides us with a basis for all other descriptors, which can
    then override these methods (most notably the setter, to impose some
    restrictions).

    Values live in the owning instance's `__dict__`, keyed by attribute name.
    """

    def __set_name__(self, owner_cls, name):
        """
        This method was implemented in python 3.6 (PEP 487), for any older versions
        you'll have to use meta-classes, to handle this automatically, or
        simply require the user to pass in the `name` in `__init__`.
        This implementation is shown in `extra_manual_name.py` file
        """
        self.name = name

    def __get__(self, instance, owner_cls):
        # Class-level access returns the descriptor itself
        if instance is None:
            return self
        # Unset values read as None
        return instance.__dict__.get(self.name)

    def __set__(self, instance, value):
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        del instance.__dict__[self.name]
class Typed(Descriptor):
    """This is a general descriptor for enforcing types, it's expected to be subclassed."""

    ty = object  # Expected type

    def __set__(self, instance, value):
        if not isinstance(value, self.ty):
            raise TypeError(f"Expected {self.ty.__name__}, got {type(value).__name__}.")
        return super().__set__(instance, value)
class Integer(Typed):
    """Typed descriptor accepting `int` values."""

    ty = int
class Float(Typed):
    """Typed descriptor accepting `float` values; ints are converted to float."""

    ty = float

    def __set__(self, instance, value):
        # Convert ints up front so they pass the type check in `Typed`
        if isinstance(value, int):
            value = float(value)
        return super().__set__(instance, value)
class String(Typed):
    """Typed descriptor accepting `str` values."""

    ty = str
class Positive(Descriptor):
    """Descriptor rejecting negative values (zero is accepted)."""

    def __set__(self, instance, value):
        if value < 0:
            raise ValueError("Value must be positive (>= 0)")
        return super().__set__(instance, value)
class PositiveInteger(Integer, Positive):
    """Combines the `Integer` type check with the `Positive` range check."""
class PositiveFloat(Float, Positive):
    """Combines the `Float` type check with the `Positive` range check."""
class Regex(String):
    """String descriptor whose value has to match a regular expression."""

    def __init__(self, *args, pattern, **kwargs):
        # `pattern` is keyword-only; everything else travels along the MRO
        self.pattern = re.compile(pattern)
        super().__init__(*args, **kwargs)

    def __set__(self, instance, value):
        # `match` anchors at the start of the string only
        if not self.pattern.match(value):
            raise ValueError("String doesn't match the expected pattern")
        return super().__set__(instance, value)
class Sized(Descriptor):
    """Descriptor limiting the length of the value to `maxlen`."""

    def __init__(self, *args, maxlen, **kwargs):
        # `maxlen` is keyword-only; everything else travels along the MRO
        self.maxlen = maxlen
        super().__init__(*args, **kwargs)

    def __set__(self, instance, value):
        if len(value) > self.maxlen:
            raise ValueError(f"Maximum expected length is {self.maxlen}, got {len(value)}")
        return super().__set__(instance, value)
class SizedRegex(Regex, Sized):
    """Combines the `Regex` pattern check with the `Sized` length limit."""
def make_signature(fields):
    """
    Generate `inspect.Signature` from given `fields` list.

    This will allow us to pass in the arguments as kwargs
    and will automatically check that the correct amount
    of arguments was passed in when the signature is bound.
    """
    return Signature(
        Parameter(name, Parameter.POSITIONAL_OR_KEYWORD)
        for name in fields
    )
class StructMeta(type):
    """
    Metaclass which builds `__signature__` from the `Descriptor`
    attributes of a class, in definition order.
    """

    @classmethod
    def __prepare__(cls, name, bases):
        """
        This is the method which returns the default empty dictionary
        which will then be filled and treated as the `__dict__` for a
        given class `cls`.
        We're overriding this and returning an ordered dictionary instead.
        This ensures that we keep the order of items for our arguments to
        `__init__` of `cls`. With unordered dictionary, the order of those
        arguments won't be preserved and *args might not have the intended
        order.
        """
        return OrderedDict()

    def __new__(cls, name, bases, clsdict):
        # Start with the fields of the structure base classes, so that
        # a subclass keeps accepting the arguments of its parents
        # (without this, `class Sub(Stock): pass` gets an empty signature).
        fields = []
        for base in bases:
            if isinstance(base, StructMeta):
                for key in base.__signature__.parameters:
                    if key not in fields:
                        fields.append(key)

        # This extracts the descriptor variable names
        # we could also use `val.name` instead, because that's
        # automatically obtained in descriptor with `__set_name__`
        for key, val in clsdict.items():
            if isinstance(val, Descriptor) and key not in fields:
                fields.append(key)

        # Make class object itself, with normal dictionary, not an ordered one
        clsobj = super().__new__(cls, name, bases, dict(clsdict))

        # Produce the signature directly from obtained fields, rather than hard-coding
        # it in the class definition as `_fields`
        sig = make_signature(fields)
        setattr(clsobj, "__signature__", sig)
        return clsobj
class Structure(metaclass=StructMeta):
    """
    Automatically generate `__init__` which stores passed arguments
    according to the `__signature__` which the `StructMeta` metaclass
    generates from the descriptors defined on the class.
    """

    # Not read by `StructMeta` in this version, kept for compatibility.
    _fields = []

    def __init__(self, *args, **kwargs):
        bound = self.__signature__.bind(*args, **kwargs)
        # `setattr` triggers the `__set__` of the descriptor for `name`
        for name, val in bound.arguments.items():
            setattr(self, name, val)
class Stock(Structure):
    """Stock whose fields are declared and validated by descriptors."""

    name = SizedRegex(maxlen=8, pattern=r"[A-Z]+$")
    shares = PositiveInteger()
    price = PositiveFloat()
""" | |
With this approach, everything is perfectly clear and there's no repetition at all, | |
it's a very clean and nice way to do this. But there is still a downside, and this one | |
is pretty obvious. It's performance. We did a lot of pre-processing which made our | |
code a lot slower, specifically, this is how the code performs: | |
Name | Code | Simple | Meta | Disadvantage | |
Instance creation | s=Stock('ACME',50,91.1) | 1.07s | 91.8s | 86x | |
Attribute lookup | s.price | 0.08s | 0.08s | 1x | |
Attribute assignment | s.price = 10.0 | 0.11s | 3.40s | 31x | |
Attribute assignment | s.name = 'ACME' | 0.14s | 8.14s | 58x | |
One way to get around this would be to utilize code generation, | |
as shown in 7_codegen.py | |
NOTICE: | |
The code here is very pythonic, clean and readable and it could be used in a production
environment as is, even with slower speeds. Usually I wouldn't say that it's a good
idea to stick with slower options if there are faster ones available, but if you
check out the `7_codegen.py` file, you'll probably see why I'm saying this: the code there
is very chaotic, weird and non-pythonic, but it does make this faster and the user won't
really see a difference.
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Note that this is a continuation from 6_metafields.py | |
As described there, the approach we took was very clean and easy to recognize, but | |
also very slow, we could improve on this by using direct code generation as shown here. | |
Do know that doing this won't be pretty python code, it will get messy and weird, as | |
I said in the NOTICE section of the 6_metafields.py file, only keep reading this if | |
you really feel like you're ready for it and with the knowledge that it won't be pretty. | |
""" | |
from collections import OrderedDict | |
import re | |
def _make_init(fields): | |
"""Generate full python code of an __init__ function""" | |
code = f"def __init__(self, {', '.join(fields)}):\n" | |
for name in fields: | |
code += f" self.{name} = {name}\n" | |
return code | |
def _make_setter(dcls): | |
""" | |
Takes descriptor class and produces the code for `__set__` function, | |
to avoid super calls and slowdows from inheritance. | |
It walks the method resolution order and collects the code from `set_code` function, which it than concatenates to make a ne `__set__` method | |
""" | |
code = "def __set__(self, instance, value):\n" | |
for d in dcls.__mro__: | |
if 'set_code' in d.__dict__: | |
for line in d.set_code(): | |
code += f" {line}\n" | |
return code | |
class DescriptorMeta(type):
    """
    We need to use a metaclass for Descriptors, in order to automatically
    construct the `__set__` method for them using the code fragments
    from `set_code` method.
    """

    def __init__(self, clsname, bases, clsdict):
        """
        We're using `__init__` instead of `__new__`, because the
        generation of setter code using `_make_setter` requires
        us to have the MRO already established. That is done when
        the class is created in `__new__`, which has already
        happened by the time `__init__` runs.
        """
        super().__init__(clsname, bases, clsdict)

        # In case somebody tries to implement the classical __set__
        # method, disallow it, it would be overridden anyway and the
        # user wouldn't know why, this way they'll be informed what's wrong
        if "__set__" in clsdict:
            raise TypeError("Use set_code(), not __set__()")

        # Make the `__set__` code and run it in a scratch namespace
        code = _make_setter(self)
        namespace = {}
        exec(code, globals(), namespace)

        # The class `__dict__` is a read-only mappingproxy and the received
        # `clsdict` is not necessarily the one used by the class itself,
        # so we need to define the `__set__` using setattr directly
        # onto the class (`self`)
        setattr(self, "__set__", namespace["__set__"])
class Descriptor(metaclass=DescriptorMeta):
    """
    This is a default descriptor implementation, it doesn't really do much
    but it provides us with a basis for all other descriptors, which can
    then extend the setter (through `set_code`) to impose some restrictions.

    `__set__` is not written by hand here, `DescriptorMeta` generates it
    from the `set_code` fragments of the classes in the MRO.
    """

    def __set_name__(self, owner_cls, name):
        """
        This method was implemented in python 3.6 (PEP 487), for any older versions
        you'll have to use meta-classes, to handle this automatically, or
        simply require the user to pass in the `name` in `__init__`.
        This implementation is shown in `extra_manual_name.py` file
        """
        self.name = name

    def __get__(self, instance, owner_cls):
        # Class-level access returns the descriptor itself
        if instance is None:
            return self
        # Unset values read as None
        return instance.__dict__.get(self.name)

    @staticmethod
    def set_code():
        # The actual store; being the base class it ends up last in the
        # generated `__set__`, after all the checks of the subclasses.
        return ["instance.__dict__[self.name] = value"]

    def __delete__(self, instance):
        del instance.__dict__[self.name]
class Typed(Descriptor):
    """This is a general descriptor for enforcing types, it's expected to be subclassed."""

    ty = object  # Expected type

    @staticmethod
    def set_code():
        # Code lines for the generated `__set__`; `self` is the descriptor
        return [
            "if not isinstance(value, self.ty):",
            "    raise TypeError(f'Expected {self.ty.__name__}, got {type(value).__name__}.')",
        ]
class Integer(Typed):
    """Typed descriptor accepting `int` values."""

    ty = int
class Float(Typed):
    """Typed descriptor accepting `float` values; ints are converted to float."""

    ty = float

    @staticmethod
    def set_code():
        # Emitted before the type check of `Typed`, which follows in the MRO
        return [
            "if isinstance(value, int):",
            "    value = float(value)",
        ]
class String(Typed):
    """Typed descriptor accepting `str` values."""

    ty = str
class Positive(Descriptor):
    """Descriptor rejecting negative values (zero is accepted)."""

    @staticmethod
    def set_code():
        return [
            "if value < 0:",
            "    raise ValueError('Value must be positive (>= 0)')",
        ]
class PositiveInteger(Integer, Positive):
    """Combines the `Integer` type check with the `Positive` range check."""
class PositiveFloat(Float, Positive):
    """Combines the `Float` type check with the `Positive` range check."""
class Regex(String):
    """String descriptor whose value has to match a regular expression."""

    def __init__(self, *args, pattern, **kwargs):
        # `pattern` is keyword-only; everything else travels along the MRO
        self.pattern = re.compile(pattern)
        super().__init__(*args, **kwargs)

    @staticmethod
    def set_code():
        # `match` anchors at the start of the string only
        return [
            "if not self.pattern.match(value):",
            "    raise ValueError(\"String doesn't match the expected pattern\")",
        ]
class Sized(Descriptor):
    """Descriptor limiting the length of the value to `maxlen`."""

    def __init__(self, *args, maxlen, **kwargs):
        # `maxlen` is keyword-only; everything else travels along the MRO
        self.maxlen = maxlen
        super().__init__(*args, **kwargs)

    @staticmethod
    def set_code():
        return [
            "if len(value) > self.maxlen:",
            "    raise ValueError(f'Maximum expected length is {self.maxlen}, got {len(value)}')",
        ]
class SizedRegex(Regex, Sized):
    """Combines the `Regex` pattern check with the `Sized` length limit."""
class StructMeta(type):
    """
    Metaclass which generates `__init__` for a class from the
    `Descriptor` attributes defined in its body, in definition order.
    """

    @classmethod
    def __prepare__(cls, name, bases):
        """
        This is the method which returns the default empty dictionary
        which will then be filled and treated as the `__dict__` for a
        given class `cls`.
        We're overriding this and returning an ordered dictionary instead.
        This ensures that we keep the order of items for our arguments to
        `__init__` of `cls`. With unordered dictionary, the order of those
        arguments won't be preserved and *args might not have the intended
        order.
        """
        return OrderedDict()

    def __new__(cls, name, bases, clsdict):
        # This extracts the descriptor variable names
        # we could also use `val.name` instead, because that's
        # automatically obtained in descriptor with `__set_name__`
        fields = [
            key for key, val in clsdict.items()
            if isinstance(val, Descriptor)
        ]

        # Directly execute the code of the generated __init__ function,
        # which defines it inside of the class namespace.
        # This means that we don't need the signature anymore.
        # A class without descriptors of its own gets no generated
        # __init__ and uses whatever it inherits.
        if fields:
            init_code = _make_init(fields)
            exec(init_code, globals(), clsdict)

        # Make class object itself, with normal dictionary, not an ordered one
        clsobj = super().__new__(cls, name, bases, dict(clsdict))
        return clsobj
class Structure(metaclass=StructMeta):
    """
    Base class for structures. The `StructMeta` metaclass generates
    an `__init__` for every subclass from the descriptors defined
    in the subclass body.
    """

    # Not read by `StructMeta` in this version, kept for compatibility.
    _fields = []
class Stock(Structure):
    """Stock whose fields are declared and validated by descriptors."""

    name = SizedRegex(maxlen=8, pattern=r"[A-Z]+$")
    shares = PositiveInteger()
    price = PositiveFloat()
""" | |
With this approach, we're generating the init code and the code for __set__ | |
and using that directly, with `exec`. (This is certainly weird and many would | |
say that it's not pythonic, which is true, but code generation is actually even | |
used in the stdlib collections module for named tuples, so if it's possible in | |
stdlib, we can do it too? maybe?) | |
This will make our instance creation significantly faster, but it comes | |
with a big drawback with static type checkers and with general readability. | |
Name | Code | Simple | Old Meta | New Meta + Exec | |
Instance creation | s=Stock('ACME',50,91.1) | 1.07s | 91.8s (86x) | 17.6s (6.7x) | |
Attribute lookup | s.price | 0.08s | 0.08s | 0.08s | |
Attribute assignment | s.price = 10.0 | 0.11s | 3.40s (31x) | 1.11s (10x) | |
Attribute assignment | s.name = 'ACME' | 0.14s | 8.14s (58x) | 2.95s (21x) | |
There won't be any benefit for attribute lookups because | |
we're not adjusting that in any way, in fact, we were using the normal | |
`__get__` even with the old approach which meant that it's execution time was | |
actually the same as with the simple implementation. | |
Note that the user won't be aware that we're doing any of this and the `exec` | |
will be hidden away, the general way of defining our class will stay the same | |
so the user won't notice anything. | |
Realistically though, you'd usually just stick with the first method, even though | |
it is slower, python itself wasn't made to be the fastest language out there, but | |
to be the most readable one, so readability should take precedence, this is here only | |
to show, what can theoretically be done, to improve our speeds if absolutely necessary. | |
DISCLAIMER: | |
If you tried to actually implement this inside of a production codebase of some | |
real library/application, I can't really help you with convincing the manager about | |
this because well, you'll either leave the building, or keep a permanent position there. | |
The main problem with this is that people who are less experienced won't even know | |
how to use your classes and what to do with something like this, and looking through | |
the original code likely won't help them at all without extensive searching about | |
descriptors, metaclasses and other things, and even then it would take them very long time | |
to actually understand a codebase like this. For that reason, this is mostly purely | |
for education and you probably won't have a chance of actually implementing something as | |
crazy as this in a real production codebase somewhere. | |
[ESOTERIC PART] | |
What's nice about this though, is that you can literally convert your classes into pure | |
XML, from which they can be reconstructed, example XML:

    <structures>
        <structure name="Stock">
            <field type="SizedRegex" maxlen="8" pattern="'[A-Z]+$'">name</field>
            <field type="PositiveInteger">shares</field>
            <field type="PositiveFloat">price</field>
        </structure>
        <structure name="Address">
            <field type="String">hostname</field>
            <field type="Integer">port</field>
        </structure>
    </structures>
Here you're getting full class serialization in something relatively readable. | |
Maybe that can convince your manager? (lol, don't try it). | |
But it would be possible to parse this XML directly, as shown below: | |
from xml.etree.ElementTree import parse | |
def _xml_to_code(filename): | |
doc = parse(filename) | |
code = "" | |
for structure in doc.findall("structure"): | |
clscode = _struct_to_class(structure) | |
code += clscode | |
return code | |
def _struct_to_class(structure): | |
name = structure.get("name") | |
code = f"class {name}(Structure):\n" | |
for field in structure.findall("field"): | |
descriptor_type = field.get("type") | |
options = [ | |
f"{key} = {val}" for key, val in field.items() | |
if key != "type" | |
] | |
name = field.text.strip() | |
code += f" {name} = {descriptor_type}({', '.join(options)})\n" | |
return code | |
code = _xml_to_code("classes.xml") | |
exec(code) | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
In Python 3.6, the dunder method `__set_name__` was added to the
descriptor protocol (PEP 487), making it a lot easier to
avoid metaclasses. But if we're using an older Python version,
or are just curious how `__set_name__` could be implemented by hand,
this is how the metaclass implementation would look.
""" | |
class Descriptor(object):
    """
    Base descriptor implementation, with using the old
    way of passing the `name` in manually, without the
    automation of `__set_name__`.

    The managed value is stored in the owning instance's `__dict__`
    under the key `self.name`.
    """

    def __init__(self, name=None):
        """
        Notice that the `name` is optional, in order to
        provide a way of initialization without it, so that
        the metaclass implemented below can handle setting the
        `name` for us automatically.
        """
        self.name = name

    def __get__(self, instance, owner=None):
        """
        Return the value stored on `instance`, or `None` if nothing
        has been stored under `self.name` yet.

        When the attribute is looked up on the class itself, `instance`
        is `None`; in that case the descriptor object is returned so
        that class-level access and introspection do not crash.
        """
        if instance is None:
            return self
        return instance.__dict__.get(self.name)

    def __set__(self, instance, value):
        """Store `value` in the instance's `__dict__` under `self.name`."""
        instance.__dict__[self.name] = value

    def __delete__(self, instance):
        """
        Remove the stored value from the instance's `__dict__`.

        Raises `KeyError` if no value is currently stored.
        """
        del instance.__dict__[self.name]
class Integer(Descriptor):
    """Descriptor which only accepts values that are instances of `int`."""

    def __set__(self, instance, value):
        """
        Store `value` through the base descriptor if it is an `int`,
        otherwise raise `TypeError`.
        """
        if isinstance(value, int):
            return super().__set__(instance, value)
        raise TypeError("value must be integer")
class StructMeta(type):
    """
    Metaclass which tells every `Descriptor` found in a class body the
    attribute name it was assigned to, mimicking what `__set_name__`
    does automatically since Python 3.6.
    """

    def __new__(cls, name, bases, clsdict):
        """
        Set `.name` on each `Descriptor` in `clsdict` to the key it is
        stored under, then create the class as usual.
        """
        # The loop variable must not be called `name`: that would shadow
        # the class name parameter and the class would end up being
        # created with the name of its last descriptor field.
        for field_name, value in clsdict.items():
            if isinstance(value, Descriptor):
                # Give the attribute name obtained from the class dict
                # to the descriptor instance stored under that name
                value.name = field_name

        return super().__new__(cls, name, bases, clsdict)
class Structure(metaclass=StructMeta):
    """
    Convenience base class: users can simply inherit from it instead
    of writing `metaclass=StructMeta` themselves, because the metaclass
    is inherited by every subclass automatically.
    """
class Stock(Structure):
    """
    Example structure: `shares` and `price` are validated by `Integer`
    descriptors, while `name` is a plain instance attribute.
    """

    shares = Integer()
    price = Integer()

    def __init__(self, name, shares, price):
        # Targets are assigned left to right, so `name` is stored before
        # the descriptor-validated `shares` and `price`.
        self.name, self.shares, self.price = name, shares, price
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment