-
-
Save xkortex/c59058f9245104d54b4075ef96692554 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3 | |
""" | |
I've been wanting something that lets me reuse flags across multiple subcommands, between projects, etc.
The native argparse makes this a little tricky, especially if you want custom pydantic data classes. | |
I have also wanted a click-like ease of experience but without all the decorators and deep stack traces clouding my logs. | |
I spent a bit of time hacking on the idea and came up with some interesting patterns. There's still a lot to iron out, | |
a lot of the ergonomics could be improved with more introspection, but I thought I would at least throw this over the wall | |
and see if there's any interest in it. | |
The overall concept is leveraging metaprogramming to create type annotations which double as both type hints and the | |
option parsing metadata. This is by no means the best way of doing it but I think there's promise here in the general idea. | |
Scroll down to Application Logic to get a gist of what I'm going for first and then flow back to the top. | |
""" | |
import argparse | |
from dataclasses import dataclass | |
from typing import Optional, Union, Any, List | |
from loguru import logger | |
import pydantic | |
@dataclass
class ArgAction:
    """Metadata describing a single command-line option.

    An instance is a plain record; ``GlobalParser.compile`` copies
    ``vars(instance)``, strips ``val``/``short``/``long`` and forwards the
    remaining entries as keyword arguments to
    ``ArgumentParser.add_argument``.

    Constructor fields, in positional order:
        dest:    attribute name the parsed value is stored under.
        type:    callable used by argparse to convert the raw string.
        val:     free-form slot; removed before the argparse call.
        help:    help text for the option.
        long:    long option string, e.g. ``'--input_uri'``.
        short:   short option string, e.g. ``'-i'``.
        metavar: placeholder name shown in usage output.
        default: value used when the option is absent.
        choices: allowed values, if restricted.
    """
    dest: str
    type: type
    # These three carry no annotation, so @dataclass does not treat them as
    # fields: they are shared class attributes, cannot be passed to the
    # constructor and do not appear in vars(instance).
    nargs = None
    const = False
    required = False
    val: Any = None
    help: Optional[str] = None
    long: Optional[str] = None
    short: Optional[str] = None
    metavar: Optional[str] = None
    default: Any = None
    choices: Optional[List[Any]] = None
class GlobalParser(object):
    """Registry of option metadata plus the argparse parser built from it.

    Options are collected with ``add`` and later attached to one
    sub-command parser per model class by ``compile``.
    """

    def __init__(self):
        self.parser = argparse.ArgumentParser()
        # Every registered option, in registration order.
        self.actions = []
        self.subparsers = self.parser.add_subparsers(help='sub-command help')

    def add(self, act: "ArgAction"):
        """Register one option description for later compilation."""
        self.actions.append(act)

    def compile(self, obj):
        """Create a sub-command named after ``obj`` and attach its options.

        Args:
            obj: a class exposing ``__name__`` and a ``__fields__`` mapping;
                only registered options whose ``dest`` is a key of
                ``__fields__`` are added.

        Returns:
            The sub-command ``ArgumentParser`` that was created.
        """
        sub = self.subparsers.add_parser(obj.__name__)
        for act in self.actions:
            if act.dest not in obj.__fields__:
                continue
            # Work on a copy so the registered metadata is never mutated.
            kwargs = dict(vars(act))
            kwargs.pop('val', None)
            flags = [
                flag
                for flag in (kwargs.pop('short', None), kwargs.pop('long', None))
                if flag is not None
            ]
            # Without an explicit default, keep the attribute out of the
            # namespace entirely. Otherwise argparse stores None and the
            # model's own field default is overridden by that None.
            if kwargs.get('default') is None:
                kwargs['default'] = argparse.SUPPRESS
            sub.add_argument(*flags, **kwargs)
        return sub
# Module-level singleton shared by OptionMeta (registration) and OptoParser
# (compilation and parsing).
gparser = GlobalParser()  # there's better ways to do this, I'm sure
class OptionMeta(type):
    """Metaclass that makes option classes subscriptable with an ArgAction.

    ``SomeOption[ArgAction(...)]`` registers the action with the global
    parser and returns a fresh subclass of ``SomeOption`` carrying the action
    in ``__option__``.
    """
    __option__: ArgAction

    def __getitem__(self, co: ArgAction):
        """Register ``co`` and return a subclass of ``self`` bound to it."""
        gparser.add(co)
        logger.info(co)
        # Derive from the class actually being subscripted, so that
        # IntOption[...] stays int-based instead of always becoming an
        # ArgumentOption. The generic class itself is left untouched; only
        # the new subclass carries the option.
        return type(self.__name__, (self,), {'__option__': co})
# todo: specific Argument Types for various types
class ArgumentOption(str, metaclass=OptionMeta):
    """String-valued option type usable as a pydantic field annotation.

    Subscripting (``ArgumentOption[ArgAction(...)]``) is handled by
    ``OptionMeta`` and yields a subclass with ``__option__`` set.
    """

    @classmethod
    def __get_validators__(cls):
        # Validator-discovery hook: yields the callables used to validate a
        # field of this type.
        yield cls.validate_type

    @classmethod
    def validate_type(cls, val):
        """Log the option metadata and incoming value, then coerce ``val``.

        Returns an instance of ``cls`` so the parametrised subclass (and its
        ``__option__``) is preserved rather than falling back to the bare
        base class.
        """
        option = getattr(cls, '__option__', Any)
        logger.info('Validate: {}: {} |\n {}: {}'.format(type(option), option, type(val), val))
        return cls(val)

    @classmethod
    def __modify_schema__(cls, field_schema):
        # __modify_schema__ should mutate the dict it receives in place,
        # the returned value will be ignored
        field_schema.update(
            type='ArgumentOption'
        )
class IntOption(int, metaclass=OptionMeta):
    """Integer-valued option type usable as a pydantic field annotation.

    Subscripting (``IntOption[ArgAction(...)]``) is handled by ``OptionMeta``.
    """

    @classmethod
    def __get_validators__(cls):
        # Validator-discovery hook: yields the callables used to validate a
        # field of this type.
        yield cls.validate_type

    @classmethod
    def validate_type(cls, val):
        """Log the option metadata and incoming value, then coerce ``val``.

        Returns an instance of ``cls`` so a parametrised subclass is
        preserved; raises whatever ``int(val)`` raises for bad input.
        """
        option = getattr(cls, '__option__', Any)
        logger.info('Validate: {}: {} |\n {}: {}'.format(type(option), option, type(val), val))
        return cls(val)

    @classmethod
    def __modify_schema__(cls, field_schema):
        # Mutates the schema dict in place; the return value is ignored.
        field_schema.update(
            type='IntOption'
        )
class OptoParser(pydantic.BaseModel):
    """Base model for sub-commands; each direct subclass becomes a sub-parser."""

    @classmethod
    def compile(cls):
        """Register every direct subclass as a sub-command on the global parser.

        If registering a subclass fails, the failure is logged and the
        subclass's own ``compile`` is invoked instead (which walks *its*
        subclasses).
        """
        for plug in cls.__subclasses__():
            try:
                gparser.compile(plug)
            except Exception as exc:
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit and hid the reason entirely.
                logger.warning('Could not compile {}: {}'.format(plug.__name__, exc))
                plug.compile()

    @classmethod
    def compile_cls(cls):
        """Register only this class as a sub-command on the global parser."""
        gparser.compile(cls)

    @classmethod
    def parse_args(cls, argv=None):
        """Parse the command line and build an instance of ``cls`` from it.

        Args:
            argv: optional list of argument strings; ``None`` (the default)
                makes argparse read ``sys.argv[1:]``, as before.

        Returns:
            ``cls`` constructed from the parsed namespace's attributes.
        """
        logger.info('{}: {}'.format(cls.__name__, cls.__fields__))
        # todo: use fields to infer default dest
        args = gparser.parser.parse_args(argv)
        # NOTE(review): the namespace may contain keys that are not fields of
        # ``cls``; presumably the model config decides how those are treated.
        return cls(**vars(args))
### ======== everything above this would be in the library ======== | |
### ========. below is application logic ======== | |
"""This is my "dream API" for a cli interface. A bunch of general-pupose data fields which can be independently | |
mixed and matched, and automatically used to generate the CLI. | |
""" | |
# Options you might want for a cli that can read a file
class InputMixin(pydantic.BaseModel):
    # Each annotation doubles as the CLI declaration: ArgAction(dest, type,
    # short=..., long=...) is registered with the global parser when the
    # annotation is evaluated.
    input_uri: Optional[ArgumentOption[ArgAction('input_uri', str, short='-i', long='--input_uri')]] = ''
    bar: Optional[ArgumentOption[ArgAction('bar', str, short='-b', long='--bar')]] = ''
    # depth is an integer option, so argparse must convert with int (was str).
    depth: Optional[IntOption[ArgAction('depth', int, short='-R', long='--depth')]] = 2
# Options you might want for a cli that can write a file
class OutputMixin(pydantic.BaseModel):
    output_uri: Optional[ArgumentOption[ArgAction('output_uri', str, short='-o', long='--output_uri')]] = ''
    # Short flag was '-format', which looks like a slip for '-f' (the flag is
    # --force). NOTE(review): force is still a string-valued option
    # ('False'/'True'), not a boolean switch.
    force: Optional[ArgumentOption[ArgAction('force', str, short='-f', long='--force')]] = 'False'
class getin(InputMixin, OptoParser):
    # this is the first real subcommand
    # It declares no fields of its own; its options come from InputMixin.
    ...
class getout(OutputMixin, OptoParser):
    # this is the second real subcommand
    # It declares no fields of its own; its options come from OutputMixin.
    ...
# this comment can be extracted using inspect to populate help
class subcmd1(InputMixin, OutputMixin, OptoParser):
    """
    This subcommand has features from both Input and Output mixins.
    """
    ## Still trying to figure out the optimal way to define the default value behavior. Part of the issue is
    ## pydantic's machinery wants to determine if a value is optional or not by looking for the Optional type
    ## I could probably just pull the default value from __fields__ and stuff that into the argparse default.
    opto: Optional[ArgumentOption[ArgAction('opto', str, short='-O', long='--opto')]] = 'unset'
    nana: Optional[ArgumentOption[ArgAction('nana', str, short='-n', long='--nana')]] = None
if __name__ == "__main__":
    logger.info('________start________')
    # Build one sub-parser per direct OptoParser subclass.
    OptoParser.compile()
    # still have to figure out best way to dispatch to appropriate subcommand.
    # For now every model is built from the same parsed command line.
    cmd1 = getin.parse_args()
    cmd2 = getout.parse_args()
    cmd3 = subcmd1.parse_args()
    print('\n')
    print(cmd3)
I would definitely encourage exploring different design approaches to help build better and more robust CLI tools.
A few comments:
-
I believe sharing sub parser options in `pydantic-cli` is possible. https://gist.github.com/mpkocher/11ebeacdda7a146c326419a410edb61e
-
I am hopeful that the `Annotated` feature introduced in Python 3.9 via PEP-593 has a lot of potential to address these types of problems. However, it's not clear to me how/when Pydantic will investigate how to do this. I've been exploring a few ideas in this space, but it's still in an experimental state.
-
If you're trying to share args in `argparse` across subparsers, this can be done using function composition. I cover this in my Functional Programming Techniques Part 3 and this approach is used heavily in bioinformatic tools used in production. This worked fine in the Python 2.7 era; however, I would probably do things differently now.
-
If you're going for a "leverage the dynamic nature of Python" approach, your design seems reasonable. In general, I'm moving away from this sort of pattern to a more typesafe design (if possible) and avoiding getattr, setattr, etc., specifically for core libraries.
-
`ArgAction` is largely reimplementing `FieldInfo` from Pydantic. This was one of the reasons why `pydantic-cli` chose Pydantic: to avoid reinventing the metadata wheel. Using FieldInfo as the core metadata container is also a strong motivating factor for hiding the (internal) argparse details (there's pros and cons to this approach).
Also, a heads up: `except:` is not the same as `except Exception:`. http://www.wilfred.me.uk/blog/2013/11/03/no-naked-excepts/
Thanks for the quick response and the tips!
And yes I am aware bare except is bad form, this is nowhere near production quality, I just wanted to crank out a PoC between some container builds.
I too favor functional/typed style over the more dynamic stuff and since the purpose of config parsers is to basically set up the declarative initial state, I'm sure one could readily make the entire argument parsing process super functional.
I did not know about `FieldInfo` - that's exactly the sort of data structure I was looking for.
I discovered while tooling around after posting this that I could probably handle the arg-parsier-parsing as a set of functions which operate on the Pydantic structures rather than injecting metadata into the variable annotations themselves, so I'll be revisiting this problem using that approach with the FieldInfo interface.
How feasible do you think it would be to drop argparse entirely and write a sys.argv parser from scratch? Particularly, if we accept the limitation of no positional arguments, or only positional arguments after `--` (I know you've stated that posargs clash with the cleanliness of mapping a JSON directly to a CLI and I have observed indeed posargs complicate interfaces quite a lot). I feel like the ideal solution would be to compose a function which takes the full argv string, plus env and config files, and emits an immutable data structure. At this point it honestly feels super crufty, but this feels like one of those "How hard could it be?" extremely tricky and nuanced problem spaces.
@xkortex have you worked on this since the gist was posted?
python optoparser.py subcmd1 -i foo --bar bar -o baz