Created
August 21, 2014 19:55
-
-
Save chisophugis/942ae335103ac3391983 to your computer and use it in GitHub Desktop.
Command line options as JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# | |
# A prototype for consistently handling command line options by | |
# interpreting them as a JSON object, following a schema. | |
# The result of command line option parsing is a "JSON object" which
# can be passed around and manipulated as desired.
# | |
# Example: | |
# $ ./options.py --vectorizer-loop-enable=true --inliner-enable=true --inliner-threshold=6 | |
# { | |
# "inliner": { | |
# "threshold": 6, | |
# "enable": true | |
# }, | |
# "vectorizer": { | |
# "loop": { | |
# "enable": true | |
# } | |
# } | |
# } | |
# | |
# See the comment above the OPTION_SCHEMA variable for what the schema looks | |
# like (the functionality available now is quite limited, but hopefully | |
# conveys the gist). | |
# | |
# Skip down to main() to see the primary flow of the code. | |
# | |
# For now, '-' is used as the separator for properties. This is just because
# '-' is common in command line option names. It actually might make more | |
# sense to use '.' which might make things more intuitive and make it clear | |
# that this is a more "formalized" option scheme: | |
# $ foo --foo.bar=true --bar.quux.quuux=8 | |
# You can tweak the global SEP variable to change this to be '.' or whatever. | |
# | |
# Note that by a coincidence with braced shell expansion, this works: | |
# $ ./options.py --vectorizer{-aggressive=on,-loop{-enable=true,-max_depth=3}} | |
# Or with SEP == '.': | |
# $ ./options.py --vectorizer{.aggressive=on,.loop{.enable=true,.max_depth=3}} | |
# $ ./options.py --vectorizer.{aggressive=on,loop.{enable=true,max_depth=3}} | |
# $ ./options.py --vectorizer.{aggressive=on,loop.enable=true,loop.max_depth=3} | |
# | |
# (Note that I just pulled the option names for this schema out of thin | |
# air plus vague recollection of options I've seen in places; they probably | |
# don't make sense) | |
from __future__ import print_function | |
import sys | |
import json | |
# Separator placed between nested property names in a flattened option key,
# e.g. "inliner" + SEP + "threshold". Schema keys must not contain it.
SEP = '-'
# A leaf of this JSON object must be a string, which indicates the type. | |
# For now, there should be no arrays. | |
# The only types that can be specified currently are "bool" and "int". | |
# In the future, adding a list<T> might be useful, so you can do e.g. | |
# --internalize.assume_external=foo_func,bar_func,baz_func | |
# to get the value ["foo_func", "bar_func", "baz_func"] for a list<string>. | |
# | |
# See at the end of this for a strawman for a nice static table-based | |
# recursive way to define these options in C/C++. | |
# Nested option schema. Inner objects group related options under a common
# prefix; every leaf is a string naming the option's type ("bool" or "int").
OPTION_SCHEMA = json.loads('''
{
  "vectorizer": {
    "aggressive": "bool",
    "fast": "bool",
    "loop": {
      "enable": "bool",
      "max_depth": "int"
    }
  },
  "inliner": {
    "enable": "bool",
    "threshold": "int"
  },
  "verbose": "bool"
}
''')
# E.g. turn {"a": {"b": "foo"}} into {"a-b":"foo"} | |
# This transformation simplifies the code by requiring less nesting. | |
# The function unflatten_dict does the opposite transformation. | |
def flatten_dict(d, separator=SEP):
    """Collapse a nested dict into a single-level dict with joined keys.

    E.g. with separator '-', {"a": {"b": "foo"}} becomes {"a-b": "foo"}.

    Args:
        d: the (possibly nested) dict to flatten. Any value that is not a
            dict is treated as a leaf.
        separator: string placed between the key of each nesting level.

    Returns:
        A new dict mapping each joined key path to its leaf value.

    Raises:
        ValueError: if a key contains the separator.
    """
    ret = {}

    def _rec(x, curprefix):
        if isinstance(x, dict):
            for k, v in x.items():
                # A key that already contains the separator would be split
                # into extra levels when the flattened key is split again,
                # so reject it. This is a real check rather than an assert
                # so that it is not stripped under `python -O`.
                if separator in k:
                    raise ValueError(
                        'key {!r} must not contain the separator {!r}'.format(
                            k, separator))
                _rec(v, curprefix + [k])
        else:
            ret[separator.join(curprefix)] = x

    _rec(d, [])
    return ret
# The schema with nested keys joined by the separator, so that an option name
# taken from the command line can be looked up with a single dict access.
FLATTENED_OPTION_SCHEMA = flatten_dict(OPTION_SCHEMA)
def main():
    """Parse sys.argv[1:] against the schema and print the result as JSON.

    Each argument is expected to look like --foo-bar=baz, where "foo-bar" is
    a key of FLATTENED_OPTION_SCHEMA and "baz" is parsed by the
    parse_<type>_option function matching the schema type of that key.

    An argument that is malformed, names an unknown key, or has a value that
    fails to parse is reported with print() and skipped. The accepted options
    are then un-flattened and printed as JSON with an indent of 2.
    """
    # json.loads produces `unicode` leaves on Python 2 and `str` on Python 3.
    # type(u'') is whichever text type the running interpreter has, so the
    # check below never references the Python-2-only `unicode` builtin
    # (which raises NameError on Python 3).
    string_types = (str, type(u''))

    options = {}
    for opt in sys.argv[1:]:
        # We expect opt to look like --foo-bar=baz
        if not opt.startswith('--'):
            print('option must start with "--"')
            continue
        before, _, after = opt[len('--'):].partition('=')
        if after == '':
            # Covers both a missing '=' and an empty value after it.
            print('option not formatted correctly {!r}'.format(opt))
            continue
        ty = FLATTENED_OPTION_SCHEMA.get(before)
        # A missing key yields None, which is not a string either.
        if not isinstance(ty, string_types):
            print('{!r} is not a valid key index'.format(before))
            continue
        # Parsers are found by naming convention among module globals.
        parse = globals().get('parse_{}_option'.format(ty))
        if parse is None:
            print('no parser for option type {!r}'.format(ty))
            continue
        try:
            value = parse(after)
        except Exception:
            # Not a bare except: Ctrl-C and SystemExit must still propagate.
            print('could not parse {!r} as {}'.format(after, ty))
            continue
        options[before] = value
    print(json.dumps(unflatten_dict(options), indent=2))
def unflatten_dict(d, separator=SEP):
    """Expand a flat dict with joined keys into a nested dict.

    E.g. with separator '-', {"a-b": "foo"} becomes {"a": {"b": "foo"}}.
    This is the opposite transformation to flatten_dict.

    Args:
        d: dict whose keys are strings made of path components joined by
            the separator.
        separator: string on which each key is split into components.

    Returns:
        A new nested dict; the input dict is not modified.
    """
    ret = {}
    for k, v in d.items():
        multi_index_set(ret, k.split(separator), v)
    return ret
def multi_index_set(d, indices, v):  # set as in get/set, not set theory
    """Store v in nested dict d at the path given by indices.

    Equivalent to d[indices[0]][indices[1]]...[indices[-1]] = v, except that
    any missing intermediate level is created as an empty dict.

    Args:
        d: the dict to modify in place.
        indices: non-empty sequence of keys, outermost first.
        v: the value to store at the end of the path.
    """
    assert len(indices) > 0
    for k in indices[:-1]:
        # Descend one level, creating it first if it does not exist yet.
        d = d.setdefault(k, {})
    d[indices[-1]] = v
# Option parsing | |
def parse_bool_option(s):
    """Parse the text of a boolean option value.

    Matching is case-insensitive. 'true', 'on' and '1' give True;
    'false', 'off' and '0' give False.

    Args:
        s: the option value as a string.

    Returns:
        The parsed bool.

    Raises:
        ValueError: if s is not one of the accepted spellings.
    """
    lowered = s.lower()
    if lowered in ('true', 'on', '1'):
        return True
    if lowered in ('false', 'off', '0'):
        return False
    # Report the text as the caller wrote it, not the lowercased copy.
    raise ValueError('Could not parse {!r} as bool'.format(s))
def parse_int_option(s):
    """Parse the text of an integer option value.

    Args:
        s: the option value as a string.

    Returns:
        int(s).

    Raises:
        ValueError: if int() rejects the text.
    """
    return int(s)
# Only parse the command line when run as a script, so that importing this
# module does not read sys.argv or print anything.
if __name__ == '__main__':
    main()
# Addendum: | |
# | |
# Strawman for distributing the schema across the source code of a C/C++ | |
# program, to keep the definitions of options appropriately "local". Some | |
# of the options in the schema above don't really make sense from this | |
# point of view... but whatever. | |
# | |
# OptionSchema.h: | |
# >>> | |
# enum OptionKind {
# Bool,
# Int,
# Subobject
# };
#
# struct SchemaEntry {
# const char *Key;
# OptionKind ValueKind;
# SchemaEntry *OptionalSubobject;
# };
# <<< | |
# | |
# | |
# toplevel.cpp: | |
# >>> | |
# extern SchemaEntry VectorizerSchemaEntry[]; | |
# extern SchemaEntry InlinerSchemaEntry[]; | |
# | |
# SchemaEntry TopLevelSchemaEntry[] = {
# {"vectorizer", Subobject, VectorizerSchemaEntry},
# {"inliner", Subobject, InlinerSchemaEntry},
# {"verbose", Bool},
# {0}
# };
# <<< | |
# | |
# | |
# inliner.cpp: | |
# >>> | |
# SchemaEntry InlinerSchemaEntry[] = { | |
# {"enable", Bool}, | |
# {"threshold", Int}, | |
# {0} | |
# }; | |
# <<< |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment