chisophugis · August 21, 2014 19:55
diff --git a/options.py b/options.py
 #!/usr/bin/env python
 #
 # A prototype for consistently handling command line options by
 # interpreting them as a JSON object, following a schema.
 # The result of command line option parsing is a "JSON object" which
 # can be passed around and manipulated as desired.
 #
 # Example:
 # $ ./options.py --vectorizer-loop-enable=true --inliner-enable=true --inliner-threshold=6
 # {
 #   "inliner": {
 #     "threshold": 6,
 #     "enable": true
 #   },
 #   "vectorizer": {
 #     "loop": {
 #       "enable": true
 #     }
 #   }
 # }
 #
 # See the comment above the OPTION_SCHEMA variable for what the schema looks
 # like (the functionality available now is quite limited, but hopefully
 # conveys the gist).
 #
 # Skip down to main() to see the primary flow of the code.
 #
 # For now, '-' is used as the separator for properties. This is just because
 # '-' is common in command line option names.  It actually might make more
 # sense to use '.' which might make things more intuitive and make it clear
 # that this is a more "formalized" option scheme:
 # $ foo --foo.bar=true --bar.quux.quuux=8
 # You can tweak the global SEP variable to change this to be '.' or whatever.
 #
 # Note that by a coincidence with braced shell expansion, this works:
 # $ ./options.py --vectorizer{-aggressive=on,-loop{-enable=true,-max_depth=3}}
 # Or with SEP == '.':
 # $ ./options.py --vectorizer{.aggressive=on,.loop{.enable=true,.max_depth=3}}
 # $ ./options.py --vectorizer.{aggressive=on,loop.{enable=true,max_depth=3}}
 # $ ./options.py --vectorizer.{aggressive=on,loop.enable=true,loop.max_depth=3}
 #
 # (Note that I just pulled the option names for this schema out of thin
 # air plus vague recollection of options I've seen in places; they probably
 # don't make sense)

 from __future__ import print_function

 import sys
 import json

 # Separator placed between nested schema keys to form a flat option name,
 # e.g. {"inliner": {"threshold": ...}} <-> "inliner-threshold".
 SEP = '-'

 # A leaf of this JSON object must be a string, which indicates the type.
 # For now, there should be no arrays.
 # The only types that can be specified currently are "bool" and "int".
 # In the future, adding a list<T> might be useful, so you can do e.g.
 # --internalize.assume_external=foo_func,bar_func,baz_func
 # to get the value ["foo_func", "bar_func", "baz_func"] for a list<string>.
 #
 # See the end of this file for a strawman for a nice static table-based
 # recursive way to define these options in C/C++.
 # The schema is parsed from JSON text when the module is loaded; nested
 # objects group options and each leaf string names the option's value type.
 OPTION_SCHEMA = json.loads('''
 {
  "vectorizer": {
    "aggressive": "bool",
    "fast": "bool",
    "loop": {
      "enable": "bool",
      "max_depth": "int"
    }
  },
  "inliner": {
    "enable": "bool",
    "threshold": "int"
  },
  "verbose": "bool"
 }
 ''')

 # E.g. turn {"a": {"b": "foo"}} into {"a-b":"foo"}
 # This transformation simplifies the code by requiring less nesting.
 # The function unflatten_dict does the opposite transformation.
 def flatten_dict(d, separator=SEP):
    """Collapse a nested dict into a one-level dict keyed by joined paths.

    Every non-dict value reachable from ``d`` becomes one entry whose key is
    the chain of dict keys leading to it, joined with ``separator``; e.g.
    ``{"a": {"b": "foo"}}`` becomes ``{"a-b": "foo"}`` when the separator is
    ``'-'``.  Empty nested dicts contribute no entries, and a non-dict ``d``
    ends up stored under the empty-string key.

    Asserts that no dict key contains ``separator``.
    """
    def _leaves(node, path):
        # Depth-first walk producing one (flat_key, leaf_value) pair per leaf.
        if not isinstance(node, dict):
            yield separator.join(path), node
            return
        for name, child in node.items():
            assert separator not in name
            for pair in _leaves(child, path + [name]):
                yield pair

    return dict(_leaves(d, []))

 # Flat lookup table from full option name (e.g. "vectorizer-loop-enable") to
 # its type string; main() consults it to validate each incoming option.
 FLATTENED_OPTION_SCHEMA = flatten_dict(OPTION_SCHEMA)



 def main(argv=None):
    """Parse ``--key=value`` options against the schema and print them as JSON.

    Each argument must look like ``--<flat-key>=<value>`` where the flat key
    is an entry of FLATTENED_OPTION_SCHEMA.  The value is converted by the
    module-level ``parse_<type>_option`` function matching the schema type.
    Arguments that are malformed, unknown, or unparsable are reported with a
    printed message and skipped; the accepted options are nested with
    unflatten_dict() and printed as indented JSON.

    argv: list of option strings to parse; defaults to sys.argv[1:].
    """
    if argv is None:
        argv = sys.argv[1:]
    # json.loads produces unicode strings on Python 2 and str on Python 3,
    # where the name `unicode` no longer exists; accept whichever applies.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    options = {}
    for opt in argv:
        # We expect opt to look like --foo-bar=baz
        if not opt.startswith('--'):
            print('option must start with "--"')
            continue
        before, _, after = opt[len('--'):].partition('=')
        if after == '':
            # Covers both a missing '=' and an empty value after it.
            print('option not formatted correctly {!r}'.format(opt))
            continue
        ty = FLATTENED_OPTION_SCHEMA.get(before, None)
        if ty is None or not isinstance(ty, string_types):
            print('{!r} is not a valid key index'.format(before))
            continue
        # Parsers are found by naming convention; a schema type without a
        # matching parse_<type>_option function is reported instead of
        # crashing with a KeyError.
        parse = globals().get('parse_{}_option'.format(ty))
        if parse is None:
            print('no parser for option type {!r}'.format(ty))
            continue
        try:
            value = parse(after)
        except Exception:
            # Not a bare except, so KeyboardInterrupt/SystemExit propagate.
            print('could not parse {!r} as {}'.format(after, ty))
            continue
        options[before] = value
    # print('The resulting option JSON is:')
    print(json.dumps(unflatten_dict(options), indent=2))


 def unflatten_dict(d, separator=SEP):
    """Rebuild a nested dict from a flat dict with separator-joined keys.

    Each key of ``d`` is split on ``separator`` and its value is stored at
    that path in a fresh dict via multi_index_set(); e.g. ``{"a-b": "foo"}``
    becomes ``{"a": {"b": "foo"}}`` when the separator is ``'-'``.
    """
    nested = {}
    for flat_key in d:
        path = flat_key.split(separator)
        multi_index_set(nested, path, d[flat_key])
    return nested
 def multi_index_set(d, indices, v):
    """Store ``v`` in nested dict ``d`` at the key path ``indices``.

    "set" is meant as in get/set, not set theory.  An empty dict is created
    for each key along the path that is not present yet, then ``v`` is
    assigned under the last key.  ``d`` is modified in place and nothing is
    returned.  ``indices`` must be non-empty (asserted).
    """
    assert len(indices) > 0
    parents, leaf = indices[:-1], indices[-1]
    node = d
    for name in parents:
        if name not in node:
            node[name] = {}
        node = node[name]
    node[leaf] = v



 # Option parsing

 def parse_bool_option(s):
    """Convert a command line value to a bool.

    Accepts 'true'/'on'/'1' as True and 'false'/'off'/'0' as False,
    ignoring case.

    Raises ValueError for any other text, which is the same failure type
    int() produces in parse_int_option.  The message quotes the text as it
    was given rather than its lowercased form.
    """
    lowered = s.lower()
    if lowered in ('true', 'on', '1'):
        return True
    if lowered in ('false', 'off', '0'):
        return False
    raise ValueError('Could not parse {!r} as bool'.format(s))

 def parse_int_option(s):
    """Convert a command line value to an int with the built-in int().

    Whatever int() raises for text it cannot convert is propagated.
    """
    number = int(s)
    return number


 # Runs the option parser whenever this file is executed.  The call has to
 # stay below the parse_*_option definitions because main() looks them up by
 # name in globals() at call time.
 main()


 # Addendum:
 #
 # Strawman for distributing the schema across the source code of a C/C++
 # program, to keep the definitions of options appropriately "local". Some
 # of the options in the schema above don't really make sense from this
 # point of view... but whatever.
 #
 # OptionSchema.h:
 # >>>
 # enum OptionKind {
 #   Bool,
 #   Int,
 #   Subobject
 # }
 #
 # struct SchemaEntry {
 #   const char *Key;
 #   OptionKind ValueKind;
 #   SchemaEntry *OptionalSubobject;
 # }
 # <<<
 #
 #
 # toplevel.cpp:
 # >>>
 # extern SchemaEntry VectorizerSchemaEntry[];
 # extern SchemaEntry InlinerSchemaEntry[];
 #
 # SchemaEntry TopLevelSchemaEntry[] = {
 #   {"vectorizer", Subobject, &VectorizerSchemaEntry},
 #   {"inliner", Subobject, &InlinerSchemaEntry},
 #   {"verbose", Bool},
 #   {0}
 # };
 # <<<
 #
 #
 # inliner.cpp:
 # >>>
 # SchemaEntry InlinerSchemaEntry[] = {
 #   {"enable", Bool},
 #   {"threshold", Int},
 #   {0}
 # };
 # <<<
	#!/usr/bin/env python
	#
	# A prototype for consistently handling command line options by
	# interpreting them as a JSON object, following a schema.
	# The result of command line option parsing is a "JSON object" which
	# can be passed around and manipulated as desired.
	#
	# Example:
	# $ ./options.py --vectorizer-loop-enable=true --inliner-enable=true --inliner-threshold=6
	# {
	# "inliner": {
	# "threshold": 6,
	# "enable": true
	# },
	# "vectorizer": {
	# "loop": {
	# "enable": true
	# }
	# }
	# }
	#
	# See the comment above the OPTION_SCHEMA variable for what the schema looks
	# like (the functionality available now is quite limited, but hopefully
	# conveys the gist).
	#
	# Skip down to main() to see the primary flow of the code.
	#
	# For now, '-' is used as the separator for properties. This is just because
	# '-' is common in command line option names. It actually might make more
	# sense to use '.' which might make things more intuitive and make it clear
	# that this is a more "formalized" option scheme:
	# $ foo --foo.bar=true --bar.quux.quuux=8
	# You can tweak the global SEP variable to change this to be '.' or whatever.
	#
	# Note that by a coincidence with braced shell expansion, this works:
	# $ ./options.py --vectorizer{-aggressive=on,-loop{-enable=true,-max_depth=3}}
	# Or with SEP == '.':
	# $ ./options.py --vectorizer{.aggressive=on,.loop{.enable=true,.max_depth=3}}
	# $ ./options.py --vectorizer.{aggressive=on,loop.{enable=true,max_depth=3}}
	# $ ./options.py --vectorizer.{aggressive=on,loop.enable=true,loop.max_depth=3}
	#
	# (Note that I just pulled the option names for this schema out of thin
	# air plus vague recollection of options I've seen in places; they probably
	# don't make sense)

	from __future__ import print_function

	import sys
	import json

	# Separator placed between nested schema keys to form a flat option name,
	# e.g. {"inliner": {"threshold": ...}} <-> "inliner-threshold".
	SEP = '-'

	# A leaf of this JSON object must be a string, which indicates the type.
	# For now, there should be no arrays.
	# The only types that can be specified currently are "bool" and "int".
	# In the future, adding a list<T> might be useful, so you can do e.g.
	# --internalize.assume_external=foo_func,bar_func,baz_func
	# to get the value ["foo_func", "bar_func", "baz_func"] for a list<string>.
	#
	# See the end of this file for a strawman for a nice static table-based
	# recursive way to define these options in C/C++.
	# The schema is parsed from JSON text when the module is loaded; nested
	# objects group options and each leaf string names the option's value type.
	OPTION_SCHEMA = json.loads('''
	{
	"vectorizer": {
	"aggressive": "bool",
	"fast": "bool",
	"loop": {
	"enable": "bool",
	"max_depth": "int"
	}
	},
	"inliner": {
	"enable": "bool",
	"threshold": "int"
	},
	"verbose": "bool"
	}
	''')

	# E.g. turn {"a": {"b": "foo"}} into {"a-b":"foo"}
	# This transformation simplifies the code by requiring less nesting.
	# The function unflatten_dict does the opposite transformation.
	def flatten_dict(d, separator=SEP):
	    """Collapse a nested dict into a one-level dict keyed by joined paths.

	    Every non-dict value reachable from ``d`` becomes one entry whose key
	    is the chain of dict keys leading to it, joined with ``separator``;
	    e.g. ``{"a": {"b": "foo"}}`` becomes ``{"a-b": "foo"}`` when the
	    separator is ``'-'``.  Empty nested dicts contribute no entries.

	    Asserts that no dict key contains ``separator``.
	    """
	    ret = {}
	    def _rec(x, curprefix):
	        # Descend into dicts, extending the key path; record leaves.
	        if isinstance(x, dict):
	            for k, v in x.items():
	                assert separator not in k
	                _rec(v, curprefix + [k])
	        else:
	            ret[separator.join(curprefix)] = x
	    _rec(d, [])
	    return ret

	# Flat lookup table from full option name (e.g. "vectorizer-loop-enable") to
	# its type string; main() consults it to validate each incoming option.
	FLATTENED_OPTION_SCHEMA = flatten_dict(OPTION_SCHEMA)



	def main(argv=None):
	    """Parse ``--key=value`` options against the schema and print JSON.

	    Each argument must look like ``--<flat-key>=<value>`` where the flat
	    key is an entry of FLATTENED_OPTION_SCHEMA.  The value is converted by
	    the module-level ``parse_<type>_option`` function matching the schema
	    type.  Arguments that are malformed, unknown, or unparsable are
	    reported with a printed message and skipped; the accepted options are
	    nested with unflatten_dict() and printed as indented JSON.

	    argv: list of option strings to parse; defaults to sys.argv[1:].
	    """
	    if argv is None:
	        argv = sys.argv[1:]
	    # json.loads produces unicode strings on Python 2 and str on Python 3,
	    # where the name `unicode` no longer exists; accept whichever applies.
	    try:
	        string_types = (str, unicode)
	    except NameError:
	        string_types = (str,)
	    options = {}
	    for opt in argv:
	        # We expect opt to look like --foo-bar=baz
	        if not opt.startswith('--'):
	            print('option must start with "--"')
	            continue
	        before, _, after = opt[len('--'):].partition('=')
	        if after == '':
	            # Covers both a missing '=' and an empty value after it.
	            print('option not formatted correctly {!r}'.format(opt))
	            continue
	        ty = FLATTENED_OPTION_SCHEMA.get(before, None)
	        if ty is None or not isinstance(ty, string_types):
	            print('{!r} is not a valid key index'.format(before))
	            continue
	        # Parsers are found by naming convention; a schema type without a
	        # matching parse_<type>_option function is reported instead of
	        # crashing with a KeyError.
	        parse = globals().get('parse_{}_option'.format(ty))
	        if parse is None:
	            print('no parser for option type {!r}'.format(ty))
	            continue
	        try:
	            value = parse(after)
	        except Exception:
	            # Not a bare except, so KeyboardInterrupt/SystemExit propagate.
	            print('could not parse {!r} as {}'.format(after, ty))
	            continue
	        options[before] = value
	    # print('The resulting option JSON is:')
	    print(json.dumps(unflatten_dict(options), indent=2))


	def unflatten_dict(d, separator=SEP):
	    """Rebuild a nested dict from a flat dict with separator-joined keys.

	    Each key of ``d`` is split on ``separator`` and its value is stored at
	    that path in a fresh dict via multi_index_set(); e.g.
	    ``{"a-b": "foo"}`` becomes ``{"a": {"b": "foo"}}`` when the separator
	    is ``'-'``.
	    """
	    ret = {}
	    for k, v in d.items():
	        multi_index_set(ret, k.split(separator), v)
	    return ret
	def multi_index_set(d, indices, v): # set as in get/set, not set theory
	    """Store ``v`` in nested dict ``d`` at the key path ``indices``.

	    An empty dict is created for each key along the path that is not
	    present yet, then ``v`` is assigned under the last key.  ``d`` is
	    modified in place and nothing is returned.  ``indices`` must be
	    non-empty (asserted).
	    """
	    assert len(indices) > 0
	    for k in indices[:-1]:
	        if k not in d:
	            d[k] = {}
	        # Step down one level; d now refers to the child dict.
	        d = d[k]
	    d[indices[-1]] = v



	# Option parsing

	def parse_bool_option(s):
	    """Convert a command line value to a bool.

	    Accepts 'true'/'on'/'1' as True and 'false'/'off'/'0' as False,
	    ignoring case.

	    Raises ValueError for any other text, which is the same failure type
	    int() produces in parse_int_option.  The message quotes the text as it
	    was given rather than its lowercased form.
	    """
	    lowered = s.lower()
	    if lowered in ('true', 'on', '1'):
	        return True
	    if lowered in ('false', 'off', '0'):
	        return False
	    raise ValueError('Could not parse {!r} as bool'.format(s))

	def parse_int_option(s):
	    """Convert a command line value to an int with the built-in int().

	    Whatever int() raises for text it cannot convert is propagated.
	    """
	    return int(s)


	# Runs the option parser whenever this file is executed.  The call has to
	# stay below the parse_*_option definitions because main() looks them up by
	# name in globals() at call time.
	main()


	# Addendum:
	#
	# Strawman for distributing the schema across the source code of a C/C++
	# program, to keep the definitions of options appropriately "local". Some
	# of the options in the schema above don't really make sense from this
	# point of view... but whatever.
	#
	# OptionSchema.h:
	# >>>
	# enum OptionKind {
	# Bool,
	# Int,
	# Subobject
	# }
	#
	# struct SchemaEntry {
	# const char *Key;
	# OptionKind ValueKind;
	# SchemaEntry *OptionalSubobject;
	# }
	# <<<
	#
	#
	# toplevel.cpp:
	# >>>
	# extern SchemaEntry VectorizerSchemaEntry[];
	# extern SchemaEntry InlinerSchemaEntry[];
	#
	# SchemaEntry TopLevelSchemaEntry[] = {
	# {"vectorizer", Subobject, &VectorizerSchemaEntry},
	# {"inliner", Subobject, &InlinerSchemaEntry},
	# {"verbose", Bool},
	# {0}
	# };
	# <<<
	#
	#
	# inliner.cpp:
	# >>>
	# SchemaEntry InlinerSchemaEntry[] = {
	# {"enable", Bool},
	# {"threshold", Int},
	# {0}
	# };
	# <<<