tuckerzp · July 9, 2021 14:34
diff --git a/format.py b/format.py
 #!/usr/bin/env python3
 import json
 from typing import Any, Dict, List, Union, TextIO

 # Base names (without the ".json" extension) of the schema files that
 # main() re-formats in place.
 OSCAL_CATALOG_FILES = [
    "oscal_catalog_schema",
    "oscal_component_schema",
    "oscal_profile_schema",
    "oscal_ssp_schema",
 ]

 # Basic type alias for the scalar values JSON can hold. Honestly not sure
 # that we'll ever see null but it's good to do our best to handle it when
 # we can. float is included because json.load() returns a float for any
 # JSON number that has a fraction or an exponent.
 JsonTypes = Union[str, bool, type(None), int, float]

 # Using a consistent JSON encoder rather than re-creating one as
 # json.JSONEncoder().encode() every time that we need to encode is
 # slightly more efficient. We might be able to just use `json.dumps`
 # but this stemmed from originally trying to do all this with
 # json.JSONEncoder().iterencode()
 ENCODER = json.JSONEncoder()

 def dump_list(data: List[JsonTypes], level: int, out_file: TextIO):
    """
    Dump a list in the same format as the one used by the OSCAL schema files.

    The list opens on a fresh line, indented by ``level`` steps of two
    spaces, and is wrapped in ``[ `` and `` ]``. Every item after the first
    is written on its own line, one indentation step deeper. Nothing is
    written after the closing bracket. An empty list is written as ``[ ]``.

    :param data: The list to dump
    :param level: The level of indentation
    :param out_file: The file to write to
    """
    # Since lists are pretty short and simple, all the items are encoded as
    # JSON strings right away instead of being dispatched on their type the
    # way dump_dict does. A nested list or dictionary therefore comes out in
    # the encoder's compact single-line form.
    items = [ENCODER.encode(item) for item in data]

    opening = "\n" + " " * level * 2
    if not items:
        # Without this branch an empty list wrote nothing at all, leaving
        # the caller's key with no value and the output invalid JSON.
        out_file.write(opening + "[ ]")
        return

    # "[ " is exactly one indentation step wide, so items placed one level
    # deeper line up with the first item that follows the bracket.
    separator = ",\n" + " " * (level + 1) * 2
    out_file.write(opening + "[ " + separator.join(items) + " ]")


 def dump_dict(data: Dict[str, Any], level: int, out_file: TextIO):
    """
    Dump a dictionary in the same format used by the OSCAL Schema files.

    The dictionary opens on a fresh line, indented by ``level`` steps of
    two spaces, and is wrapped in ``{ `` and `` }``. Each entry is written
    as ``KEY : VALUE``; entries after the first go on their own line, one
    indentation step deeper. Nothing is written after the closing brace.
    An empty dictionary is written as ``{ }``.

    :param data: The data to dump
    :param level: The level of indentation
    :param out_file: The file to write to
    :raises TypeError: If a value cannot be encoded by the JSON encoder
    """
    # This leading newline is important as the NIST OSCAL schema files
    # always start with a blank line so our logic needs to account for that.
    out_file.write("\n" + " " * level * 2)

    if not data:
        # Without this branch an empty dictionary wrote nothing at all,
        # which is not valid JSON (and left a parent key without a value).
        out_file.write("{ }")
        return

    out_file.write("{ ")

    # The first key shares its line with '{ '. Because '{ ' is two
    # characters (one indentation step), the keys on the following lines
    # stay aligned with it when written one level deeper.
    level += 1
    indent = " " * level * 2
    last_idx = len(data) - 1

    for idx, (key, value) in enumerate(data.items()):
        if idx > 0:
            out_file.write(indent)

        # The key is always going to be a string, which is one of the lovely
        # things about JSON, so it can be dumped as a JSON-encoded string.
        out_file.write(ENCODER.encode(key) + " : ")

        if isinstance(value, dict):
            # Theoretically too many layers of dictionaries could run into
            # issues with stack depth but that is pretty unlikely.
            dump_dict(value, level, out_file)
        elif isinstance(value, list):
            dump_list(value, level, out_file)
        else:
            # Everything else is a scalar. Handing it straight to the
            # encoder covers str, int, float, bool and None; previously a
            # float (or any unlisted type) was skipped silently, leaving
            # the key without a value.
            out_file.write(ENCODER.encode(value))

        # The end of the dictionary never results in a new line and this
        # is rather important because the NIST OSCAL schema files also
        # never end with a newline character.
        out_file.write(" }" if idx == last_idx else ",\n")


 def main():
    """
    Re-format every schema file named in OSCAL_CATALOG_FILES in place.

    Each ``<name>.json`` file in the current working directory is parsed
    and then overwritten with the output of dump_dict.
    """
    for filename in OSCAL_CATALOG_FILES:
        path = f"{filename}.json"

        # JSON text is UTF-8, so decode it as such instead of relying on
        # the platform's locale-dependent default encoding.
        with open(path, encoding="utf-8") as source_file:
            data = json.load(source_file)

        # The source is fully parsed and closed before the same path is
        # reopened, because mode "w" truncates the file immediately.
        with open(path, "w", encoding="utf-8") as output_file:
            dump_dict(data, 1, output_file)


 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	import json
	from typing import Any, Dict, List, Union, TextIO

	# Base names (without the ".json" extension) of the schema files that
	# main() re-formats in place.
	OSCAL_CATALOG_FILES = [
	    "oscal_catalog_schema",
	    "oscal_component_schema",
	    "oscal_profile_schema",
	    "oscal_ssp_schema",
	]

	# Basic type alias for the scalar values JSON can hold. Honestly not sure
	# that we'll ever see null but it's good to do our best to handle it when
	# we can. float is included because json.load() returns a float for any
	# JSON number that has a fraction or an exponent.
	JsonTypes = Union[str, bool, type(None), int, float]

	# Using a consistent JSON encoder rather than re-creating one as
	# json.JSONEncoder().encode() every time that we need to encode is
	# slightly more efficient. We might be able to just use `json.dumps`
	# but this stemmed from originally trying to do all this with
	# json.JSONEncoder().iterencode()
	ENCODER = json.JSONEncoder()

	def dump_list(data: List[JsonTypes], level: int, out_file: TextIO):
	    """
	    Dump a list in the same format as the one used by the OSCAL schema files.

	    The list opens on a fresh line, indented by ``level`` steps of two
	    spaces, and is wrapped in ``[ `` and `` ]``. Every item after the first
	    is written on its own line, one indentation step deeper. Nothing is
	    written after the closing bracket. An empty list is written as ``[ ]``.

	    :param data: The list to dump
	    :param level: The level of indentation
	    :param out_file: The file to write to
	    """
	    # Since lists are pretty short and simple, all the items are encoded as
	    # JSON strings right away instead of being dispatched on their type the
	    # way dump_dict does. A nested list or dictionary therefore comes out in
	    # the encoder's compact single-line form.
	    items = [ENCODER.encode(item) for item in data]

	    opening = "\n" + " " * level * 2
	    if not items:
	        # Without this branch an empty list wrote nothing at all, leaving
	        # the caller's key with no value and the output invalid JSON.
	        out_file.write(opening + "[ ]")
	        return

	    # "[ " is exactly one indentation step wide, so items placed one level
	    # deeper line up with the first item that follows the bracket.
	    separator = ",\n" + " " * (level + 1) * 2
	    out_file.write(opening + "[ " + separator.join(items) + " ]")


	def dump_dict(data: Dict[str, Any], level: int, out_file: TextIO):
	    """
	    Dump a dictionary in the same format used by the OSCAL Schema files.

	    The dictionary opens on a fresh line, indented by ``level`` steps of
	    two spaces, and is wrapped in ``{ `` and `` }``. Each entry is written
	    as ``KEY : VALUE``; entries after the first go on their own line, one
	    indentation step deeper. Nothing is written after the closing brace.
	    An empty dictionary is written as ``{ }``.

	    :param data: The data to dump
	    :param level: The level of indentation
	    :param out_file: The file to write to
	    :raises TypeError: If a value cannot be encoded by the JSON encoder
	    """
	    # This leading newline is important as the NIST OSCAL schema files
	    # always start with a blank line so our logic needs to account for that.
	    out_file.write("\n" + " " * level * 2)

	    if not data:
	        # Without this branch an empty dictionary wrote nothing at all,
	        # which is not valid JSON (and left a parent key without a value).
	        out_file.write("{ }")
	        return

	    out_file.write("{ ")

	    # The first key shares its line with '{ '. Because '{ ' is two
	    # characters (one indentation step), the keys on the following lines
	    # stay aligned with it when written one level deeper.
	    level += 1
	    indent = " " * level * 2
	    last_idx = len(data) - 1

	    for idx, (key, value) in enumerate(data.items()):
	        if idx > 0:
	            out_file.write(indent)

	        # The key is always going to be a string, which is one of the lovely
	        # things about JSON, so it can be dumped as a JSON-encoded string.
	        out_file.write(ENCODER.encode(key) + " : ")

	        if isinstance(value, dict):
	            # Theoretically too many layers of dictionaries could run into
	            # issues with stack depth but that is pretty unlikely.
	            dump_dict(value, level, out_file)
	        elif isinstance(value, list):
	            dump_list(value, level, out_file)
	        else:
	            # Everything else is a scalar. Handing it straight to the
	            # encoder covers str, int, float, bool and None; previously a
	            # float (or any unlisted type) was skipped silently, leaving
	            # the key without a value.
	            out_file.write(ENCODER.encode(value))

	        # The end of the dictionary never results in a new line and this
	        # is rather important because the NIST OSCAL schema files also
	        # never end with a newline character.
	        out_file.write(" }" if idx == last_idx else ",\n")


	def main():
	    """
	    Re-format every schema file named in OSCAL_CATALOG_FILES in place.

	    Each ``<name>.json`` file in the current working directory is parsed
	    and then overwritten with the output of dump_dict.
	    """
	    for filename in OSCAL_CATALOG_FILES:
	        path = f"{filename}.json"

	        # JSON text is UTF-8, so decode it as such instead of relying on
	        # the platform's locale-dependent default encoding.
	        with open(path, encoding="utf-8") as source_file:
	            data = json.load(source_file)

	        # The source is fully parsed and closed before the same path is
	        # reopened, because mode "w" truncates the file immediately.
	        with open(path, "w", encoding="utf-8") as output_file:
	            dump_dict(data, 1, output_file)


	if __name__ == '__main__':
	    main()