Skip to content

Instantly share code, notes, and snippets.

@tuckerzp
Created July 9, 2021 14:34
Show Gist options
  • Save tuckerzp/1d27fa2fae81a56cdd2323667abff480 to your computer and use it in GitHub Desktop.
Save tuckerzp/1d27fa2fae81a56cdd2323667abff480 to your computer and use it in GitHub Desktop.
Format JSON schemas using NIST's format
#!/usr/bin/env python3
import json
from typing import Any, Dict, List, Union, TextIO
# Base names (without the ".json" extension) of the OSCAL schema files
# that get re-formatted in place.
OSCAL_CATALOG_FILES = [
    "oscal_catalog_schema",
    "oscal_component_schema",
    "oscal_profile_schema",
    "oscal_ssp_schema",
]

# Basic type alias for the scalar values that can appear in a JSON document.
# Honestly not sure that we'll ever see null, but it's good to do our best
# to handle it when we can. `float` is included because JSON numbers with a
# fractional part are decoded to Python floats.
JsonTypes = Union[str, bool, None, int, float]

# Using one shared JSON encoder rather than re-creating one with
# json.JSONEncoder() every time we need to encode is slightly more
# efficient. We might be able to just use `json.dumps`, but this stemmed
# from originally trying to do all of this with
# json.JSONEncoder().iterencode().
ENCODER = json.JSONEncoder()
def dump_list(data: List[JsonTypes], level: int, out_file: TextIO) -> None:
    """
    Dump a list in the same format as the one used by the OSCAL schema files.

    The list starts on a fresh line, indented by ``level * 2`` spaces, with
    the first item on the same line as the opening bracket and every later
    item on its own line one level deeper. No trailing newline is written.
    An empty list is written as ``[ ]`` so the output is still valid JSON.

    :param data: The list to dump
    :param level: The level of indentation
    :param out_file: The file to write to
    """
    # Since lists are pretty short and simple, we can go ahead and just format
    # all the items as JSON strings right away. This is because in the JSON
    # lists we're dealing with, lists always contain simple data types.
    # If they did not, nested containers would be written in the encoder's
    # compact single-line form rather than in the OSCAL layout.
    items = [ENCODER.encode(item) for item in data]
    opening = " " * level * 2 + "[ "

    # The list always starts on its own line.
    print(file=out_file)

    if not items:
        # Without this case nothing at all would be written for an empty
        # list, leaving a dangling `"key" : ` in the output.
        print(opening + "]", end="", file=out_file)
        return

    # "[ " is two characters wide, so indenting the remaining items by one
    # extra level lines them up underneath the first item.
    separator = ",\n" + " " * (level + 1) * 2
    print(opening + separator.join(items) + " ]", end="", file=out_file)
def dump_dict(data: Dict[str, Any], level: int, out_file: TextIO) -> None:
    """
    Dump a dictionary in the same format used by the OSCAL Schema files.

    The dictionary starts on a fresh line, indented by ``level * 2`` spaces,
    with the first key on the same line as the opening brace and every later
    key on its own line one level deeper. No trailing newline is written.
    An empty dictionary is written as ``{ }`` so the output is still valid
    JSON.

    :param data: The data to dump
    :param level: The level of indentation
    :param out_file: The file to write to
    :raises TypeError: If a value is neither a dict, a list, nor something
        the JSON encoder can serialize
    """
    # This particular print is important as the NIST OSCAL schema files
    # always start with a blank line, so our logic needs to account for that.
    print(file=out_file)
    print(" " * level * 2 + "{ ", end="", file=out_file)

    if not data:
        # Without this case nothing at all would be written for an empty
        # dictionary, leaving a dangling `"key" : ` in the output.
        print("}", end="", file=out_file)
        return

    # The first key shares its line with "{ ". Because "{ " is two
    # characters wide, indenting the remaining keys by one extra level keeps
    # all of the keys aligned.
    child_level = level + 1
    last_idx = len(data) - 1

    for idx, (key, value) in enumerate(data.items()):
        if idx > 0:
            print(" " * child_level * 2, end="", file=out_file)

        # The key is always going to be a string, which is one of the lovely
        # things about JSON. This means that we can just dump it as a
        # JSON-encoded string and continue.
        print(ENCODER.encode(key), end=" : ", file=out_file)

        if isinstance(value, dict):
            # Dictionaries result in a recursive call to this function.
            # Theoretically, if we had too many layers of dictionaries we
            # could run into issues with stack depth, but that is pretty
            # unlikely.
            dump_dict(value, child_level, out_file)
        elif isinstance(value, list):
            # Lists are the only complex/special type in addition to
            # dictionaries that we support.
            dump_list(value, child_level, out_file)
        else:
            # Every remaining JSON value (string, number -- including
            # floats -- boolean, null) is dumped directly by the encoder.
            # Values the encoder cannot serialize raise instead of being
            # silently skipped.
            print(ENCODER.encode(value), end="", file=out_file)

        # The end of the dictionary never results in a new line, and this is
        # rather important because the NIST OSCAL schema files also never
        # end with a newline character.
        if idx == last_idx:
            print(" }", end="", file=out_file)
        else:
            print(",", file=out_file)
def main() -> None:
    """
    Re-format every file named in OSCAL_CATALOG_FILES in place.

    Each ``<name>.json`` file is looked up relative to the current working
    directory, parsed, and overwritten with the OSCAL-style layout produced
    by dump_dict.
    """
    # Only needed here, for staging the formatted output in memory.
    import io

    for filename in OSCAL_CATALOG_FILES:
        path = f"{filename}.json"

        # JSON text is UTF-8; don't depend on the platform's locale encoding.
        with open(path, encoding="utf-8") as source_file:
            data = json.load(source_file)

        # Format into memory first. Opening the file with "w" truncates it
        # immediately, so formatting straight into it would destroy the
        # source file if anything failed part-way through.
        staged = io.StringIO()
        dump_dict(data, 1, staged)

        with open(path, "w", encoding="utf-8") as output_file:
            output_file.write(staged.getvalue())
# Only run the formatter when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment