Created
July 9, 2021 14:34
-
-
Save tuckerzp/1d27fa2fae81a56cdd2323667abff480 to your computer and use it in GitHub Desktop.
Format JSON schemas using NIST's format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import json | |
from typing import Any, Dict, List, Union, TextIO | |
# Base names (without the ".json" extension) of the OSCAL schema files
# that main() reformats in place.
OSCAL_CATALOG_FILES: List[str] = [
    "oscal_catalog_schema",
    "oscal_component_schema",
    "oscal_profile_schema",
    "oscal_ssp_schema",
]
# Alias for the scalar (non-container) values a decoded JSON document can
# hold. `float` is included because JSON numbers with a fraction or an
# exponent are decoded to float by json.load. Honestly not sure that we'll
# ever see null, but it's good to do our best to handle it when we can.
JsonTypes = Union[str, bool, type(None), int, float]
# One shared, default-configured JSON encoder for the whole module.
# Re-using it is slightly cheaper than building a fresh
# json.JSONEncoder() for every value we need to encode. Plain
# `json.dumps` would probably do as well; the explicit encoder is a
# leftover from an earlier attempt built on
# json.JSONEncoder().iterencode().
ENCODER: json.JSONEncoder = json.JSONEncoder()
def dump_list(data: List[Any], level: int, out_file: TextIO):
    """
    Dump a list in the same format as the one used by the OSCAL schema files.

    The list starts on a fresh line, indented by ``level * 2`` spaces. The
    first item follows ``[ `` on that line, every further item gets its own
    line one indentation level deeper, and `` ]`` closes the list directly
    after the last item. No trailing newline is written. An empty list is
    written as ``[ ]`` in the same position.

    :param data: The list to dump
    :param level: The level of indentation
    :param out_file: The file to write to
    """
    indent = " " * level * 2

    # A list always opens on its own line.
    out_file.write("\n" + indent)

    # Without this guard an empty list would produce no output at all and
    # leave the caller with a dangling `"key" : ,` -- invalid JSON.
    if not data:
        out_file.write("[ ]")
        return

    # In the schema files lists only ever hold simple values, so every item
    # is encoded up front. Anything else the encoder accepts (e.g. a nested
    # container) still yields valid JSON, just in the encoder's compact form
    # rather than the NIST layout.
    items = [ENCODER.encode(item) for item in data]

    # Items after the first sit one level deeper; because "[ " is two
    # characters wide this lines them up with the first item.
    separator = ",\n" + " " * (level + 1) * 2
    out_file.write("[ " + separator.join(items) + " ]")
def dump_dict(data: Dict[str, Any], level: int, out_file: TextIO):
    """
    Dump a dictionary in the same format used by the OSCAL Schema files.

    The object starts on a fresh line, indented by ``level * 2`` spaces. The
    first key follows ``{ `` on that line, every further key gets its own
    line one indentation level deeper, and `` }`` closes the object directly
    after the last value. No trailing newline is written. An empty
    dictionary is written as ``{ }`` in the same position.

    :param data: The data to dump
    :param level: The level of indentation
    :param out_file: The file to write to
    :raises TypeError: If a value is not JSON serializable
    """
    # This leading newline is important: the NIST OSCAL schema files always
    # start with a blank line, and nested objects always open on their own
    # line.
    print(file=out_file)
    print(" " * level * 2, end="", file=out_file)

    # Without this guard an empty dictionary would produce no output at all
    # and leave the caller with a dangling `"key" : ,` -- invalid JSON.
    if not data:
        print("{ }", end="", file=out_file)
        return

    # The first key shares the line with the opening brace, giving
    #     { KEY :
    # Every later key is indented one level deeper; because "{ " is two
    # characters wide the keys still stay aligned.
    print("{ ", end="", file=out_file)
    level += 1

    last_idx = len(data) - 1
    for idx, (key, value) in enumerate(data.items()):
        if idx > 0:
            print(" " * level * 2, end="", file=out_file)

        # JSON object keys are always strings, so the key can simply be
        # dumped as a JSON-encoded string.
        print(ENCODER.encode(key), end=" ", file=out_file)
        print(":", end=" ", file=out_file)

        if isinstance(value, dict):
            # Nested objects recurse. Extremely deep nesting could hit the
            # interpreter's recursion limit, but that is pretty unlikely.
            dump_dict(value, level, out_file)
        elif isinstance(value, list):
            # Lists are the only other container type we lay out ourselves.
            dump_list(value, level, out_file)
        else:
            # Everything else is a scalar (str, int, float, bool, None) and
            # goes straight through the encoder. Routing all remaining
            # values here means a float is written instead of being
            # silently skipped, and an unsupported type raises instead of
            # leaving a hole in the output.
            print(ENCODER.encode(value), end="", file=out_file)

        # The end of the dictionary never results in a new line. This is
        # rather important because the NIST OSCAL schema files also never
        # end with a newline character.
        if idx == last_idx:
            print(" }", end="", file=out_file)
        else:
            print(",", file=out_file)
def main():
    """
    Reformat every schema named in OSCAL_CATALOG_FILES in place.

    For each entry, ``<name>.json`` is loaded from the current working
    directory, rendered with dump_dict at indentation level 1, and written
    back over the same file.
    """
    # Local import: only this function needs an in-memory text buffer.
    import io

    for filename in OSCAL_CATALOG_FILES:
        path = f"{filename}.json"

        # Be explicit about UTF-8 instead of relying on the locale's default
        # encoding, which differs between platforms.
        with open(path, encoding="utf-8") as source_file:
            data = json.load(source_file)

        # Render completely in memory before touching the file again:
        # opening with "w" truncates immediately, so a failure halfway
        # through formatting would otherwise destroy the source file.
        rendered = io.StringIO()
        dump_dict(data, 1, rendered)

        with open(path, "w", encoding="utf-8") as output_file:
            output_file.write(rendered.getvalue())
# Run the formatter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment