Skip to content

Instantly share code, notes, and snippets.

@ajkerrigan
Last active October 28, 2021 19:16
Show Gist options
  • Select an option

  • Save ajkerrigan/19735440cf035ae6a10c4ef1577f6936 to your computer and use it in GitHub Desktop.

Select an option

Save ajkerrigan/19735440cf035ae6a10c4ef1577f6936 to your computer and use it in GitHub Desktop.
Extract CCM to SOC2 control mappings
"""
First pass at extracting a mapping of CCM 3.0.1 controls to their associated SOC 2
control IDs.
Dependencies: openpyxl pandas
"""
import json
from itertools import islice
import click
import openpyxl
import pandas as pd
@click.command(
help="Given a CCM 3.0.1 spreadsheet in Excel format, output a mapping of "
"CCM control ID -> SOC2 control IDs in JSON format."
)
@click.option(
"-f",
"--input-file",
required=True,
type=click.Path(),
help="A CCM 3.0.1 spreadsheet in Excel format",
)
@click.option("-o", "--output-file", type=click.File(mode="w"), default="-")
def cli(input_file, output_file):
wb = openpyxl.open(input_file)
ws = wb.active
data = ws.values
# The CCM spreadsheet has column headings spread across the first four rows of
# the sheet. Use the last non-empty row as the real/reference column heading.
headings = list(zip(*islice(data, 4)))
cols = [next(i for i in (*reversed(h), "N/A") if i) for h in headings]
# With the header rows out of the way, convert the remaining data to a Pandas
# DataFrame.
data = list(data)
df = pd.DataFrame(data, columns=cols)
# Give friendlier names to columns we care about.
df.rename(
columns={
"CCM V3.0 Control ID": "ccm_control_id",
"AICPA\n2014 TSC": "soc2_control_id",
},
inplace=True,
)
# Each CCM control row can have 0 or more associated SOC2 control IDs separated by newlines.
# Expand those here.
df["soc2_control_ids"] = df.soc2_control_id.str.split("\n")
df.set_index("ccm_control_id", inplace=True)
# Dump a JSON mapping of CCM control ID --> SOC2 control IDs.
df[["soc2_control_ids"]].dropna().to_json(output_file, orient="index", indent=4)
if __name__ == "__main__":
cli()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment