Last active
October 28, 2021 19:16
-
-
Save ajkerrigan/19735440cf035ae6a10c4ef1577f6936 to your computer and use it in GitHub Desktop.
Extract CCM to SOC2 control mappings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| First pass at extracting a mapping of CCM 3.0.1 controls to their associated SOC 2 | |
| control IDs. | |
| Dependencies: openpyxl pandas | |
| """ | |
| import json | |
| from itertools import islice | |
| import click | |
| import openpyxl | |
| import pandas as pd | |
| @click.command( | |
| help="Given a CCM 3.0.1 spreadsheet in Excel format, output a mapping of " | |
| "CCM control ID -> SOC2 control IDs in JSON format." | |
| ) | |
| @click.option( | |
| "-f", | |
| "--input-file", | |
| required=True, | |
| type=click.Path(), | |
| help="A CCM 3.0.1 spreadsheet in Excel format", | |
| ) | |
| @click.option("-o", "--output-file", type=click.File(mode="w"), default="-") | |
| def cli(input_file, output_file): | |
| wb = openpyxl.open(input_file) | |
| ws = wb.active | |
| data = ws.values | |
| # The CCM spreadsheet has column headings spread across the first four rows of | |
| # the sheet. Use the last non-empty row as the real/reference column heading. | |
| headings = list(zip(*islice(data, 4))) | |
| cols = [next(i for i in (*reversed(h), "N/A") if i) for h in headings] | |
| # With the header rows out of the way, convert the remaining data to a Pandas | |
| # DataFrame. | |
| data = list(data) | |
| df = pd.DataFrame(data, columns=cols) | |
| # Give friendlier names to columns we care about. | |
| df.rename( | |
| columns={ | |
| "CCM V3.0 Control ID": "ccm_control_id", | |
| "AICPA\n2014 TSC": "soc2_control_id", | |
| }, | |
| inplace=True, | |
| ) | |
| # Each CCM control row can have 0 or more associated SOC2 control IDs separated by newlines. | |
| # Expand those here. | |
| df["soc2_control_ids"] = df.soc2_control_id.str.split("\n") | |
| df.set_index("ccm_control_id", inplace=True) | |
| # Dump a JSON mapping of CCM control ID --> SOC2 control IDs. | |
| df[["soc2_control_ids"]].dropna().to_json(output_file, orient="index", indent=4) | |
| if __name__ == "__main__": | |
| cli() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment