Last active
September 13, 2024 21:45
-
-
Save davidfischer-ch/cdfede27ac053a8332b2127becc07608 to your computer and use it in GitHub Desktop.
sonar_sarif_to_generic.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Convert SARIF to Generic SonarQube issues import format.

Links:

* https://community.sonarsource.com/t/import-sarif-results-as-security-hotspots/83223
* https://docs.sonarqube.org/9.8/analyzing-source-code/importing-external-issues/generic-issue-import-format
* https://gist.github.com/davidfischer-ch/cdfede27ac053a8332b2127becc07608

Author: David Fischer <[email protected]>
""" | |
from __future__ import annotations | |
from pathlib import Path | |
from typing import Final | |
import collections | |
import json | |
import os | |
import sys | |
# SARIF result levels are documented at:
# https://docs.oasis-open.org/sarif/sarif/v2.1.0/os/sarif-v2.1.0-os.html#_Toc34317648
# SonarQube severity can be one of BLOCKER, CRITICAL, MAJOR, MINOR, INFO
LEVEL_TO_SEVERITY: Final[dict[str, str]] = {
    'warning': 'MAJOR',
    'error': 'CRITICAL',
    'note': 'MINOR',
    'none': 'INFO',
}

# Historical (misspelled) name, kept as an alias so existing references keep working.
LEVEL_TO_SERVERITY: Final[dict[str, str]] = LEVEL_TO_SEVERITY

# Issue type used for engines that are not listed in REPORT_TYPE_BY_ENGINE.
DEFAULT_REPORT_TYPE: Final[str] = 'CODE_SMELL'

# Issue type per engine, keyed by the lower-cased SARIF tool driver name.
REPORT_TYPE_BY_ENGINE: Final[dict[str, str]] = {
    'ansible-lint': 'CODE_SMELL',
    'robocop': 'CODE_SMELL',
    'tflint': 'CODE_SMELL',
}

# A (line, column) pair; the converter stores both values 0-indexed.
Position = collections.namedtuple('Position', ['line', 'column'])
def main(source: Path | str, target: Path | str) -> None:  # pylint:disable=too-many-locals
    """
    Convert the SARIF report `source` into a SonarQube generic issues file `target`.

    Every result of every run becomes one issue holding the engine name, the rule id,
    a severity mapped from the SARIF level, a multi-line message (result text, rule help,
    rule URL and optional tags) and a text range.

    For the `ansible-lint` and `robocop` engines the reported file is read (relative to
    the current working directory) to repair empty or out-of-bounds end positions.

    :param source: Path to the SARIF (JSON) report to read.
    :param target: Path of the JSON file to create, it must not exist yet.
    :raises IOError: If `target` already exists.
    :raises ValueError: If the `$schema` of `source` is missing or does not mention sarif.
    :raises NotImplementedError: If a result does not have exactly one location.
    :raises AssertionError: If repairing a position would move the start before line 1.
    """
    source = Path(source).resolve()
    target = Path(target).resolve()
    if target.exists():
        raise IOError(f'Target file "{target}" already exist.')

    sarif_data: dict = json.loads(source.read_text(encoding='utf-8'))
    # A missing (or null) $schema is reported like any other non-SARIF input
    # instead of surfacing as an unrelated KeyError/TypeError.
    if 'sarif' not in str(sarif_data.get('$schema') or ''):
        raise ValueError('Source is (probably) not a valid sarif file.')

    issues: list[dict] = []
    for run_index, run_data in enumerate(sarif_data['runs'], 1):
        driver_data = run_data['tool']['driver']
        engine_id = driver_data['name']
        engine_key = engine_id.lower()
        rules: dict[str, dict] = {
            rule['id']: rule for rule in driver_data.get('rules') or []
        }

        # A run without results (absent or null) simply contributes no issue
        for result_index, result_data in enumerate(run_data.get('results') or [], 1):

            # Code is not programmed to handle multiple locations, because ... Its a WIP
            if (num_locations := len(result_data['locations'])) != 1:
                raise NotImplementedError(
                    f'File {source} : run[{run_index}].results[{result_index}].locations[] '
                    f'size expected 1, actual {num_locations}')

            rule_id = result_data['ruleId']
            # The driver may not describe every rule it reports, fall back to no metadata
            rule_data = rules.get(rule_id, {})
            location_data = result_data['locations'][0]['physicalLocation']
            file_path = location_data['artifactLocation']['uri']

            # "level" is optional in SARIF: a result whose kind is not "fail" defaults to
            # "none", otherwise the rule default configuration applies, else "warning".
            level = result_data.get('level')
            if not level:
                if result_data.get('kind', 'fail') != 'fail':
                    level = 'none'
                else:
                    default_config = rule_data.get('defaultConfiguration') or {}
                    level = default_config.get('level') or 'warning'

            message_lines = [
                result_data['message']['text'],
                '',  # Blank line separating the message from the rule metadata
                f"Help: {rule_data.get('help', {}).get('text') or '<empty>'}",
                f"URL: {rule_data.get('helpUri') or '<empty>'}"
            ]
            if tags := rule_data.get('properties', {}).get('tags', []):
                message_lines.append(f"Tags: {', '.join(clean_tag(tag) for tag in tags)}")

            issue = {
                'engineId': engine_id,
                'primaryLocation': {
                    'filePath': file_path,
                    'message': '\n'.join(message_lines)
                },
                'ruleId': rule_id,
                'severity': LEVEL_TO_SERVERITY[level],
                'type': REPORT_TYPE_BY_ENGINE.get(engine_key, DEFAULT_REPORT_TYPE)
            }

            # Converting location data (to 0-indexed lines and columns)
            region = location_data['region']
            start = Position(
                region['startLine'] - 1,
                region.get('startColumn', 1) - 1)
            end = Position(
                region.get('endLine', start.line + 1) - 1,
                region.get('endColumn', start.column + 1) - 1)

            # Fix location data for some tools (data is wrong or missing)
            if engine_key in {'ansible-lint', 'robocop'}:
                # Ensure the end position makes sense or fix it.
                # read_text() already translates every newline flavor to '\n', so splitting
                # on os.linesep would leave the whole file as a single line on Windows.
                lines = Path(file_path).read_text(encoding='utf-8').split('\n')
                if start == end or (end.column and end.column > len(lines[end.line])):
                    prev_start, prev_end = start, end
                    if end.line + 1 < len(lines):
                        # Move end position to next line at column 0
                        end = Position(end.line + 1, 0)
                    else:
                        # Move start to previous line at same column
                        # Move end position to same line at column 0
                        start = Position(start.line - 1, start.column)
                        end = Position(end.line, 0)
                    # Explicit raise (not an assert statement) so the check survives -O
                    if start.line < 0:
                        raise AssertionError((start, end))
                    print(
                        f"Wrong indexation (0-indexed) {file_path}: "
                        f"(start={tuple(prev_start)} end={tuple(prev_end)}), "
                        f"fix it by setting start={tuple(start)} end={tuple(end)}")

            # Lines are 1-indexed both in SARIF and Sonar Generic
            # Columns are 1-indexed in SARIF 0-indexed in Sonar Generic
            issue['primaryLocation']['textRange'] = {
                'startLine': start.line + 1,
                'startColumn': start.column,
                'endLine': end.line + 1,
                'endColumn': end.column
            }
            issues.append(issue)

    target.write_text(json.dumps({'issues': issues}, indent=2), encoding='utf-8')
def clean_tag(value: str) -> str:
    """Return `value` wrapped in single quotes if it contains a space, else unchanged."""
    if ' ' not in value:
        return value
    return "'" + value + "'"
if __name__ == '__main__':
    # Script entry point: expects the source SARIF path then the target JSON path.
    # Extra arguments are ignored; missing ones yield a usage message and a
    # non-zero exit status instead of an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit(f'Usage: {sys.argv[0]} <source.sarif> <target.json>')
    main(sys.argv[1], sys.argv[2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment