Skip to content

Instantly share code, notes, and snippets.

@jimklimov
Last active September 25, 2024 11:37
Show Gist options
  • Save jimklimov/79eedd857a39757ecf9b55156d9759e5 to your computer and use it in GitHub Desktop.
Save jimklimov/79eedd857a39757ecf9b55156d9759e5 to your computer and use it in GitHub Desktop.
Dependency-Track manually analyzed and/or depended-on components

PostgreSQL:

-- Helper view, indexes and methods

SET statement_timeout TO 0;

BEGIN;

-- Methods and their indices, to find UUID hits in DIRECT_DEPENDENCIES columns as text (JSON docs)

create or replace function match_json_uuid_varchar(text) returns character varying[] immutable language sql as $$ 
   select array_agg(regexp_matches[1]) from 
      regexp_matches ($1, '"uuid":"([^"]+)"','g') 
$$;

create index on "public"."PROJECT" using gin (match_json_uuid_varchar("DIRECT_DEPENDENCIES"));
create index on "public"."COMPONENT" using gin (match_json_uuid_varchar("DIRECT_DEPENDENCIES"));



create or replace function match_json_uuid_text(text) returns text[] immutable language sql as $$ 
   select array_agg(regexp_matches[1]) from 
      regexp_matches ($1, '"uuid":"([^"]+)"','g') 
$$;

create index on "public"."PROJECT" using gin (match_json_uuid_text("DIRECT_DEPENDENCIES"));
create index on "public"."COMPONENT" using gin (match_json_uuid_text("DIRECT_DEPENDENCIES"));



-- The helper materialized view - note it has to be refreshed after DB content changes, but is quick to query afterwards!

DROP MATERIALIZED VIEW IF EXISTS "public"."COMPONENT_V";

CREATE MATERIALIZED VIEW "public"."COMPONENT_V" AS
SELECT
	c.*,
	CONCAT(
		COALESCE(c."GROUP", 'None'), '|',
		COALESCE(c."NAME", 'None'), '|',
		COALESCE(c."VERSION", 'None'), '|',
		COALESCE(c."CLASSIFIER", 'None'), '|',
		COALESCE(c."CPE", 'None'), '|',
		COALESCE(c."PURL", 'None'), '|',
		COALESCE(c."SWIDTAGID", 'None')
	) AS component_strid, 
	CONCAT(
		COALESCE(c."GROUP", 'None'), '|',
		COALESCE(c."NAME", 'None'), '|',
		COALESCE(c."VERSION", 'None'), '|',
		COALESCE(c."CLASSIFIER", 'None'), '|',
		COALESCE(c."CPE", 'None'), '|',
		COALESCE(c."PURL", 'None'), '|',
		COALESCE(c."SWIDTAGID", 'None'), '|',
		COALESCE(p."UUID", 'None')
	) AS component_strid_prj,
	(SELECT COALESCE(COUNT(cc."ID"), 0) FROM "public"."COMPONENT" AS cc WHERE cc."DIRECT_DEPENDENCIES" is not null and (match_json_uuid_varchar(cc."DIRECT_DEPENDENCIES") @> ARRAY[c."UUID"]))
		AS used_by_comp,
	(SELECT COALESCE(COUNT(pp."ID"), 0) FROM "public"."PROJECT" AS pp WHERE pp."DIRECT_DEPENDENCIES" is not null and (match_json_uuid_varchar(pp."DIRECT_DEPENDENCIES") @> ARRAY[c."UUID"]))
		AS used_by_proj,
	(SELECT COALESCE(COUNT(cc."ID"), 0) FROM "public"."COMPONENT" AS cc WHERE cc."DIRECT_DEPENDENCIES" is not null and (match_json_uuid_varchar(cc."DIRECT_DEPENDENCIES") @> ARRAY[c."UUID"]))
		+ (SELECT COALESCE(COUNT(pp."ID"), 0) FROM "public"."PROJECT" AS pp WHERE pp."DIRECT_DEPENDENCIES" is not null and (match_json_uuid_varchar(pp."DIRECT_DEPENDENCIES") @> ARRAY[c."UUID"]))
		AS used_by_graph,
	(SELECT COALESCE(MAX(dm."SUPPRESSED"), 0) FROM "public"."DEPENDENCYMETRICS" as dm WHERE c."ID" = dm."COMPONENT_ID")
		AS suppressed,
	(SELECT COALESCE(MAX(dm."FINDINGS_AUDITED"), 0) FROM "public"."DEPENDENCYMETRICS" as dm WHERE c."ID" = dm."COMPONENT_ID")
		AS findings_audited,
	(SELECT COALESCE(MAX(dm."SUPPRESSED"), 0) + COALESCE(MAX(dm."FINDINGS_AUDITED"), 0) FROM "public"."DEPENDENCYMETRICS" as dm WHERE c."ID" = dm."COMPONENT_ID")
		+ (SELECT COALESCE(COUNT(*), 0) FROM "public"."VIOLATIONANALYSIS" va WHERE c."ID" = va."COMPONENT_ID")
		+ (SELECT COALESCE(COUNT(*), 0) FROM "public"."ANALYSIS" a WHERE c."ID" = a."COMPONENT_ID")
		AS manually_analyzed
FROM
	"public"."COMPONENT" AS c,
	"public"."PROJECT" AS p
WHERE
	c."PROJECT_ID" = p."ID"
;

CREATE INDEX "COMPONENT_V_strid_idx" ON "public"."COMPONENT_V"("component_strid");
CREATE INDEX "COMPONENT_V_strid_prj_idx" ON "public"."COMPONENT_V"("component_strid_prj");

REFRESH MATERIALIZED VIEW "public"."COMPONENT_V";

COMMIT;
-- Query current counts, using the helper view
-- Ideally the "not used" count should be 0 (everything known via dependency tree)!

ROLLBACK;

BEGIN;

CREATE OR REPLACE FUNCTION refresh_component_v_stats() RETURNS void AS '
BEGIN
	SET statement_timeout TO 0;

	REFRESH MATERIALIZED VIEW "public"."COMPONENT_V";
END;
' language plpgsql;

CREATE OR REPLACE FUNCTION query_component_v_stats() returns TABLE(
	"count-COMPONENT" BIGINT,
	"count-COMPONENT_V" BIGINT,
	"CV-Used-or-Analyzed" BIGINT,
	"CV-NotUsed-and-NotAnalyzed" BIGINT,
	"CV-Used-Whatever" BIGINT,
	"CV-Whatever-Analyzed" BIGINT,
	"CV-Used-and-NotAnalyzed" BIGINT,
	"CV-NotUsed-and-Analyzed" BIGINT,
	"CV-Used-and-Analyzed" BIGINT
) AS '
BEGIN
	SET statement_timeout TO 0;

	return query
		SELECT
			(SELECT COUNT(*) FROM "public"."COMPONENT") as "count-COMPONENT",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V") as "count-COMPONENT_V",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" > 0 OR c."manually_analyzed" > 0) as "CV-Used-or-Analyzed",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" = 0) as "CV-NotUsed-and-NotAnalyzed",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" > 0) as "CV-Used-Whatever",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V" c WHERE c."manually_analyzed" > 0) as "CV-Whatever-Analyzed",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" > 0 AND c."manually_analyzed" = 0) as "CV-Used-and-NotAnalyzed",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" > 0) as "CV-NotUsed-and-Analyzed",
			(SELECT COUNT(*) FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" > 0 AND c."manually_analyzed" > 0) as "CV-Used-and-Analyzed";
END;
' language plpgsql;

CREATE OR REPLACE FUNCTION query_fresh_component_v_stats() returns TABLE(
	"count-COMPONENT" BIGINT,
	"count-COMPONENT_V" BIGINT,
	"CV-Used-or-Analyzed" BIGINT,
	"CV-NotUsed-and-NotAnalyzed" BIGINT,
	"CV-Used-Whatever" BIGINT,
	"CV-Whatever-Analyzed" BIGINT,
	"CV-Used-and-NotAnalyzed" BIGINT,
	"CV-NotUsed-and-Analyzed" BIGINT,
	"CV-Used-and-Analyzed" BIGINT
) AS '
BEGIN
	perform refresh_component_v_stats();
	return query select * from query_component_v_stats();
END;
' language plpgsql;

COMMIT;
-- Clean away helper view, indexes and methods - return DB structure to Dependency-Track vanilla (optional)
-- NOTE: Maybe suffices to remove the methods and not fuss about indices that should get dropped automatically (cascade)
-- Dry-run by default (rolled-back transaction until you are ready, see below)

BEGIN;
SAVEPOINT ddd;

DROP index "COMPONENT_V_strid_idx" CASCADE;
DROP index "COMPONENT_V_strid_prj_idx" CASCADE;

DROP index "PROJECT_match_json_uuid_text_idx" CASCADE;
DROP index "COMPONENT_match_json_uuid_text_idx" CASCADE;
DROP function match_json_uuid_text(text) CASCADE;

DROP index "PROJECT_match_json_uuid_varchar_idx" CASCADE;
DROP index "COMPONENT_match_json_uuid_varchar_idx" CASCADE;
DROP function match_json_uuid_varchar(text) CASCADE;

DROP function refresh_component_v_stats() CASCADE;
DROP function query_component_v_stats() CASCADE;
DROP function query_fresh_component_v_stats() CASCADE;

-- Check: this select should fail after cascade drops above
SELECT COUNT(*) FROM "COMPONENTS_V";

-- Uncomment to actually drop the helpers:
--ROLLBACK TO ddd;
COMMIT;
-- Actual query via function:

-- Once after substantial DB content update, e.g. during SQL-driven maintenance:
SELECT * FROM query_fresh_component_v_stats();

-- Regularly and quickly:
SELECT * FROM query_component_v_stats();
BEGIN;

-- "Unreferenced" components - no manual verdicts, not in graph
-- (should not exist unless SBOM generator made disconnected islands)
CREATE OR REPLACE FUNCTION query_component_v_unreferenced() returns TABLE(
	"ID" BIGINT,
	"NAME" VARCHAR,
	"VERSION" VARCHAR,
	"PROJECT_ID" BIGINT,
	"component_strid_prj" TEXT,
	"used_by_graph" BIGINT,
	"manually_analyzed" BIGINT
) AS '
BEGIN
	SET statement_timeout TO 0;

	return query
		SELECT DISTINCT
			c."ID", c."NAME", c."VERSION", c."PROJECT_ID",
			c."component_strid_prj",
			c."used_by_graph", c."manually_analyzed"
		FROM "public"."COMPONENT_V" C, "public"."COMPONENT_V" c0
		WHERE
			c0."used_by_graph" = 0 AND c0."manually_analyzed" = 0
			AND c."NAME" = c0."NAME" AND c."PROJECT_ID" = c0."PROJECT_ID"
		ORDER BY c."PROJECT_ID", c."NAME", c."used_by_graph", c."manually_analyzed", c."VERSION";
END;
' language plpgsql;

SELECT * FROM query_component_v_unreferenced();

COMMIT;
SET statement_timeout TO 0;

BEGIN;

create or replace function wipe_unreferenced_once() returns integer AS '
DECLARE
	countAfter  integer;
	countBefore integer;
	countDiff   integer;
BEGIN
	SET statement_timeout TO 0;

	countBefore := (SELECT COUNT(*) FROM "public"."COMPONENT_V");

	DELETE FROM "public"."COMPONENTS_VULNERABILITIES" WHERE "COMPONENT_ID" IN (SELECT c."ID" FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" = 0);
	DELETE FROM "public"."FINDINGATTRIBUTION" WHERE "COMPONENT_ID" IN (SELECT c."ID" FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" = 0);
	DELETE FROM "public"."ANALYSIS" WHERE "COMPONENT_ID" IN (SELECT c."ID" FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" = 0);
	DELETE FROM "public"."DEPENDENCYMETRICS" WHERE "COMPONENT_ID" IN (SELECT c."ID" FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" = 0);
	DELETE FROM "public"."POLICYVIOLATION" WHERE "COMPONENT_ID" IN (SELECT c."ID" FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" = 0);
	DELETE FROM "public"."COMPONENT" WHERE "ID" IN (SELECT c."ID" FROM "public"."COMPONENT_V" c WHERE c."used_by_graph" = 0 AND c."manually_analyzed" = 0);

	REFRESH MATERIALIZED VIEW "public"."COMPONENT_V";

	countAfter := (SELECT COUNT(*) FROM "public"."COMPONENT_V");
	countDiff := countBefore - countAfter;

	RAISE NOTICE ''countBefore: % countAfter: % countDiff: %'', countBefore, countAfter, countDiff;

	return countDiff;
END;
' language plpgsql;

create or replace function wipe_unreferenced_loop() returns void AS '
DECLARE
	counter integer;
BEGIN
	counter := 1;
	WHILE wipe_unreferenced_once() > 0 LOOP
		counter := counter + 1;
		RAISE NOTICE ''Counter: %'', counter;
	END LOOP;
END;
' language plpgsql;
COMMIT;
-- Call and go for coffee
BEGIN;

SET statement_timeout TO 0;
SELECT wipe_unreferenced_loop();

COMMIT;

VACUUM FULL;
-- Clean up the method
BEGIN;
DROP function wipe_unreferenced_loop() CASCADE;
DROP function wipe_unreferenced_once() CASCADE;
COMMIT;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment