Skip to content

Instantly share code, notes, and snippets.

@davidlu1001
Last active May 14, 2025 02:05
Show Gist options
  • Select an option

  • Save davidlu1001/f5120f842a07a4b5d18493b5a461346e to your computer and use it in GitHub Desktop.

Select an option

Save davidlu1001/f5120f842a07a4b5d18493b5a461346e to your computer and use it in GitHub Desktop.
immuta-tag-hierarchy-extractor.js
// immuta-tag-hierarchy-extractor.js
// Version 8.0 - Multi-format tag hierarchy extractor with CSV and JSON outputs
// For use with Immuta Governance interface
(function extractImmutaTagHierarchy() {
console.log("Starting Immuta Tag Hierarchy extraction...");
// Find the governance section
const governanceSection = document.querySelector('[class*="governance"], .governance, div.governance');
if (!governanceSection) {
console.error("❌ Cannot find governance section. Make sure you're on the Immuta governance page.");
return;
}
// Store all tag hierarchies
const tagHierarchies = [];
const processedPaths = new Set(); // To avoid duplicates
const processedElements = new Set(); // To avoid processing the same element twice
// Helper function to determine if an element is a tag based on the provided examples
/**
 * Decide whether an element looks like a tag, using four independent checks.
 *
 * @param {Element|null|undefined} element - Candidate element.
 * @returns {boolean} false for a missing element or one with blank text;
 *   otherwise true when at least one of the checks below matches.
 */
function isTagElement(element) {
  if (!element) return false;
  if (!element.textContent.trim()) return false;

  // Check 1: span carrying both button-wrapper classes.
  const classes = element.classList;
  const wrapsButtonLabel =
    classes &&
    classes.contains('mat-button-wrapper') &&
    classes.contains('pxl-button-wrapper');

  // Check 2: explicit data-test-bind="tag-title" marker.
  const isTagTitle = element.getAttribute('data-test-bind') === 'tag-title';

  // Check 3: a SPAN whose direct parent is a BUTTON.
  const parent = element.parentElement;
  const isSpanInsideButton =
    element.tagName === 'SPAN' && parent && parent.tagName === 'BUTTON';

  // Check 4: any element nested inside a <purposes-or-tags> ancestor.
  const hasPurposesOrTagsAncestor = element.closest('purposes-or-tags') !== null;

  return [wrapsButtonLabel, isTagTitle, isSpanInsideButton, hasPurposesOrTagsAncestor].some(Boolean);
}
// Build path from element to root using the provided XPath patterns as guidance
/**
 * Build the hierarchy path for a tag element by walking up its ancestors.
 *
 * The element's trimmed text is the last segment. Labels found on recognised
 * ancestors (table rows, purposes-or-tags hosts, ng-component hosts and
 * dp-tree containers) are prepended as they are met, skipping any label that
 * is already in the path. 'Governance' is prepended last if it is missing.
 *
 * @param {Element} element - The tag element to start from.
 * @returns {string[]|null} Path from root to tag, or null when the element
 *   has no text or the resulting path has fewer than two segments.
 */
function buildTagHierarchy(element) {
  const leafLabel = element.textContent.trim();
  if (!leafLabel) {
    return null; // Skip empty tags
  }
  const path = [leafLabel];

  // Prepend a node's trimmed text unless it is missing, blank or already present.
  const prependLabelOf = (node) => {
    const label = node ? node.textContent.trim() : '';
    if (label && !path.includes(label)) {
      path.unshift(label);
    }
  };

  // Ancestors already handled; stops the walk if a node is met twice.
  const seen = new Set();
  let current = element.parentElement;

  while (current && current !== document.body && !seen.has(current)) {
    seen.add(current);

    if (current.tagName === 'TR') {
      // The first cell of the row may carry category info.
      prependLabelOf(current.querySelector('td:first-child'));
      const owningTable = current.closest('table');
      if (owningTable) {
        // Jump straight past the table to whatever contains it.
        current = owningTable.parentElement;
        continue;
      }
      // No enclosing table: fall through to the remaining checks for this node.
    }

    if (current.tagName === 'PURPOSES-OR-TAGS' || current.matches('[class*="purposes-or-tags"]')) {
      // Use the first title/header found inside this host as a category.
      prependLabelOf(current.querySelector('[class*="title"], [class*="header"]'));
    } else if (current.matches('ng-component, [class*="ng-component"]')) {
      // Every heading/title inside the component is prepended in document
      // order, so later titles end up in front of earlier ones.
      current
        .querySelectorAll('h1, h2, h3, h4, [class*="title"], [class*="header"]')
        .forEach((titleNode) => prependLabelOf(titleNode));
    } else if (current.matches('[class*="dp-tree"], [id*="dp-tree"]')) {
      prependLabelOf(current.querySelector('[class*="dp-tree-value"]'));
    }

    current = current.parentElement;
  }

  // Add Governance as the root if not already included
  if (!path.includes('Governance')) {
    path.unshift('Governance');
  }
  return path.length > 1 ? path : null;
}
try {
// Record one candidate element. Elements that were already examined are
// skipped, and a hierarchy is stored only the first time its path is seen.
// The path key is the JSON form of the segment array, so two different
// paths can never produce the same key regardless of segment contents.
const recordTagElement = (element) => {
  if (!element || processedElements.has(element)) return;
  processedElements.add(element);
  const hierarchyPath = buildTagHierarchy(element);
  if (!hierarchyPath) return;
  const pathKey = JSON.stringify(hierarchyPath);
  if (processedPaths.has(pathKey)) return;
  processedPaths.add(pathKey);
  tagHierarchies.push(hierarchyPath);
};

// Strategy 1: Find elements matching the specific patterns from the examples
// Clickable tags with mat-button-wrapper (examples 1 and 3)
const buttonWrapperTags = document.querySelectorAll('span.mat-button-wrapper.pxl-button-wrapper');
console.log(`Found ${buttonWrapperTags.length} tags with mat-button-wrapper class`);
buttonWrapperTags.forEach(recordTagElement);

// Tags with data-test-bind="tag-title" (example 2)
const tagTitleElements = document.querySelectorAll('[data-test-bind="tag-title"]');
console.log(`Found ${tagTitleElements.length} tags with data-test-bind="tag-title"`);
tagTitleElements.forEach(recordTagElement);

// Strategy 2: Use the full XPaths from examples to find similar patterns
const xpathPatterns = [
  // From example 1
  '//div[contains(@class, "governance")]//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//div//div//button//span',
  // From example 2
  '//div[contains(@class, "governance")]//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//div//div//span[@data-test-bind="tag-title"]',
  // From example 3
  '//div[contains(@class, "governance")]//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//div//div//button//span'
];
xpathPatterns.forEach((pattern, index) => {
  // A failing pattern is reported and skipped so the others still run.
  try {
    const xpathResult = document.evaluate(pattern, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    console.log(`XPath pattern ${index+1} found ${xpathResult.snapshotLength} elements`);
    for (let i = 0; i < xpathResult.snapshotLength; i++) {
      recordTagElement(xpathResult.snapshotItem(i));
    }
  } catch (error) {
    console.warn(`Error with XPath pattern ${index+1}:`, error);
  }
});

// Strategy 3: Use exact selectors from examples
const specificSelectors = [
  // From example 1
  '#dp-tree-8 > div > button > span.mat-button-wrapper.pxl-button-wrapper',
  // From example 2
  '#dp-tree-8 > div > span',
  // From example 3
  '#dp-tree-3 > div > button > span.mat-button-wrapper.pxl-button-wrapper'
];
specificSelectors.forEach((selector, index) => {
  try {
    // Only the first match of each selector is considered.
    recordTagElement(document.querySelector(selector));
  } catch (error) {
    console.warn(`Error with selector ${index+1}:`, error);
  }
});

// Strategy 4: Look for any elements inside purposes-or-tags structure
const purposesOrTagsElements = document.querySelectorAll('purposes-or-tags button span, purposes-or-tags [data-test-bind="tag-title"]');
console.log(`Found ${purposesOrTagsElements.length} elements inside purposes-or-tags`);
purposesOrTagsElements.forEach(recordTagElement);
// Clean up every segment in place: strip characters in the Unicode ranges
// U+2190-U+21FF and U+2500-U+257F, strip one specific garbled character
// sequence, then trim surrounding whitespace.
tagHierarchies.forEach(path => {
  for (let i = 0; i < path.length; i++) {
    path[i] = path[i].replace(/[\u2190-\u21FF\u2500-\u257F]/g, '');
    // NOTE(review): this literal looks like a mis-decoded arrow and may
    // itself have been altered by re-encoding of this file - confirm the
    // intended characters before relying on it.
    path[i] = path[i].replace(/Ò†'/g, '');
    path[i] = path[i].trim();
  }
});
// Keep only usable paths: more than one segment and no empty segment.
// Cleaning can make two previously distinct paths identical (they differed
// only by stripped characters), so duplicates are removed again here.
const cleanedPathKeys = new Set();
const filteredHierarchies = tagHierarchies.filter(path => {
  if (path.length <= 1 || path.some(segment => segment === '')) {
    return false;
  }
  const pathKey = JSON.stringify(path);
  if (cleanedPathKeys.has(pathKey)) {
    return false;
  }
  cleanedPathKeys.add(pathKey);
  return true;
});
// Sort segment by segment; when one path is a prefix of the other, the
// shorter path comes first.
filteredHierarchies.sort((a, b) => {
  for (let i = 0; i < Math.min(a.length, b.length); i++) {
    if (a[i] !== b[i]) {
      return a[i].localeCompare(b[i]);
    }
  }
  return a.length - b.length;
});
console.log(`Found ${tagHierarchies.length} paths, filtered to ${filteredHierarchies.length} valid paths`);
// Generate improved hierarchical Excel format with leaf node in first column
/**
 * Render filteredHierarchies as CSV text with the leaf tag repeated in the
 * first column, followed by one column per hierarchy level.
 *
 * Data cells are emitted with tripled double quotes around the value and any
 * embedded double quote doubled; unused trailing level cells are left empty.
 *
 * @returns {{content: string, filename: string}} CSV text prefixed with a
 *   byte-order mark, and the file name to save it under.
 */
function generateImprovedExcelFormat() {
  // Wrap one value the way every data cell is written.
  const quoteCell = (text) => `"""${text.replace(/"/g, '""')}"""`;

  // The deepest path decides how many "Level n" columns are needed.
  const maxDepth = filteredHierarchies.reduce(
    (deepest, path) => Math.max(deepest, path.length),
    0
  );

  const headerCells = ["Leaf Node"];
  for (let level = 1; level <= maxDepth; level++) {
    headerCells.push(`Level ${level}`);
  }

  const csvLines = [headerCells.map(title => `"${title}"`).join(',')];
  for (const path of filteredHierarchies) {
    // Column 0 is the leaf (last segment); columns 1..maxDepth hold the path.
    const cells = [quoteCell(path[path.length - 1])];
    for (let depth = 0; depth < maxDepth; depth++) {
      cells.push(depth < path.length ? quoteCell(path[depth]) : '');
    }
    csvLines.push(cells.join(','));
  }

  return {
    content: '\ufeff' + csvLines.join('\n'), // UTF-8 BOM at start
    filename: 'immuta_tags_hierarchical.csv'
  };
}
// Generate tree-structured JSON
/**
 * Merge a list of paths into a nested tree.
 *
 * Paths that share a prefix share the corresponding nodes. The synthetic
 * root is never exposed: the result always lists the top-level nodes under
 * `tags`, whether there is one of them or several.
 *
 * @param {string[][]} hierarchies - Paths, each ordered from root to leaf.
 * @returns {{tags: Array<{name: string, children: Array}>}} Top-level nodes,
 *   in first-seen order; every node has `name` and `children`.
 */
function generateTreeJSON(hierarchies) {
  // Synthetic container so top-level segments are handled like any other.
  const rootNode = { name: "root", children: [] };
  hierarchies.forEach(path => {
    let currentNode = rootNode;
    path.forEach(segment => {
      // Reuse the child with this name if an earlier path already created it.
      let childNode = currentNode.children.find(child => child.name === segment);
      if (!childNode) {
        childNode = { name: segment, children: [] };
        currentNode.children.push(childNode);
      }
      currentNode = childNode;
    });
  });
  return { tags: rootNode.children };
}
// Generate flat JSON structure
/**
 * Flatten a list of paths into a node list linked by parent ids.
 *
 * A node is identified by its name together with the id of its parent, so
 * the same name under two different parents yields two separate nodes even
 * when those parents themselves share a name. Ids are assigned from 1 in
 * first-seen order; top-level nodes have `parentId` null.
 *
 * @param {string[][]} hierarchies - Paths, each ordered from root to leaf.
 * @returns {{nodes: Array<{id: number, name: string, parentId: (number|null)}>}}
 */
function generateFlatJSON(hierarchies) {
  const nodes = [];
  let nextId = 1;
  const positionToIdMap = new Map(); // Maps "parentId|name" to id
  hierarchies.forEach(path => {
    let parentId = null;
    path.forEach(segment => {
      // The parent id is numeric (or empty at the top level), so the first
      // '|' always separates it from the name, even if the name contains '|'.
      const nodeKey = `${parentId === null ? '' : parentId}|${segment}`;
      let nodeId = positionToIdMap.get(nodeKey);
      if (nodeId === undefined) {
        nodeId = nextId++;
        positionToIdMap.set(nodeKey, nodeId);
        nodes.push({
          id: nodeId,
          name: segment,
          parentId: parentId
        });
      }
      // This node is the parent of the next segment in the path.
      parentId = nodeId;
    });
  });
  return { nodes };
}
// Download function for any file type
/**
 * Offer content to the user as a file download via a temporary hidden link.
 *
 * @param {*} content - For 'json', a value that is serialised with 2-space
 *   indentation; for 'csv', the text to save as-is.
 * @param {string} filename - Name suggested for the saved file.
 * @param {string} type - Either 'json' or 'csv'.
 * @returns {boolean} true once the download has been triggered; false when
 *   anything failed (including an unsupported type), after logging the error.
 */
function downloadFile(content, filename, type) {
  // Build the blob matching the requested type; unknown types are an error.
  const makeBlob = () => {
    switch (type) {
      case 'json':
        return new Blob([JSON.stringify(content, null, 2)], { type: 'application/json' });
      case 'csv':
        return new Blob([content], { type: 'text/csv;charset=utf-8;' });
      default:
        throw new Error(`Unsupported file type: ${type}`);
    }
  };

  try {
    const objectUrl = URL.createObjectURL(makeBlob());

    // A hidden anchor with a download attribute, clicked programmatically.
    const anchor = document.createElement('a');
    anchor.href = objectUrl;
    anchor.download = filename;
    anchor.style.display = 'none';
    document.body.appendChild(anchor);
    anchor.click();

    // Remove the anchor and release the object URL shortly afterwards.
    setTimeout(() => {
      document.body.removeChild(anchor);
      URL.revokeObjectURL(objectUrl);
    }, 100);
    return true;
  } catch (error) {
    console.error(`Failed to download ${filename}:`, error);
    return false;
  }
}
// Generate all formats
const csvFormat = generateImprovedExcelFormat();
const treeJSON = generateTreeJSON(filteredHierarchies);
const flatJSON = generateFlatJSON(filteredHierarchies);
// Download all formats
const csvDownloaded = downloadFile(csvFormat.content, csvFormat.filename, 'csv');
const treeDownloaded = downloadFile(treeJSON, 'immuta_tags_tree.json', 'json');
const flatDownloaded = downloadFile(flatJSON, 'immuta_tags_flat.json', 'json');
console.log(`βœ… Success! Extracted ${filteredHierarchies.length} tag hierarchies`);
console.log(`πŸ“„ Files have been downloaded:`);
console.log(` - ${csvFormat.filename} (CSV format with leaf nodes for easy analysis)`);
console.log(` - immuta_tags_tree.json (Nested tree structure for hierarchy visualization)`);
console.log(` - immuta_tags_flat.json (Flat structure for easier programmatic processing)`);
// Preview of extracted paths with quotes
console.log("Preview of extracted paths:");
filteredHierarchies.slice(0, 5).forEach(path => {
const leafNode = path[path.length - 1];
console.log(` - Leaf: "${leafNode}" | Path: ` + path.map(tag => `"${tag}"`).join(' β†’ '));
});
return {
count: filteredHierarchies.length,
formats: {
csv: csvFormat.filename,
treeJson: 'immuta_tags_tree.json',
flatJson: 'immuta_tags_flat.json'
},
preview: filteredHierarchies.slice(0, 5).map(p => {
const leaf = p[p.length - 1];
return {
leaf: leaf,
path: p.map(tag => `"${tag}"`).join(' β†’ ')
};
})
};
} catch (error) {
console.error("❌ Error extracting tag hierarchies:", error);
return {
error: error.message
};
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment