Last active
May 14, 2025 02:05
-
-
Save davidlu1001/f5120f842a07a4b5d18493b5a461346e to your computer and use it in GitHub Desktop.
immuta-tag-hierarchy-extractor.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // immuta-tag-hierarchy-extractor.js | |
| // Version 8.0 - Multi-format tag hierarchy extractor with CSV and JSON outputs | |
| // For use with Immuta Governance interface | |
| (function extractImmutaTagHierarchy() { | |
| console.log("Starting Immuta Tag Hierarchy extraction..."); | |
| // Find the governance section | |
| const governanceSection = document.querySelector('[class*="governance"], .governance, div.governance'); | |
| if (!governanceSection) { | |
| console.error("β Cannot find governance section. Make sure you're on the Immuta governance page."); | |
| return; | |
| } | |
| // Store all tag hierarchies | |
| const tagHierarchies = []; | |
| const processedPaths = new Set(); // To avoid duplicates | |
| const processedElements = new Set(); // To avoid processing the same element twice | |
/**
 * Decide whether a DOM node looks like a tag.
 *
 * A node qualifies when it has non-blank text and at least one of:
 *   1. it carries both the `mat-button-wrapper` and `pxl-button-wrapper` classes;
 *   2. its `data-test-bind` attribute equals `tag-title`;
 *   3. it is a SPAN whose parent element is a BUTTON;
 *   4. it has a `purposes-or-tags` ancestor (via `closest`).
 *
 * Nodes that lack `textContent`, `getAttribute` or `closest` (for example
 * non-Element nodes) are tolerated instead of raising a TypeError.
 *
 * @param {*} element - Candidate node; may be null/undefined.
 * @returns {boolean} True when the node matches one of the patterns above.
 */
function isTagElement(element) {
  // No node, no text property, or only whitespace: never a tag.
  if (!element || typeof element.textContent !== 'string') return false;
  if (element.textContent.trim() === '') return false;

  // Case 1: clickable tag rendered inside a button wrapper span.
  const classList = element.classList;
  const isButtonWrapper =
    Boolean(classList) &&
    classList.contains('mat-button-wrapper') &&
    classList.contains('pxl-button-wrapper');

  // Case 2: explicit tag-title test hook.
  const hasTagTitleAttribute =
    typeof element.getAttribute === 'function' &&
    element.getAttribute('data-test-bind') === 'tag-title';

  // Case 3: a span sitting directly inside a button.
  const parent = element.parentElement;
  const isInButton =
    element.tagName === 'SPAN' && Boolean(parent) && parent.tagName === 'BUTTON';

  // Case 4: anywhere inside a purposes-or-tags element.
  const isInPurposesOrTags =
    typeof element.closest === 'function' &&
    element.closest('purposes-or-tags') !== null;

  return isButtonWrapper || hasTagTitleAttribute || isInButton || isInPurposesOrTags;
}
/**
 * Build the hierarchy path for a tag element by walking up its ancestors.
 *
 * The element's own trimmed text is the last segment. While climbing, text is
 * prepended (only if not already present in the path) from:
 *   - the first cell of an enclosing TR (the walk then jumps to the parent of
 *     the row's closest table);
 *   - the first title/header-classed descendant of a purposes-or-tags ancestor;
 *   - every h1-h4 / title / header-classed descendant of an ng-component ancestor;
 *   - the dp-tree-value descendant of a dp-tree ancestor.
 * Finally 'Governance' is prepended as the root unless already present.
 *
 * @param {Element|null|undefined} element - The tag element to start from.
 * @returns {string[]|null} Path from root to tag, or null when the element is
 *   missing, has no text, or the resulting path has fewer than two segments.
 */
function buildTagHierarchy(element) {
  // Missing node or a node without text cannot produce a path.
  if (!element || typeof element.textContent !== 'string') return null;
  const tagName = element.textContent.trim();
  if (!tagName) return null; // Skip empty tags

  const path = [tagName];

  // Prepend the trimmed text of `node` unless it is blank or already in the path.
  function prependSegment(node) {
    const text = node && typeof node.textContent === 'string' ? node.textContent.trim() : '';
    if (text && !path.includes(text)) {
      path.unshift(text);
    }
  }

  // Track traversed elements to avoid loops.
  const visitedElements = new Set();

  // Iterative walk: stop at the end of the chain, at <body>, or on a revisit.
  let el = element.parentElement;
  while (el && el !== document.body && !visitedElements.has(el)) {
    visitedElements.add(el);

    if (el.tagName === 'TR') {
      // First cell might contain category info.
      prependSegment(el.querySelector('td:first-child'));
      // Skip the rest of the table structure and resume above the table.
      const table = el.closest('table');
      if (table) {
        el = table.parentElement;
        continue;
      }
      // No enclosing table: fall through to the generic checks below.
    }

    if (el.tagName === 'PURPOSES-OR-TAGS' || el.matches('[class*="purposes-or-tags"]')) {
      prependSegment(el.querySelector('[class*="title"], [class*="header"]'));
    } else if (el.matches('ng-component, [class*="ng-component"]')) {
      // NOTE(review): each title is prepended in turn, so several titles end up
      // in reverse document order. Preserved as-is; confirm this is intended.
      el.querySelectorAll('h1, h2, h3, h4, [class*="title"], [class*="header"]')
        .forEach((title) => prependSegment(title));
    } else if (el.matches('[class*="dp-tree"], [id*="dp-tree"]')) {
      prependSegment(el.querySelector('[class*="dp-tree-value"]'));
    }

    el = el.parentElement;
  }

  // Add Governance as the root if not already included.
  if (!path.includes('Governance')) {
    path.unshift('Governance');
  }
  return path.length > 1 ? path : null;
}
| try { | |
// Record one candidate element: skip it if it was already examined, build its
// hierarchy path, and store the path unless an identical one is already stored.
// All strategies below funnel through this helper so they share one dedupe rule.
const recordTagElement = (element) => {
  if (!element || processedElements.has(element)) return;
  processedElements.add(element);

  const hierarchyPath = buildTagHierarchy(element);
  if (!hierarchyPath) return;

  // The joined string is only used as a uniqueness key.
  const pathStr = hierarchyPath.join(' β ');
  if (processedPaths.has(pathStr)) return;
  processedPaths.add(pathStr);
  tagHierarchies.push(hierarchyPath);
};

// Strategy 1: Find elements matching the specific patterns from the examples
// Find clickable tags with mat-button-wrapper (examples 1 and 3)
const buttonWrapperTags = document.querySelectorAll('span.mat-button-wrapper.pxl-button-wrapper');
console.log(`Found ${buttonWrapperTags.length} tags with mat-button-wrapper class`);
buttonWrapperTags.forEach((element) => recordTagElement(element));

// Find tags with data-test-bind="tag-title" (example 2)
const tagTitleElements = document.querySelectorAll('[data-test-bind="tag-title"]');
console.log(`Found ${tagTitleElements.length} tags with data-test-bind="tag-title"`);
tagTitleElements.forEach((element) => recordTagElement(element));

// Strategy 2: Use the full XPaths from examples to find similar patterns
const xpathPatterns = [
  // From example 1
  '//div[contains(@class, "governance")]//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//div//div//button//span',
  // From example 2
  '//div[contains(@class, "governance")]//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//div//div//span[@data-test-bind="tag-title"]',
  // From example 3
  '//div[contains(@class, "governance")]//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//purposes-or-tags//table//tbody//tr//td//div//div//button//span'
];
xpathPatterns.forEach((pattern, index) => {
  // A failing pattern is reported and skipped so the remaining ones still run.
  try {
    const xpathResult = document.evaluate(pattern, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    console.log(`XPath pattern ${index+1} found ${xpathResult.snapshotLength} elements`);
    for (let i = 0; i < xpathResult.snapshotLength; i++) {
      recordTagElement(xpathResult.snapshotItem(i));
    }
  } catch (error) {
    console.warn(`Error with XPath pattern ${index+1}:`, error);
  }
});

// Strategy 3: Use exact selectors from examples (each yields at most one element)
const specificSelectors = [
  // From example 1
  '#dp-tree-8 > div > button > span.mat-button-wrapper.pxl-button-wrapper',
  // From example 2
  '#dp-tree-8 > div > span',
  // From example 3
  '#dp-tree-3 > div > button > span.mat-button-wrapper.pxl-button-wrapper'
];
specificSelectors.forEach((selector, index) => {
  try {
    recordTagElement(document.querySelector(selector));
  } catch (error) {
    console.warn(`Error with selector ${index+1}:`, error);
  }
});

// Strategy 4: Look for any elements inside purposes-or-tags structure
const purposesOrTagsElements = document.querySelectorAll('purposes-or-tags button span, purposes-or-tags [data-test-bind="tag-title"]');
console.log(`Found ${purposesOrTagsElements.length} elements inside purposes-or-tags`);
purposesOrTagsElements.forEach((element) => recordTagElement(element));
// Scrub every segment in place: drop arrow and box-drawing glyphs, drop the
// stray garbled sequence, then trim surrounding whitespace.
for (const path of tagHierarchies) {
  path.forEach((segment, position) => {
    path[position] = segment
      .replace(/[\u2190-\u21FF\u2500-\u257F]/g, '')
      .replace(/Γ’β '/g, '')
      .trim();
  });
}

// Keep only paths with at least two segments and no segment left empty by the scrub.
const filteredHierarchies = tagHierarchies.filter(
  (path) => path.length > 1 && path.every((segment) => segment !== '')
);

// Order paths segment by segment; when one path is a prefix of the other the
// shorter one comes first.
filteredHierarchies.sort((left, right) => {
  const shared = Math.min(left.length, right.length);
  let i = 0;
  while (i < shared && left[i] === right[i]) {
    i += 1;
  }
  return i < shared ? left[i].localeCompare(right[i]) : left.length - right.length;
});

console.log(`Found ${tagHierarchies.length} paths, filtered to ${filteredHierarchies.length} valid paths`);
/**
 * Render hierarchy paths as CSV text with the leaf node repeated in the first column.
 *
 * Layout: a header row `"Leaf Node","Level 1",…,"Level N"` (N = longest path),
 * then one row per path holding the leaf followed by every segment from root
 * to leaf; shorter paths are padded with empty cells. Each value is emitted as
 * `"""value"""` with inner double quotes doubled, so a CSV reader yields the
 * value surrounded by literal double quotes. The content starts with a UTF-8 BOM.
 *
 * @param {string[][]} [hierarchies=filteredHierarchies] - Paths to render.
 *   Defaults to the enclosing scope's `filteredHierarchies`, so existing
 *   zero-argument calls behave as before.
 * @returns {{content: string, filename: string}} CSV text and suggested file name.
 */
function generateImprovedExcelFormat(hierarchies = filteredHierarchies) {
  // Longest path decides the number of "Level" columns (0 for an empty input).
  const maxDepth = hierarchies.reduce((depth, path) => Math.max(depth, path.length), 0);

  // Wrap a value in escaped quotes so the quotes survive CSV parsing.
  const quoteCell = (value) => `"""${value.replace(/"/g, '""')}"""`;

  // Column headers with the leaf node as first column.
  const headers = ['Leaf Node'];
  for (let level = 1; level <= maxDepth; level++) {
    headers.push(`Level ${level}`);
  }

  const csvLines = [headers.map((h) => `"${h}"`).join(',')];
  hierarchies.forEach((path) => {
    // One extra column up front for the leaf node.
    const row = new Array(maxDepth + 1).fill('');
    row[0] = quoteCell(path[path.length - 1]);
    path.forEach((tag, index) => {
      row[index + 1] = quoteCell(tag);
    });
    csvLines.push(row.join(','));
  });

  return {
    content: '\ufeff' + csvLines.join('\n'), // UTF-8 BOM at start
    filename: 'immuta_tags_hierarchical.csv'
  };
}
/**
 * Merge hierarchy paths into a nested tree.
 *
 * Paths that share a prefix share the corresponding nodes; children keep the
 * order in which they were first seen.
 *
 * @param {string[][]} hierarchies - Paths, each ordered root to leaf.
 * @returns {{tags: Array<{name: string, children: Array}>}} The top-level
 *   nodes (the first segment of each path) under the `tags` key.
 */
function generateTreeJSON(hierarchies) {
  // Synthetic root: only its children are returned.
  const rootNode = { name: 'root', children: [] };

  hierarchies.forEach((path) => {
    let currentNode = rootNode;
    path.forEach((segment) => {
      // Reuse the child with this name if present, otherwise create it.
      let childNode = currentNode.children.find((child) => child.name === segment);
      if (!childNode) {
        childNode = { name: segment, children: [] };
        currentNode.children.push(childNode);
      }
      currentNode = childNode;
    });
  });

  // The same shape is returned whether there is one top-level node or several.
  return { tags: rootNode.children };
}
/**
 * Flatten hierarchy paths into a list of `{id, name, parentId}` nodes.
 *
 * Ids are assigned sequentially from 1 in first-seen order; root nodes have
 * `parentId: null`. A node is identified by its parent's id plus its own name,
 * so equally named nodes in different branches stay distinct even when their
 * parents also share a name. (Keying on the parent's *name* would merge
 * `A > X > Leaf` and `B > X > Leaf` into one `Leaf`.)
 *
 * @param {string[][]} hierarchies - Paths, each ordered root to leaf.
 * @returns {{nodes: Array<{id: number, name: string, parentId: (number|null)}>}}
 */
function generateFlatJSON(hierarchies) {
  const nodes = [];
  let nextId = 1;
  // parentId (null for roots) -> Map of child name -> child id
  const childIdsByParent = new Map();

  hierarchies.forEach((path) => {
    let parentId = null;
    path.forEach((segment) => {
      let siblings = childIdsByParent.get(parentId);
      if (siblings === undefined) {
        siblings = new Map();
        childIdsByParent.set(parentId, siblings);
      }

      let nodeId = siblings.get(segment);
      if (nodeId === undefined) {
        // First time this name appears under this exact parent.
        nodeId = nextId++;
        siblings.set(segment, nodeId);
        nodes.push({ id: nodeId, name: segment, parentId: parentId });
      }

      // The node just visited is the parent of the next segment.
      parentId = nodeId;
    });
  });

  return { nodes };
}
/**
 * Trigger a browser download of `content` under `filename`.
 *
 * `'json'` content is serialized with `JSON.stringify(content, null, 2)`;
 * `'csv'` content is written as given. Any other type is reported as an error.
 * The download is started by clicking a hidden temporary `<a>` element that
 * points at an object URL for the data.
 *
 * @param {*} content - Value to serialize (json) or text to write (csv).
 * @param {string} filename - Name offered for the downloaded file.
 * @param {string} type - Either 'json' or 'csv'.
 * @returns {boolean} True if the download was triggered; false if anything
 *   threw (the error is logged, not rethrown).
 */
function downloadFile(content, filename, type) {
  let url = null;
  let link = null;
  try {
    // Create the appropriate blob based on file type.
    let blob;
    if (type === 'json') {
      blob = new Blob([JSON.stringify(content, null, 2)], { type: 'application/json' });
    } else if (type === 'csv') {
      blob = new Blob([content], { type: 'text/csv;charset=utf-8;' });
    } else {
      throw new Error(`Unsupported file type: ${type}`);
    }
    url = URL.createObjectURL(blob);

    // Create and trigger the download.
    link = document.createElement('a');
    link.href = url;
    link.download = filename;
    link.style.display = 'none';
    document.body.appendChild(link);
    link.click();
    return true;
  } catch (error) {
    console.error(`Failed to download ${filename}:`, error);
    return false;
  } finally {
    // Cleanup runs on success AND failure so the object URL is never leaked.
    // It is deferred by 100 ms, presumably to let the browser start the download.
    if (url !== null) {
      const staleUrl = url;
      const staleLink = link;
      setTimeout(() => {
        // Only detach if still attached, so a missing link cannot block the revoke.
        if (staleLink && staleLink.parentNode) {
          staleLink.parentNode.removeChild(staleLink);
        }
        URL.revokeObjectURL(staleUrl);
      }, 100);
    }
  }
}
| // Generate all formats | |
| const csvFormat = generateImprovedExcelFormat(); | |
| const treeJSON = generateTreeJSON(filteredHierarchies); | |
| const flatJSON = generateFlatJSON(filteredHierarchies); | |
| // Download all formats | |
| const csvDownloaded = downloadFile(csvFormat.content, csvFormat.filename, 'csv'); | |
| const treeDownloaded = downloadFile(treeJSON, 'immuta_tags_tree.json', 'json'); | |
| const flatDownloaded = downloadFile(flatJSON, 'immuta_tags_flat.json', 'json'); | |
| console.log(`β Success! Extracted ${filteredHierarchies.length} tag hierarchies`); | |
| console.log(`π Files have been downloaded:`); | |
| console.log(` - ${csvFormat.filename} (CSV format with leaf nodes for easy analysis)`); | |
| console.log(` - immuta_tags_tree.json (Nested tree structure for hierarchy visualization)`); | |
| console.log(` - immuta_tags_flat.json (Flat structure for easier programmatic processing)`); | |
| // Preview of extracted paths with quotes | |
| console.log("Preview of extracted paths:"); | |
| filteredHierarchies.slice(0, 5).forEach(path => { | |
| const leafNode = path[path.length - 1]; | |
| console.log(` - Leaf: "${leafNode}" | Path: ` + path.map(tag => `"${tag}"`).join(' β ')); | |
| }); | |
| return { | |
| count: filteredHierarchies.length, | |
| formats: { | |
| csv: csvFormat.filename, | |
| treeJson: 'immuta_tags_tree.json', | |
| flatJson: 'immuta_tags_flat.json' | |
| }, | |
| preview: filteredHierarchies.slice(0, 5).map(p => { | |
| const leaf = p[p.length - 1]; | |
| return { | |
| leaf: leaf, | |
| path: p.map(tag => `"${tag}"`).join(' β ') | |
| }; | |
| }) | |
| }; | |
| } catch (error) { | |
| console.error("β Error extracting tag hierarchies:", error); | |
| return { | |
| error: error.message | |
| }; | |
| } | |
| })(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment