davidlu1001 · May 13, 2025 10:38
diff --git a/compare_tags.sh b/compare_tags.sh
 #!/bin/bash
 #
 # compare_tags.sh - Compare leaf node tags between two Immuta CSV files
 # Usage: ./compare_tags.sh file1.csv file2.csv
 #
 # The first comma-separated column of each file (header row skipped) is
 # treated as the tag. Double quotes are stripped after splitting, so a quoted
 # first field that itself contains a comma is cut at that comma.
 #
 # Exit status: 0 when the comparison ran, 1 on usage or I/O errors.

 set -u -o pipefail

 # Print an error message to stderr and abort with status 1.
 die() {
   printf 'Error: %s\n' "$*" >&2
   exit 1
 }

 # Write the sorted, unquoted first column of CSV $1 (header skipped) to $2.
 extract_tags() {
   cut -d, -f1 -- "$1" | tail -n +2 | tr -d '"' | sort > "$2"
 }

 # Print the line count of file $1 as a bare integer. The arithmetic expansion
 # drops the leading padding that some wc implementations emit.
 count_lines() {
   echo "$(( $(wc -l < "$1") ))"
 }

 if [[ $# -ne 2 ]]; then
   echo "Usage: $0 <file1.csv> <file2.csv>" >&2
   exit 1
 fi

 FILE1=$1
 FILE2=$2

 # Fail early instead of silently reporting "0 total tags" for a bad path.
 for input in "$FILE1" "$FILE2"; do
   [[ -r "$input" && ! -d "$input" ]] || die "cannot read input file: $input"
 done

 TEMP_DIR=$(mktemp -d) || die "could not create a temporary directory"

 # Remove the scratch directory whenever the script exits, including the
 # early failure exits below.
 cleanup() {
   rm -rf -- "$TEMP_DIR"
 }
 trap cleanup EXIT

 TAGS1="${TEMP_DIR}/tags1.txt"
 TAGS2="${TEMP_DIR}/tags2.txt"
 ONLY_IN_1="${TEMP_DIR}/only_in_1.txt"
 ONLY_IN_2="${TEMP_DIR}/only_in_2.txt"

 echo "Comparing tag leaf nodes between:"
 echo "  - $FILE1"
 echo "  - $FILE2"
 echo ""

 # Extract first column (leaf nodes), skip header, remove quotes, and sort
 extract_tags "$FILE1" "$TAGS1" || die "failed to extract tags from $FILE1"
 extract_tags "$FILE2" "$TAGS2" || die "failed to extract tags from $FILE2"

 # Find tags unique to each file (comm needs both inputs sorted, see above)
 comm -23 "$TAGS1" "$TAGS2" > "$ONLY_IN_1" || die "comparison failed"
 comm -13 "$TAGS1" "$TAGS2" > "$ONLY_IN_2" || die "comparison failed"

 # Count results
 COUNT1=$(count_lines "$ONLY_IN_1")
 COUNT2=$(count_lines "$ONLY_IN_2")
 TOTAL1=$(count_lines "$TAGS1")
 TOTAL2=$(count_lines "$TAGS2")

 # Display summary
 echo "Summary:"
 echo "  - $FILE1: $TOTAL1 total tags"
 echo "  - $FILE2: $TOTAL2 total tags"
 echo "  - $COUNT1 tags only in $FILE1"
 echo "  - $COUNT2 tags only in $FILE2"
 echo ""

 # Display detailed results if differences exist
 if (( COUNT1 > 0 )); then
   echo "Tags only in $FILE1:"
   sed 's/^/  - /' "$ONLY_IN_1"
   echo ""
 fi

 if (( COUNT2 > 0 )); then
   echo "Tags only in $FILE2:"
   sed 's/^/  - /' "$ONLY_IN_2"
   echo ""
 fi

 # The temporary directory is removed by the EXIT trap.
 echo "Comparison complete."
	#!/bin/bash
	#
	# compare_tags.sh - Compare leaf node tags between two Immuta CSV files
	# Usage: ./compare_tags.sh file1.csv file2.csv
	#
	# The first comma-separated column of each file (header row skipped) is
	# treated as the tag. Double quotes are stripped after splitting, so a quoted
	# first field that itself contains a comma is cut at that comma.
	#
	# Exit status: 0 when the comparison ran, 1 on usage or I/O errors.

	set -u -o pipefail

	# Print an error message to stderr and abort with status 1.
	die() {
	  printf 'Error: %s\n' "$*" >&2
	  exit 1
	}

	# Write the sorted, unquoted first column of CSV $1 (header skipped) to $2.
	# The stages are joined with real pipes; an escaped "\|" would instead be
	# handed to cut as a literal filename argument.
	extract_tags() {
	  cut -d, -f1 -- "$1" | tail -n +2 | tr -d '"' | sort > "$2"
	}

	# Print the line count of file $1 as a bare integer. The arithmetic expansion
	# drops the leading padding that some wc implementations emit.
	count_lines() {
	  echo "$(( $(wc -l < "$1") ))"
	}

	if [[ $# -ne 2 ]]; then
	  echo "Usage: $0 <file1.csv> <file2.csv>" >&2
	  exit 1
	fi

	FILE1=$1
	FILE2=$2

	# Fail early instead of silently reporting "0 total tags" for a bad path.
	for input in "$FILE1" "$FILE2"; do
	  [[ -r "$input" && ! -d "$input" ]] || die "cannot read input file: $input"
	done

	TEMP_DIR=$(mktemp -d) || die "could not create a temporary directory"

	# Remove the scratch directory whenever the script exits, including the
	# early failure exits below.
	cleanup() {
	  rm -rf -- "$TEMP_DIR"
	}
	trap cleanup EXIT

	TAGS1="${TEMP_DIR}/tags1.txt"
	TAGS2="${TEMP_DIR}/tags2.txt"
	ONLY_IN_1="${TEMP_DIR}/only_in_1.txt"
	ONLY_IN_2="${TEMP_DIR}/only_in_2.txt"

	echo "Comparing tag leaf nodes between:"
	echo " - $FILE1"
	echo " - $FILE2"
	echo ""

	# Extract first column (leaf nodes), skip header, remove quotes, and sort
	extract_tags "$FILE1" "$TAGS1" || die "failed to extract tags from $FILE1"
	extract_tags "$FILE2" "$TAGS2" || die "failed to extract tags from $FILE2"

	# Find tags unique to each file (comm needs both inputs sorted, see above)
	comm -23 "$TAGS1" "$TAGS2" > "$ONLY_IN_1" || die "comparison failed"
	comm -13 "$TAGS1" "$TAGS2" > "$ONLY_IN_2" || die "comparison failed"

	# Count results
	COUNT1=$(count_lines "$ONLY_IN_1")
	COUNT2=$(count_lines "$ONLY_IN_2")
	TOTAL1=$(count_lines "$TAGS1")
	TOTAL2=$(count_lines "$TAGS2")

	# Display summary
	echo "Summary:"
	echo " - $FILE1: $TOTAL1 total tags"
	echo " - $FILE2: $TOTAL2 total tags"
	echo " - $COUNT1 tags only in $FILE1"
	echo " - $COUNT2 tags only in $FILE2"
	echo ""

	# Display detailed results if differences exist
	if (( COUNT1 > 0 )); then
	  echo "Tags only in $FILE1:"
	  sed 's/^/ - /' "$ONLY_IN_1"
	  echo ""
	fi

	if (( COUNT2 > 0 )); then
	  echo "Tags only in $FILE2:"
	  sed 's/^/ - /' "$ONLY_IN_2"
	  echo ""
	fi

	# The temporary directory is removed by the EXIT trap.
	echo "Comparison complete."