Skip to content

Instantly share code, notes, and snippets.

@davidlu1001
Created May 13, 2025 10:38
Show Gist options
  • Save davidlu1001/c5821bd07f8db994dc5a941b85546388 to your computer and use it in GitHub Desktop.
Save davidlu1001/c5821bd07f8db994dc5a941b85546388 to your computer and use it in GitHub Desktop.
compare_tags.sh
#!/bin/bash
# compare_tags.sh - Compare leaf node tags between two Immuta CSV files
# Usage: ./compare_tags.sh file1.csv file2.csv
#
# Exits with status 1 and a message on stderr when the argument count is
# wrong or when either input file is missing, unreadable, or a directory.

# Stop on failed commands, unset variables and failed pipeline stages so a
# broken step cannot silently produce an empty comparison.
set -euo pipefail

if [[ $# -ne 2 ]]; then
  # Usage errors are diagnostics, so they go to stderr rather than stdout.
  echo "Usage: $0 <file1.csv> <file2.csv>" >&2
  exit 1
fi

# Reject unusable inputs up front; otherwise the extraction pipeline would
# only print an error and the script would go on to report zero tags.
# Only readability is required (not -f) so that pipes such as <(...) work.
for input_file in "$1" "$2"; do
  if [[ ! -r "$input_file" || -d "$input_file" ]]; then
    echo "Error: cannot read input file: $input_file" >&2
    exit 1
  fi
done
# Input CSV paths, taken from the two positional arguments.
FILE1=$1
FILE2=$2

# Private scratch directory for the intermediate tag lists.
TEMP_DIR=$(mktemp -d) || {
  echo "Error: failed to create temporary directory" >&2
  exit 1
}
# Remove the scratch directory whenever the script exits, including early
# exits caused by a failure further down.
trap 'rm -rf -- "$TEMP_DIR"' EXIT

# Sorted tag list extracted from each input file.
TAGS1="${TEMP_DIR}/tags1.txt"
TAGS2="${TEMP_DIR}/tags2.txt"
# Tags that appear in exactly one of the two files.
ONLY_IN_1="${TEMP_DIR}/only_in_1.txt"
ONLY_IN_2="${TEMP_DIR}/only_in_2.txt"

echo "Comparing tag leaf nodes between:"
echo " - $FILE1"
echo " - $FILE2"
echo ""
# extract_tags CSV_FILE
# Prints the first column (leaf nodes) of CSV_FILE to stdout: header row
# skipped, double quotes and carriage returns removed, sorted byte-wise.
#   - "--" keeps a file name that starts with "-" from being read as an option.
#   - Stripping "\r" makes CRLF and LF files compare equal when the tag is
#     the last field on the line.
#   - LC_ALL=C gives the same ordering regardless of the caller's locale.
# NOTE: fields are split on every comma, so a quoted field that itself
# contains a comma is truncated at that comma.
extract_tags() {
  cut -d, -f1 -- "$1" | tail -n +2 | tr -d '"\r' | LC_ALL=C sort
}

extract_tags "$FILE1" > "$TAGS1"
extract_tags "$FILE2" > "$TAGS2"

# Find tags unique to each file. comm must use the same collation as the
# sort above, hence the matching LC_ALL=C.
LC_ALL=C comm -23 "$TAGS1" "$TAGS2" > "$ONLY_IN_1"
LC_ALL=C comm -13 "$TAGS1" "$TAGS2" > "$ONLY_IN_2"
# count_lines FILE
# Prints the number of lines in FILE as a bare integer. Some wc
# implementations pad the count with leading blanks; passing it through
# arithmetic expansion strips that padding so it prints cleanly in messages.
# Returns non-zero (and prints nothing) if FILE cannot be read.
count_lines() {
  local n
  n=$(wc -l < "$1") || return
  printf '%d\n' "$((n))"
}

# Count results
COUNT1=$(count_lines "$ONLY_IN_1")
COUNT2=$(count_lines "$ONLY_IN_2")
TOTAL1=$(count_lines "$TAGS1")
TOTAL2=$(count_lines "$TAGS2")
# Display summary
echo "Summary:"
echo " - $FILE1: $TOTAL1 total tags"
echo " - $FILE2: $TOTAL2 total tags"
echo " - $COUNT1 tags only in $FILE1"
echo " - $COUNT2 tags only in $FILE2"
echo ""

# print_unique_tags LABEL LIST_FILE COUNT
# When COUNT is greater than zero, prints a "Tags only in LABEL:" heading,
# each line of LIST_FILE prefixed with " - ", and a trailing blank line.
# Prints nothing otherwise. The arithmetic test treats an empty or
# blank-padded COUNT as a number instead of raising a test-operator error.
print_unique_tags() {
  local label=$1 list_file=$2 count=$3
  if (( count > 0 )); then
    echo "Tags only in $label:"
    sed 's/^/ - /' "$list_file"
    echo ""
  fi
}

# Display detailed results if differences exist
print_unique_tags "$FILE1" "$ONLY_IN_1" "$COUNT1"
print_unique_tags "$FILE2" "$ONLY_IN_2" "$COUNT2"

# Clean up; skipped when the path is empty so rm never receives a blank
# argument.
if [[ -n "${TEMP_DIR:-}" ]]; then
  rm -rf -- "$TEMP_DIR"
fi
echo "Comparison complete."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment