#!/bin/bash
#
# compare_tags.sh - Compare leaf node tags between two Immuta CSV files
#
# Usage: ./compare_tags.sh file1.csv file2.csv
#
# The first comma-separated column of each file (header row skipped,
# double quotes removed) is treated as the tag name. The script prints
# how many tags each file has and lists the tags found in only one file.
#
# Limitation: columns are split with `cut -d,`, i.e. at every comma, so a
# quoted first column that itself contains a comma is truncated at that comma.
#
# Exit status: 0 on success, 1 on bad usage or unreadable input.

set -euo pipefail

# Scratch directory; global so the EXIT trap can still see it after main returns.
TEMP_DIR=''

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove the scratch directory, if one was created. Runs on every exit path.
cleanup() {
  if [[ -n "$TEMP_DIR" ]]; then
    rm -rf -- "$TEMP_DIR"
  fi
}

# Write the sorted tag column of CSV file $1 to stdout.
extract_tags() {
  # tr also drops carriage returns so CRLF and LF files compare equal when
  # the tag is the only column on the line.
  # LC_ALL=C: byte-wise ordering, matching the collation used for comm below.
  cut -d, -f1 -- "$1" | tail -n +2 | tr -d '"\r' | LC_ALL=C sort
}

# Print the number of lines in file $1 as a bare integer.
count_lines() {
  local n
  n=$(wc -l <"$1")
  # Arithmetic expansion strips the padding some wc implementations emit.
  printf '%d\n' "$((n))"
}

# If $3 (count) is positive, print the tags listed in file $2 under a
# heading naming the input file $1.
report_unique() {
  local label=$1 list=$2 count=$3
  if ((count > 0)); then
    printf 'Tags only in %s:\n' "$label"
    sed 's/^/ - /' "$list"
    printf '\n'
  fi
}

main() {
  if (($# != 2)); then
    printf 'Usage: %s <file1.csv> <file2.csv>\n' "$0" >&2
    exit 1
  fi

  local file1=$1 file2=$2
  local f
  for f in "$file1" "$file2"; do
    [[ -r "$f" && ! -d "$f" ]] || die "Error: cannot read input file: $f"
  done

  TEMP_DIR=$(mktemp -d) || die "Error: could not create temporary directory"
  trap cleanup EXIT

  local tags1="${TEMP_DIR}/tags1.txt"
  local tags2="${TEMP_DIR}/tags2.txt"
  local only_in_1="${TEMP_DIR}/only_in_1.txt"
  local only_in_2="${TEMP_DIR}/only_in_2.txt"

  printf 'Comparing tag leaf nodes between:\n'
  printf ' - %s\n' "$file1"
  printf ' - %s\n' "$file2"
  printf '\n'

  # Extract first column (leaf nodes), skip header, remove quotes, and sort
  extract_tags "$file1" >"$tags1"
  extract_tags "$file2" >"$tags2"

  # Find tags unique to each file (comm needs both inputs sorted in the
  # collation it runs under, hence the same LC_ALL=C as the sort).
  LC_ALL=C comm -23 "$tags1" "$tags2" >"$only_in_1"
  LC_ALL=C comm -13 "$tags1" "$tags2" >"$only_in_2"

  # Count results
  local count1 count2 total1 total2
  count1=$(count_lines "$only_in_1")
  count2=$(count_lines "$only_in_2")
  total1=$(count_lines "$tags1")
  total2=$(count_lines "$tags2")

  # Display summary
  printf 'Summary:\n'
  printf ' - %s: %s total tags\n' "$file1" "$total1"
  printf ' - %s: %s total tags\n' "$file2" "$total2"
  printf ' - %s tags only in %s\n' "$count1" "$file1"
  printf ' - %s tags only in %s\n' "$count2" "$file2"
  printf '\n'

  # Display detailed results if differences exist
  report_unique "$file1" "$only_in_1" "$count1"
  report_unique "$file2" "$only_in_2" "$count2"

  printf 'Comparison complete.\n'
}

main "$@"