Skip to content

Instantly share code, notes, and snippets.

@mmguero
Created July 17, 2025 15:53
Show Gist options
  • Save mmguero/a365aa8b8cfd45db04d0f86569ce0004 to your computer and use it in GitHub Desktop.
Save mmguero/a365aa8b8cfd45db04d0f86569ce0004 to your computer and use it in GitHub Desktop.
Find "collisions" (documents with _version > 1) in an OpenSearch index, using a (slow) scroll over every document.
#!/usr/bin/env bash
#
# Find "collisions" in an OpenSearch index pattern: documents whose _version
# is greater than 1 (which normally means the same _id was written more than
# once).
#
# The script pages through every document matched by INDEX with the scroll
# API (slow on large indices), appends each hit with _version > 1 to a
# newline-delimited JSON file, writes a pretty-printed copy of that file and
# prints a short summary.
#
# Requires: bash, curl, jq.
# Output:   COLLISIONS_RAW and COLLISIONS_PRETTY in the current directory.
# Exit:     0 on success, 1 on any failed request or unexpected response.

set -euo pipefail

# Config
readonly INDEX="arkime_sessions3-*"
readonly SIZE=1000              # documents requested per scroll page
readonly SCROLL_DURATION="1m"   # scroll keep-alive, sent with every request
# curl options that supply the credentials. Kept as an array so each option
# is passed as its own word without relying on unquoted word splitting.
readonly -a AUTH=(--config /var/local/curlrc/.opensearch.primary.curlrc)
readonly BASE_URL="https://opensearch:9200"
readonly COLLISIONS_RAW="colliding_docs_raw.json"
readonly COLLISIONS_PRETTY="colliding_docs_pretty.json"
# "version": true makes every hit carry its _version field.
readonly SEARCH_BODY='{"version": true, "query": {"match_all": {}}}'

# Current scroll id; global so the EXIT trap can release it on any exit path.
scroll_id=""

# Print an error message to stderr and abort the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Send a JSON request to OpenSearch and print the response body.
# Arguments: $1 - HTTP method, $2 - path relative to BASE_URL, $3 - JSON body
# Returns:   curl's exit status; --fail turns HTTP errors (>= 400) into a
#            non-zero status instead of an error document on stdout.
#######################################
os_request() {
  local method=$1 path=$2 body=$3
  curl -sSL --fail "${AUTH[@]}" -X "$method" "${BASE_URL}/${path}" \
    -H "Content-Type: application/json" \
    -d "$body"
}

#######################################
# Append every hit with _version > 1 from one response page to
# COLLISIONS_RAW, one compact JSON document per line.
# Arguments: $1 - search/scroll response JSON
#######################################
append_collisions() {
  printf '%s\n' "$1" \
    | jq -c '.hits.hits[] | select(._version > 1)' >> "$COLLISIONS_RAW"
}

# Release the server-side scroll context instead of leaving it to time out.
clear_scroll() {
  [[ -n "$scroll_id" ]] || return 0
  local body
  body=$(jq -n --arg id "$scroll_id" '{scroll_id: $id}') || return 0
  # Best effort: the context expires after SCROLL_DURATION anyway, so a
  # failure here must not change the script's exit status.
  os_request DELETE "_search/scroll" "$body" > /dev/null 2>&1 || true
}
trap clear_scroll EXIT

main() {
  local tool
  for tool in curl jq; do
    command -v "$tool" > /dev/null || die "required tool not found: $tool"
  done

  rm -f -- "$COLLISIONS_RAW" "$COLLISIONS_PRETTY"

  # Initial search
  echo "Starting initial scroll query..."
  local page
  page=$(os_request POST \
    "${INDEX}/_search?scroll=${SCROLL_DURATION}&size=${SIZE}" \
    "$SEARCH_BODY") || die "initial search request failed"
  # -e makes jq fail when _scroll_id is missing/null, so a bad response is
  # caught here rather than being sent back as the literal string "null".
  scroll_id=$(printf '%s\n' "$page" | jq -er '._scroll_id') \
    || die "initial search response contains no _scroll_id"
  append_collisions "$page" || die "could not process initial search response"

  # Scroll loop
  echo "Scrolling for more results..."
  local scroll_body hit_count
  while true; do
    # Build the request with jq so both values are correctly JSON-escaped.
    scroll_body=$(jq -n --arg scroll "$SCROLL_DURATION" --arg id "$scroll_id" \
      '{scroll: $scroll, scroll_id: $id}')
    page=$(os_request POST "_search/scroll" "$scroll_body") \
      || die "scroll request failed"

    hit_count=$(printf '%s\n' "$page" | jq '.hits.hits | length') \
      || die "scroll response is not valid JSON"
    # An empty or malformed response must abort; treating it as "more pages"
    # would make this loop spin forever.
    [[ "$hit_count" =~ ^[0-9]+$ ]] \
      || die "unexpected scroll response (no hit count)"
    if (( hit_count == 0 )); then
      break
    fi

    scroll_id=$(printf '%s\n' "$page" | jq -er '._scroll_id') \
      || die "scroll response contains no _scroll_id"
    append_collisions "$page" || die "could not process scroll response"
  done

  # Pretty-print version (optional)
  jq '.' "$COLLISIONS_RAW" > "$COLLISIONS_PRETTY"

  # Summary
  local collision_count
  collision_count=$(jq -s 'length' "$COLLISIONS_RAW")
  echo "🔎 Total documents with version > 1: $collision_count"
  echo "📁 Raw collision docs: $COLLISIONS_RAW"
  echo "🖋️ Pretty JSON version: $COLLISIONS_PRETTY"

  # The 10 most frequent colliding _id values, each prefixed with its count.
  # awk reads all of its input (unlike head), so the upstream sort is never
  # killed by SIGPIPE, which pipefail would report as a failure.
  jq -r '._id' "$COLLISIONS_RAW" | sort | uniq -c | sort -nr | awk 'NR <= 10'
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment