Skip to content

Instantly share code, notes, and snippets.

@frikky
Last active October 7, 2025 21:45
Show Gist options
  • Save frikky/595b09abf59fcbb470ff2b08aef4e0bd to your computer and use it in GitHub Desktop.
Save frikky/595b09abf59fcbb470ff2b08aef4e0bd to your computer and use it in GitHub Desktop.
Deletion of autogenerated keys in shuffle's datastore
import requests
import json
# Delete every document in an OpenSearch index whose ID has the length of an
# autogenerated ID. The script scrolls through the whole index (IDs only, no
# _source), issues one DELETE per matching document, and finally releases the
# scroll context on the server.
from urllib.parse import quote

# OpenSearch endpoint
host = "https://localhost:9200"
index = "org_cache"
username = "admin"  # replace with your username
password = "admin"  # replace with your password
auth = (username, password)  # all requests will use this
scroll_time = "25m"
scroll_size = 1000
headers = {"Content-Type": "application/json"}

# Documents are selected purely by ID length: every ID that is exactly this
# many characters long is deleted.
# NOTE(review): a hand-chosen key that happens to have this exact length is
# deleted as well -- confirm that no such keys exist before running.
autogenerated_id_length = 20

# Seconds to wait for each HTTP call, so a stalled node cannot hang the script.
request_timeout = 60

# NOTE(security): certificate verification is disabled, which is only
# acceptable for a local node with a self-signed certificate. Set this to True
# (or to a CA bundle path) for anything reachable over a network.
verify_tls = False

# Keyword arguments shared by every request below.
request_kwargs = {"auth": auth, "verify": verify_tls, "timeout": request_timeout}

# Initialize scroll
url = f"{host}/{index}/_search?scroll={scroll_time}"
payload = {
    "_source": False,  # only the IDs are needed
    "size": scroll_size,
    "query": {"match_all": {}},
}
response = requests.get(url, headers=headers, data=json.dumps(payload), **request_kwargs)
# Fail with the real HTTP error instead of a KeyError on the lookups below.
response.raise_for_status()
data = response.json()
scroll_id = data["_scroll_id"]
hits = data["hits"]["hits"]

scroll_url = f"{host}/_search/scroll"

try:
    while hits:
        for doc in hits:
            doc_id = doc["_id"]
            # Only delete IDs with the autogenerated length.
            if len(doc_id) != autogenerated_id_length:
                continue

            print("Deleting autogenerated doc %s" % doc_id)
            # Percent-encode the ID so characters such as "/" or "#" cannot
            # change the request path.
            encoded_id = quote(doc_id, safe="")
            del_url = f"{host}/{index}/_doc/{encoded_id}"
            resp = requests.delete(del_url, **request_kwargs)
            if resp.status_code == 200:
                print(f"Deleted {doc_id}")
            else:
                # A failed delete is reported but does not stop the run.
                print(f"Failed to delete {doc_id}: {resp.text}")

        # Fetch next scroll batch
        scroll_payload = {"scroll": scroll_time, "scroll_id": scroll_id}
        response = requests.post(
            scroll_url,
            headers=headers,
            data=json.dumps(scroll_payload),
            **request_kwargs,
        )
        response.raise_for_status()
        data = response.json()
        hits = data["hits"]["hits"]
        if "_scroll_id" in data:
            scroll_id = data["_scroll_id"]
        else:
            print("No new scroll ID found.")
            break
finally:
    # Clear scroll context, even when the loop above failed, so the server
    # does not keep it open until scroll_time expires.
    try:
        requests.delete(
            scroll_url,
            headers=headers,
            data=json.dumps({"scroll_id": [scroll_id]}),
            **request_kwargs,
        )
    except requests.RequestException as exc:
        # Best-effort cleanup: never mask an error raised by the main loop.
        print(f"Failed to clear scroll context: {exc}")

print("Finished deleting all autogenerated IDs.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment