Last active
October 7, 2025 21:45
-
-
Save frikky/595b09abf59fcbb470ff2b08aef4e0bd to your computer and use it in GitHub Desktop.
Deletion of autogenerated keys in shuffle's datastore
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Delete documents with autogenerated IDs from an OpenSearch index.

The script scrolls through every document in ``index`` (IDs only, no
``_source``), deletes each document whose ``_id`` is exactly
``autogenerated_id_length`` characters long, and finally clears the scroll
context it opened.
"""
import json
from urllib.parse import quote

import requests

# OpenSearch endpoint
host = "https://localhost:9200"
index = "org_cache"
username = "admin"  # replace with your username
password = "admin"  # replace with your password
auth = (username, password)  # all requests will use this

scroll_time = "25m"
scroll_size = 1000
headers = {"Content-Type": "application/json"}

# Seconds to wait on any single HTTP call, so an unresponsive node cannot
# stall the script forever.
request_timeout = 60

# A document is treated as autogenerated purely by the length of its ID.
# NOTE(review): a hand-written key that happens to be exactly this long is
# deleted as well -- confirm no such keys exist before running.
autogenerated_id_length = 20

# Options shared by every request. ``verify=False`` disables TLS certificate
# verification; it is passed per request rather than set on the session so
# the setting is explicit on every call.
request_options = {"auth": auth, "verify": False, "timeout": request_timeout}

# A single session reuses the underlying connection across the (potentially
# many thousands of) delete calls instead of reconnecting for each one.
with requests.Session() as session:
    # Initialize scroll
    url = f"{host}/{index}/_search?scroll={scroll_time}"
    payload = {
        "_source": False,
        "size": scroll_size,
        "query": {"match_all": {}},
    }
    response = session.get(
        url, headers=headers, data=json.dumps(payload), **request_options
    )
    # Fail with a clear HTTP error (bad credentials, missing index, ...)
    # instead of a KeyError on the missing "_scroll_id" below.
    response.raise_for_status()
    data = response.json()
    scroll_id = data["_scroll_id"]
    hits = data["hits"]["hits"]

    try:
        while hits:
            for doc in hits:
                doc_id = doc["_id"]
                # Only delete autogenerated IDs (selected by length).
                if len(doc_id) != autogenerated_id_length:
                    continue
                print(f"Deleting autogenerated doc {doc_id}")
                # Percent-encode the ID so characters such as "/" or "?"
                # cannot alter the request path.
                encoded_id = quote(doc_id, safe="")
                del_url = f"{host}/{index}/_doc/{encoded_id}"
                resp = session.delete(del_url, **request_options)
                if resp.status_code == 200:
                    print(f"Deleted {doc_id}")
                else:
                    print(f"Failed to delete {doc_id}: {resp.text}")

            # Fetch next scroll batch
            scroll_url = f"{host}/_search/scroll"
            scroll_payload = {"scroll": scroll_time, "scroll_id": scroll_id}
            response = session.post(
                scroll_url,
                headers=headers,
                data=json.dumps(scroll_payload),
                **request_options,
            )
            data = response.json()
            # Check for the scroll ID *before* touching "hits": an error
            # response carries neither key and must end the loop cleanly.
            if "_scroll_id" not in data:
                print("No new scroll ID found.")
                break
            scroll_id = data["_scroll_id"]
            hits = data.get("hits", {}).get("hits", [])
    finally:
        # Clear scroll context, even when the loop above raised, so the
        # server-side scroll is not left open for the full scroll_time.
        clear_url = f"{host}/_search/scroll"
        try:
            session.delete(
                clear_url,
                headers=headers,
                data=json.dumps({"scroll_id": [scroll_id]}),
                **request_options,
            )
        except requests.RequestException as exc:
            # Best-effort cleanup: report, but do not mask an earlier error.
            print(f"Failed to clear scroll context: {exc}")

print("Finished deleting all autogenerated IDs.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment