Last active
October 26, 2018 13:27
-
-
Save yoshi0309/69a95fa6e0d0a22a0207 to your computer and use it in GitHub Desktop.
Delete the documents returned by a search query from an Amazon CloudSearch domain.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import sys | |
import urllib | |
import urllib2 | |
import json | |
# Replace the XXXXX placeholders with your own domain's endpoints before running.
SEARCH_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com"
DOCUMENT_ENDPOINT = "XXXXX.us-east-1.cloudsearch.amazonaws.com"
# Version segment inserted into every request URL this script builds.
API_VERSION = "2013-01-01"
def searchDocuments(queryParams):
    """Send a search request to the search endpoint and return the raw body.

    Args:
        queryParams: mapping of search parameter names to values; it is
            URL-encoded and sent as the request body.

    Returns:
        The response body exactly as read from the connection.

    Raises:
        Exception: if the response status code is not 200.
        urllib2.URLError: propagated from ``urllib2.urlopen`` (this includes
            ``urllib2.HTTPError`` for HTTP error statuses).
    """
    query = urllib.urlencode(queryParams)
    url = "http://" + SEARCH_ENDPOINT + "/" + API_VERSION + "/search"
    # Supplying a data argument makes urllib2 issue a POST rather than a GET.
    result = urllib2.urlopen(url, query)
    try:
        if result.code != 200:
            # result.code is an int; it must be converted before concatenation,
            # otherwise this line fails with TypeError instead of reporting
            # the status code.
            raise Exception(
                "Error occured while sending search query. Response Code:"
                + str(result.code))
        return result.read()
    finally:
        # Release the connection on both the success and the error path.
        result.close()
def parseIdListFromBody(data):
    """Extract the document ids from a JSON search response.

    Args:
        data: JSON text holding a ``hits`` object whose ``hit`` entry is a
            list of objects, each with an ``id`` key.

    Returns:
        A list of the ``id`` values, in the order the hits appear.
    """
    hits = json.loads(data)["hits"]["hit"]
    return [hit["id"] for hit in hits]
def createSDFforDelete(idList):
    """Build the JSON batch that deletes every document in *idList*.

    Args:
        idList: iterable of document ids.

    Returns:
        A JSON string encoding a list with one
        ``{"type": "delete", "id": <id>}`` object per id, in input order.
    """
    deleteOps = [{'type': 'delete', 'id': docId} for docId in idList]
    return json.dumps(deleteOps)
def sendSDF(sdf):
    """Post a document batch to the document endpoint and print the reply.

    Args:
        sdf: the batch payload, sent as the request body with a JSON
            content type.

    Raises:
        urllib2.URLError: propagated from ``urllib2.urlopen`` (this includes
            ``urllib2.HTTPError`` for HTTP error statuses).
    """
    url = "http://" + DOCUMENT_ENDPOINT + "/" + API_VERSION + "/documents/batch"
    # Passing the body and header to the constructor is equivalent to calling
    # add_data()/add_header() on the request afterwards.
    request = urllib2.Request(url, sdf, {"Content-Type": "application/json"})
    response = urllib2.urlopen(request)
    try:
        # Single-argument form prints the same text as the print statement.
        print(response.read())
    finally:
        # Close the connection even if reading or printing fails.
        response.close()
if __name__ == '__main__':
    # Search request parameters; adjust these to select the documents to delete.
    queryParams = {
        "q": "*:*",            # query string
        "q.parser": "lucene",  # parser used to interpret "q"
        "size": "1000",        # value sent as the "size" search parameter
        "return": "title",     # field name sent as the "return" parameter
    }
    # Search, collect the ids of the hits, then submit one delete batch.
    matchingIds = parseIdListFromBody(searchDocuments(queryParams))
    sendSDF(createSDFforDelete(matchingIds))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You need to set "DOCUMENT_ENDPOINT", "SEARCH_ENDPOINT" and the search parameters before you run the script.