Skip to content

Instantly share code, notes, and snippets.

@hkraal
Created March 24, 2020 18:05
Show Gist options
  • Save hkraal/81cd5c69411b6eb1a9109ffe1ff204b8 to your computer and use it in GitHub Desktop.
Save hkraal/81cd5c69411b6eb1a9109ffe1ff204b8 to your computer and use it in GitHub Desktop.
Script for removing messages from Graylog
#!/usr/bin/env python
"""
# Remove data from Graylog.
This script makes it possible to selectively remove data from an index set.
### Usage
* Set up an SSH tunnel to Graylog and Elasticsearch.
ssht <hostname> -L 12901:localhost:<graylog_api_port> -L 9101:localhost:<elasticsearch_http_port>
* Fill in the `graylog_index_set` variable (see the Graylog interface at /system/indices).
* Fill in the `graylog_password` variable.
* Fill in the `query` variable.
"""
import sys

# The third-party packages are imported defensively: when one is missing the
# user gets installation advice and exit status 1 instead of a bare traceback.
try:
    import requests
    from elasticsearch import Elasticsearch
except ImportError as import_error:
    print('Error during import: {}'.format(import_error))
    print('Try running "pip install requests elasticsearch".')
    raise SystemExit(1)
"""
Fill in variables below.
"""
# Base URL of the Graylog REST API (port 12901 is the local end of the SSH
# tunnel described in the usage notes).
graylog_api_url = 'http://localhost:12901/api'
# Identifier of the index set to clean up; it is inserted into the Graylog
# API URL that lists the indices of that set.
graylog_index_set = ''
# Password used for HTTP basic authentication against the Graylog API.
graylog_password = ''
# Elasticsearch query selecting the messages to delete. The same query is
# first used to count the hits per index and then passed to delete-by-query.
query = {
    "query": {
        "bool": {
            "filter": [
                {"term": {"field_name": "value"}},
                {"range": {
                    "timestamp": {
                        "gte": "2018",
                        "lte": "2019-04-05",
                        "format": "yyyy-MM-dd||yyyy",
                    },
                }},
            ],
        },
    },
}

# Validate with explicit checks rather than `assert`: assertions are removed
# when Python runs with -O, and this script must never start deleting data
# with an incomplete configuration.
if not graylog_index_set:
    sys.exit('graylog_index_set is not configured.')
if not graylog_password:
    sys.exit('graylog_password is not configured.')
"""
!!! DO NOT MAKE CHANGES BELOW THIS LINE !!!
"""
# Set up the Elasticsearch connection.
elasticsearch_host = 'localhost:9101'
es = Elasticsearch(elasticsearch_host)

# Credentials reused for every Graylog API request below.
auth = requests.auth.HTTPBasicAuth('admin', graylog_password)
# Sent with state-changing Graylog requests; recent Graylog versions reject
# non-GET API calls without an X-Requested-By header (CSRF protection).
graylog_headers = {'X-Requested-By': 'graylog-remove-messages'}


def _index_sort_key(index_name):
    """Return a sort key that orders ``<prefix>_<number>`` names numerically.

    A plain string sort ranks ``graylog_9`` above ``graylog_10``, which would
    make the loop below skip the wrong index. Names without a numeric suffix
    fall back to string ordering.
    NOTE(review): assumes Graylog's usual ``<prefix>_<number>`` index naming.
    """
    prefix, _, suffix = index_name.rpartition('_')
    if suffix.isdigit():
        return (prefix, int(suffix))
    return (index_name, -1)


def _hit_count(search_response):
    """Return the total number of hits of an Elasticsearch search response.

    Elasticsearch before 7.0 reports ``hits.total`` as an integer; 7.0 and
    later report an object such as ``{"value": 42, "relation": "eq"}``.
    """
    total = search_response['hits']['total']
    if isinstance(total, dict):
        return total['value']
    return total


def _set_write_block(index_url, blocked):
    """Set ``index.blocks.write`` of one index; raise if Elasticsearch refuses."""
    response = requests.put(
        '{}/_settings'.format(index_url),
        json={"index": {"blocks.write": blocked}},
    )
    response.raise_for_status()


# Get list of indices from Graylog API.
indices_list_url = '{}/system/indexer/indices/{}/list'.format(
    graylog_api_url,
    graylog_index_set,
)
indices_list_response = requests.get(indices_list_url, auth=auth)
# Fail with a clear HTTP error (e.g. wrong password or index set) instead of
# an obscure KeyError while reading the response body.
indices_list_response.raise_for_status()

# Loop over all indices except the newest one, which is taken to be the
# currently active write index and must not be touched.
indices = indices_list_response.json()['all']['indices']
for indice_name in sorted(indices, key=_index_sort_key, reverse=True)[1:]:
    es_index_url = 'http://{}/{}'.format(
        elasticsearch_host,
        indice_name,
    )

    # Run query to see if any data matching the query is present.
    res = es.search(
        index=indice_name,
        body=query)
    hits = _hit_count(res)
    print("{}: {} hits".format(
        indice_name,
        hits
    ))

    # Only remove data if we have any hits.
    if hits == 0:
        continue

    print('Making index "{}" writable.'.format(indice_name))
    _set_write_block(es_index_url, False)
    try:
        print('Removing messages')
        # Delete messages
        es.delete_by_query(
            index=indice_name,
            body=query,
            request_timeout=300,
        )
    finally:
        # Restore the write block even when the deletion fails, so an error
        # never leaves an old index writable.
        print('Making index "{}" readonly.'.format(indice_name))
        _set_write_block(es_index_url, True)

    # Optimize index (purge removed messages).
    print('Optimizing index "{}".'.format(indice_name))
    r = requests.post(
        '{}/_forcemerge?only_expunge_deletes=true'.format(es_index_url),
    )
    r.raise_for_status()

    # Re-calculate index range via Graylog API. This call needs the same
    # credentials as the index listing above, otherwise Graylog rejects it.
    print('Re-calculating index ranges for "{}".'.format(indice_name))
    indice_range_url = '{}/system/indices/ranges/{}/rebuild'.format(
        graylog_api_url,
        indice_name,
    )
    r = requests.post(indice_range_url, auth=auth, headers=graylog_headers)
    r.raise_for_status()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment