Skip to content

Instantly share code, notes, and snippets.

@adngdb
Last active March 22, 2016 17:09
Show Gist options
  • Save adngdb/c2fea567d49d20369ed3 to your computer and use it in GitHub Desktop.
Save adngdb/c2fea567d49d20369ed3 to your computer and use it in GitHub Desktop.
import datetime
import elasticsearch
from elasticsearch import helpers
from socorrolib.lib.datetimeutil import utc_now
def dl_es_data(es_index, es_doctype, date, product='firefox', version='45.0',
               hosts=None):
    """Print selected fields of the crash reports processed on a given day.

    Scrolls through every document of ``es_index`` / ``es_doctype`` whose
    ``processed_crash.date_processed`` lies in the half-open interval
    ``[date, date + 1 day)`` and whose product and version match, and prints
    the ``fields`` entry of each hit (uuid, product and version).
    Nothing is returned.

    See documentation:
    * https://www.elastic.co/guide/en/elasticsearch/reference/1.4/search-request-scroll.html
    * https://elasticsearch-py.readthedocs.org/en/1.2.0/helpers.html#elasticsearch.helpers.scan

    Args:
        es_index: name of the index to scan.
        es_doctype: document type to scan.
        date: start of the day to fetch; it must support ``+ timedelta``
            and ``isoformat()`` (a ``datetime`` or ``date``).
        product: value matched against ``processed_crash.product``. It is
            passed verbatim to a ``term`` filter.
            NOTE(review): the default is lowercase, presumably because the
            field is analyzed -- confirm before passing mixed-case names.
        version: value matched against ``processed_crash.version``.
        hosts: list of Elasticsearch hosts; defaults to
            ``['localhost:9222']``.
    """
    if hosts is None:
        hosts = ['localhost:9222']
    es_context = elasticsearch.Elasticsearch(hosts=hosts)

    next_day = date + datetime.timedelta(days=1)
    query = {
        'filter': {
            'bool': {
                'must': [
                    {
                        'range': {
                            'processed_crash.date_processed': {
                                'gte': date.isoformat(),
                                # Exclusive upper bound: a crash processed
                                # exactly at the next midnight belongs to the
                                # following day only, so consecutive daily
                                # runs never fetch the same document twice.
                                'lt': next_day.isoformat(),
                            }
                        }
                    },
                    {
                        'term': {
                            'processed_crash.product': product
                        }
                    },
                    {
                        'term': {
                            'processed_crash.version': version
                        }
                    }
                ]
            }
        }
    }

    res = helpers.scan(
        es_context,
        scroll='1m',  # keep the scroll context alive for 1 minute per batch.
        index=es_index,
        doc_type=es_doctype,
        fields=[
            'processed_crash.uuid',
            'processed_crash.product',
            'processed_crash.version',
        ],
        query=query,
    )

    for hit in res:
        print(hit['fields'])
if __name__ == '__main__':
    # Truncate the timestamp returned by utc_now() to midnight so the query
    # window starts at the beginning of the current day.
    midnight = utc_now().replace(hour=0, minute=0, second=0, microsecond=0)

    # The index name is "socorro" followed by the four-digit year and the
    # week-of-year number (%W), derived from the same timestamp.
    dl_es_data(midnight.strftime('socorro%Y%W'), 'crash_reports', midnight)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment