@ei-grad
Created December 13, 2015 23:44
{
"beaker": "2",
"cells": [
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codebd1MLI",
"input": {
"body": [
"from elasticsearch_dsl import Search",
"from elasticsearch_dsl.connections import connections",
"",
"connections.configure(default={'hosts': 'http://elasticsearch:9200/'})",
" ",
"",
"LT_DATE = '2015-12-11'"
]
},
"lineCount": 7,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codebKvCvA",
"input": {
"body": [
"def es_table(s):",
" return [i['_source'] for i in s.execute().to_dict()['hits']['hits']]",
"",
"",
"es_table(",
" Search().params(request_timeout=300)",
" .index('mixpanel-*').doc_type('MainScreenAppear')",
" .query('match', **{'$city': 'Miass'})",
" .sort('-@timestamp')[:50]",
")"
]
},
"lineCount": 10,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codeFx4vel",
"input": {
"body": [
"from collections import defaultdict",
"from itertools import cycle",
"import re",
"",
"import dateutil",
"",
"from matplotlib import cm",
"from random import random",
"",
"",
"class ESPlot():",
" ",
" _topic_cache = {}",
" ",
" sep = '.'",
" ",
" def __init__(self, search,",
" document_key='@timestamp',",
" xaxis_type='time',",
" yaxis_type='linear'):",
"",
" self.xaxis_type = xaxis_type",
" self.yaxis_type = yaxis_type",
" ",
" self.cmap = cm.get_cmap('Set1')",
" self._color_cycle = cycle(range(1, self.cmap.N))",
" ",
" self.plot_data = []",
" self.metric_data = defaultdict(list)",
" self.element_types = {}",
" ",
" self.search = search",
" ",
" self.response = search.execute()",
" ",
" response = self.response.to_dict()",
" ",
" for doc in response['hits']['hits']:",
" key = doc.get(document_key, doc.get('_source', {}).get(document_key))",
" if key is None:",
" continue",
" if isinstance(key, str):",
" key = int(dateutil.parser.parse(key).timestamp() * 1000)",
" for i in doc:",
" if i in [document_key]:",
" continue",
" self.process_metric(i, key, doc[i])",
" ",
" if 'aggregations' in response:",
" for key, value in response['aggregations'].items():",
" self.process_aggregation(key, value)",
" ",
" for k, v in self.metric_data.items():",
" self.add_elements(k, v)",
" ",
" def process_aggregation(self, name, data):",
" assert 'buckets' in data",
" for bucket in data['buckets']:",
" self.process_bucket(name, bucket)",
"",
" def process_bucket(self, name, bucket, key=None):",
"",
" if isinstance(bucket['key'], str):",
" bucket_key = key",
" bucket_name = self.join(name, bucket['key'])",
" else:",
" bucket_key = bucket['key']",
" bucket_name = name",
" ",
" for member in bucket:",
" if member in ['key', 'key_as_string']:",
" continue",
" if isinstance(bucket[member], dict) and 'buckets' in bucket[member]:",
" for i in bucket[member]['buckets']:",
" self.process_bucket(self.join(bucket_name, member), i, bucket_key)",
" elif bucket_key:",
" self.process_metric(self.join(bucket_name, member), bucket_key, bucket[member])",
"",
" def process_metric(self, name, key, value):",
" if isinstance(value, dict):",
" if value.keys() == set(['upper', 'lower']):",
" if name not in self.element_types:",
" self.element_types[name] = 'stem'",
" self.metric_data[name].append({",
" 'x': key, 'y': value['lower'], 'y2': value['upper']",
" })",
" else:",
" for metric, value in value.items():",
" if metric in ['value_as_string']:",
" continue",
" self.process_metric(self.join(name, metric), key, value)",
" else:",
" self.metric_data[name].append({'x': key, 'y': value})",
" ",
" def add_elements(self, legend, elements, **kwargs):",
" ret = {",
" 'type': self.element_types.get(legend, 'line'),",
" 'legend': legend,",
" 'elements': elements,",
" }",
" ret.update(kwargs)",
" if ret['type'] in ('stem', 'bar') and 'width' not in ret and len(elements) > 1:",
" ret['width'] = elements[1]['x'] - elements[0]['x']",
" if 'color' not in ret:",
" ret['color'] = self.next_color()",
" self.plot_data.append(ret)",
"",
" def join(self, *args):",
" return self.sep.join([args[0]] + [i.replace(self.sep, '%%%x' % ord(self.sep)) for i in args[1:]])",
" ",
" @staticmethod",
" def rgb_to_hex(rgb):",
" return '#%02x%02x%02x' % tuple(int(i * 256) for i in rgb[:3])",
"",
" def next_color(self):",
" return self.rgb_to_hex(self.cmap(random()))",
"",
" @staticmethod",
" def translate(pat):",
" i, n = 0, len(pat)",
" res = ''",
" while i < n:",
" c = pat[i]",
" i = i+1",
" if c == '*':",
" res = res + '[^.]*'",
" elif c == '#':",
" res = res + '.*'",
" else:",
" res = res + re.escape(c)",
" return res + '$\\Z(?ms)'",
"",
" def topic_match(self, name, pat):",
" _cache = self._topic_cache",
" try:",
" re_pat = _cache[pat]",
" except KeyError:",
" res = self.translate(pat)",
" if len(_cache) >= 100:",
" _cache.clear()",
" _cache[pat] = re_pat = re.compile(res)",
" return re_pat.match(name) is not None",
"",
" def show(self, only=None, exclude=None, **kwargs):",
" ",
" data = self.plot_data",
" ",
" if only:",
" if isinstance(only, str):",
" only = [only]",
" data = [",
" i for i in data",
" if any([self.topic_match(i['legend'], j) for j in only])",
" ] ",
" ",
" if exclude:",
" if isinstance(exclude, str):",
" exclude = [exclude]",
" data = [",
" i for i in data",
" if not any([self.topic_match(i['legend'], j) for j in exclude])",
" ]",
" ",
" data.sort(key=lambda x: x['legend'])",
" ",
" ret = {",
" 'type': 'Plot',",
" 'xAxis': {'type': 'time'},",
" 'useToolTip': True,",
" 'showLegend': True,",
" 'data': data,",
" }",
" ret.update(kwargs)",
" ",
" return ret"
],
"hidden": true
},
"lineCount": 175,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codegoUfGD",
"input": {
"body": [
"s = (",
" Search().params(request_timeout=300)",
" .index('topbeat-*').doc_type('system')",
" .sort('@timestamp')",
")",
"ESPlot(s[:s.count()]).show(only='*.cpu.*_p')"
]
},
"lineCount": 6,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codeFkLtc4",
"input": {
"body": [
"s = Search().params(request_timeout=300).index('mixpanel-*')",
"s.aggs.bucket(",
" \"hourly\", \"date_histogram\",",
" interval=\"hour\", field=\"@timestamp\"",
")",
"ESPlot(s[:0]).show()"
]
},
"lineCount": 6,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codeLVurin",
"input": {
"body": [
"s = Search().params(request_timeout=300).index('mixpanel-*')",
"s = s.query('range', **{'@timestamp': {'lt': LT_DATE}})",
"s.aggs.bucket(",
" \"daily\", \"date_histogram\",",
" interval=\"day\", field=\"@timestamp\"",
")",
"s.aggs['daily'].bucket(",
" \"prediction\", \"moving_avg\",",
" buckets_path=\"active_users\",",
" model=\"holt_winters\", window=7 * 10,",
" minimize=True, settings={'period': 7},",
" predict=21,",
")",
"s.aggs['daily'].metric(\"active_users\", \"cardinality\", field=\"distinct_id\")",
"ESPlot(s[:0]).show(exclude=\"*.doc_count\")"
]
},
"lineCount": 15,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codeTIGYJ9",
"input": {
"body": [
"s = Search().params(request_timeout=300).index('mixpanel-*')",
"s = s.query('range', **{'@timestamp': {'lt': LT_DATE}})",
"s.aggs.bucket(",
" \"hourly\", \"date_histogram\",",
" interval=\"hour\", field=\"@timestamp\"",
")",
"s.aggs['hourly'].bucket(",
" \"prediction\", \"moving_avg\",",
" buckets_path=\"active_users\",",
" model=\"holt_winters\", window=90 * 24,",
" minimize=True, settings={'period': 7 * 24},",
" predict=14 * 24,",
")",
"s.aggs['hourly'].metric(\"active_users\", \"cardinality\", field=\"distinct_id\")",
"ESPlot(s[:0]).show(exclude=\"*.doc_count\")"
]
},
"lineCount": 15,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codeF65PyF",
"input": {
"body": [
"s = Search().params(request_timeout=300).index('mixpanel-*')",
"s.aggs.bucket(\"daily\", \"date_histogram\", interval=\"day\", field=\"@timestamp\")",
"s.aggs['daily'].bucket(\"hourly\", \"date_histogram\", interval=\"hour\", field=\"@timestamp\")",
"s.aggs['daily']['hourly'].metric(\"hau\", \"cardinality\", field=\"distinct_id\")",
"s.aggs['daily'].metric(\"day_stats\", \"extended_stats_bucket\", buckets_path=\"hourly>hau\")",
"ESPlot(s[:0]).show(only='*.day_stats.*')"
]
},
"lineCount": 6,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codedSAbli",
"input": {
"body": [
"s = Search().params(request_timeout=300).index('mixpanel-*')",
"s = s.query('range', **{\"@timestamp\": {\"gte\": \"2015-12-01\", \"lt\": \"2015-12-11\"}})",
"s.aggs.bucket(\"region\", \"terms\", field=\"$region\").bucket(\"city\", \"terms\", field=\"$city\", size=5)",
"s.aggs['region'].bucket(",
" \"hourly\", \"date_histogram\",",
" interval=\"hour\", field=\"@timestamp\"",
").metric(",
" \"unique_users\", \"cardinality\", field=\"distinct_id\"",
")",
"s.aggs['region']['city'].bucket(",
" \"hourly\", \"date_histogram\",",
" interval=\"hour\", field=\"@timestamp\"",
").metric(",
" \"active_users\", \"cardinality\", field=\"distinct_id\"",
")",
"plot = ESPlot(s[:0])"
]
},
"lineCount": 16,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codeiuZMNX",
"input": {
"body": [
"plot.show(only=\"*.*.*.*.*\")"
]
},
"lineCount": 1,
"output": {},
"type": "code"
},
{
"evaluator": "Python3",
"evaluatorReader": true,
"id": "codeceYZJY",
"input": {
"body": [
"plot.show(only=\"region.Chelyabinsk.#.value\")"
]
},
"lineCount": 1,
"output": {},
"type": "code"
}
],
"evaluators": [
{
"name": "Python3",
"plugin": "Python3",
"setup": "%matplotlib inline\nimport numpy\nimport matplotlib\nfrom matplotlib import pylab, mlab, pyplot\nnp = numpy\nplt = pyplot\nfrom IPython.display import display\nfrom IPython.core.pylabtools import figsize, getfigs\nfrom pylab import *\nfrom numpy import *\n",
"view": {
"cm": {
"mode": "python"
}
}
}
],
"metadata": {
"publication-id": "566c36fb-a7c0-4061-b1ca-0b2f38528ea8"
},
"namespace": {}
}