Created
December 13, 2015 23:44
-
-
Save ei-grad/0aacea5dd6bf9fa6fb19 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "beaker": "2", | |
| "cells": [ | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codebd1MLI", | |
| "input": { | |
| "body": [ | |
| "from elasticsearch_dsl import Search", | |
| "from elasticsearch_dsl.connections import connections", | |
| "", | |
| "connections.configure(default={'hosts': 'http://elasticsearch:9200/'})", | |
| " ", | |
| "", | |
| "LT_DATE = '2015-12-11'" | |
| ] | |
| }, | |
| "lineCount": 7, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codebKvCvA", | |
| "input": { | |
| "body": [ | |
| "def es_table(s):", | |
| " return [i['_source'] for i in s.execute().to_dict()['hits']['hits']]", | |
| "", | |
| "", | |
| "es_table(", | |
| " Search().params(request_timeout=300)", | |
| " .index('mixpanel-*').doc_type('MainScreenAppear')", | |
| " .query('match', **{'$city': 'Miass'})", | |
| " .sort('-@timestamp')[:50]", | |
| ")" | |
| ] | |
| }, | |
| "lineCount": 10, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codeFx4vel", | |
| "input": { | |
| "body": [ | |
| "from collections import defaultdict", | |
| "from itertools import cycle", | |
| "import re", | |
| "", | |
| "import dateutil", | |
| "", | |
| "from matplotlib import cm", | |
| "from random import random", | |
| "", | |
| "", | |
| "class ESPlot():", | |
| " ", | |
| " _topic_cache = {}", | |
| " ", | |
| " sep = '.'", | |
| " ", | |
| " def __init__(self, search,", | |
| " document_key='@timestamp',", | |
| " xaxis_type='time',", | |
| " yaxis_type='linear'):", | |
| "", | |
| " self.xaxis_type = xaxis_type", | |
| " self.yaxis_type = yaxis_type", | |
| " ", | |
| " self.cmap = cm.get_cmap('Set1')", | |
| " self._color_cycle = cycle(range(1, self.cmap.N))", | |
| " ", | |
| " self.plot_data = []", | |
| " self.metric_data = defaultdict(list)", | |
| " self.element_types = {}", | |
| " ", | |
| " self.search = search", | |
| " ", | |
| " self.response = search.execute()", | |
| " ", | |
| " response = self.response.to_dict()", | |
| " ", | |
| " for doc in response['hits']['hits']:", | |
| " key = doc.get(document_key, doc.get('_source', {}).get(document_key))", | |
| " if key is None:", | |
| " continue", | |
| " if isinstance(key, str):", | |
| " key = int(dateutil.parser.parse(key).timestamp() * 1000)", | |
| " for i in doc:", | |
| " if i in [document_key]:", | |
| " continue", | |
| " self.process_metric(i, key, doc[i])", | |
| " ", | |
| " if 'aggregations' in response:", | |
| " for key, value in response['aggregations'].items():", | |
| " self.process_aggregation(key, value)", | |
| " ", | |
| " for k, v in self.metric_data.items():", | |
| " self.add_elements(k, v)", | |
| " ", | |
| " def process_aggregation(self, name, data):", | |
| " assert 'buckets' in data", | |
| " for bucket in data['buckets']:", | |
| " self.process_bucket(name, bucket)", | |
| "", | |
| " def process_bucket(self, name, bucket, key=None):", | |
| "", | |
| " if isinstance(bucket['key'], str):", | |
| " bucket_key = key", | |
| " bucket_name = self.join(name, bucket['key'])", | |
| " else:", | |
| " bucket_key = bucket['key']", | |
| " bucket_name = name", | |
| " ", | |
| " for member in bucket:", | |
| " if member in ['key', 'key_as_string']:", | |
| " continue", | |
| " if isinstance(bucket[member], dict) and 'buckets' in bucket[member]:", | |
| " for i in bucket[member]['buckets']:", | |
| " self.process_bucket(self.join(bucket_name, member), i, bucket_key)", | |
| " elif bucket_key:", | |
| " self.process_metric(self.join(bucket_name, member), bucket_key, bucket[member])", | |
| "", | |
| " def process_metric(self, name, key, value):", | |
| " if isinstance(value, dict):", | |
| " if value.keys() == set(['upper', 'lower']):", | |
| " if name not in self.element_types:", | |
| " self.element_types[name] = 'stem'", | |
| " self.metric_data[name].append({", | |
| " 'x': key, 'y': value['lower'], 'y2': value['upper']", | |
| " })", | |
| " else:", | |
| " for metric, value in value.items():", | |
| " if metric in ['value_as_string']:", | |
| " continue", | |
| " self.process_metric(self.join(name, metric), key, value)", | |
| " else:", | |
| " self.metric_data[name].append({'x': key, 'y': value})", | |
| " ", | |
| " def add_elements(self, legend, elements, **kwargs):", | |
| " ret = {", | |
| " 'type': self.element_types.get(legend, 'line'),", | |
| " 'legend': legend,", | |
| " 'elements': elements,", | |
| " }", | |
| " ret.update(kwargs)", | |
| " if ret['type'] in ('stem', 'bar') and 'width' not in ret and len(elements) > 1:", | |
| " ret['width'] = elements[1]['x'] - elements[0]['x']", | |
| " if 'color' not in ret:", | |
| " ret['color'] = self.next_color()", | |
| " self.plot_data.append(ret)", | |
| "", | |
| " def join(self, *args):", | |
| " return self.sep.join([args[0]] + [i.replace(self.sep, '%%%x' % ord(self.sep)) for i in args[1:]])", | |
| " ", | |
| " @staticmethod", | |
| " def rgb_to_hex(rgb):", | |
| " return '#%02x%02x%02x' % tuple(int(i * 256) for i in rgb[:3])", | |
| "", | |
| " def next_color(self):", | |
| " return self.rgb_to_hex(self.cmap(random()))", | |
| "", | |
| " @staticmethod", | |
| " def translate(pat):", | |
| " i, n = 0, len(pat)", | |
| " res = ''", | |
| " while i < n:", | |
| " c = pat[i]", | |
| " i = i+1", | |
| " if c == '*':", | |
| " res = res + '[^.]*'", | |
| " elif c == '#':", | |
| " res = res + '.*'", | |
| " else:", | |
| " res = res + re.escape(c)", | |
| " return res + '$\\Z(?ms)'", | |
| "", | |
| " def topic_match(self, name, pat):", | |
| " _cache = self._topic_cache", | |
| " try:", | |
| " re_pat = _cache[pat]", | |
| " except KeyError:", | |
| " res = self.translate(pat)", | |
| " if len(_cache) >= 100:", | |
| " _cache.clear()", | |
| " _cache[pat] = re_pat = re.compile(res)", | |
| " return re_pat.match(name) is not None", | |
| "", | |
| " def show(self, only=None, exclude=None, **kwargs):", | |
| " ", | |
| " data = self.plot_data", | |
| " ", | |
| " if only:", | |
| " if isinstance(only, str):", | |
| " only = [only]", | |
| " data = [", | |
| " i for i in data", | |
| " if any([self.topic_match(i['legend'], j) for j in only])", | |
| " ] ", | |
| " ", | |
| " if exclude:", | |
| " if isinstance(exclude, str):", | |
| " exclude = [exclude]", | |
| " data = [", | |
| " i for i in data", | |
| " if not any([self.topic_match(i['legend'], j) for j in exclude])", | |
| " ]", | |
| " ", | |
| " data.sort(key=lambda x: x['legend'])", | |
| " ", | |
| " ret = {", | |
| " 'type': 'Plot',", | |
| " 'xAxis': {'type': 'time'},", | |
| " 'useToolTip': True,", | |
| " 'showLegend': True,", | |
| " 'data': data,", | |
| " }", | |
| " ret.update(kwargs)", | |
| " ", | |
| " return ret" | |
| ], | |
| "hidden": true | |
| }, | |
| "lineCount": 175, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codegoUfGD", | |
| "input": { | |
| "body": [ | |
| "s = (", | |
| " Search().params(request_timeout=300)", | |
| " .index('topbeat-*').doc_type('system')", | |
| " .sort('@timestamp')", | |
| ")", | |
| "ESPlot(s[:s.count()]).show(only='*.cpu.*_p')" | |
| ] | |
| }, | |
| "lineCount": 6, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codeFkLtc4", | |
| "input": { | |
| "body": [ | |
| "s = Search().params(request_timeout=300).index('mixpanel-*')", | |
| "s.aggs.bucket(", | |
| " \"hourly\", \"date_histogram\",", | |
| " interval=\"hour\", field=\"@timestamp\"", | |
| ")", | |
| "ESPlot(s[:0]).show()" | |
| ] | |
| }, | |
| "lineCount": 6, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codeLVurin", | |
| "input": { | |
| "body": [ | |
| "s = Search().params(request_timeout=300).index('mixpanel-*')", | |
| "s = s.query('range', **{'@timestamp': {'lt': LT_DATE}})", | |
| "s.aggs.bucket(", | |
| " \"daily\", \"date_histogram\",", | |
| " interval=\"day\", field=\"@timestamp\"", | |
| ")", | |
| "s.aggs['daily'].bucket(", | |
| " \"prediction\", \"moving_avg\",", | |
| " buckets_path=\"active_users\",", | |
| " model=\"holt_winters\", window=7 * 10,", | |
| " minimize=True, settings={'period': 7},", | |
| " predict=21,", | |
| ")", | |
| "s.aggs['daily'].metric(\"active_users\", \"cardinality\", field=\"distinct_id\")", | |
| "ESPlot(s[:0]).show(exclude=\"*.doc_count\")" | |
| ] | |
| }, | |
| "lineCount": 15, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codeTIGYJ9", | |
| "input": { | |
| "body": [ | |
| "s = Search().params(request_timeout=300).index('mixpanel-*')", | |
| "s = s.query('range', **{'@timestamp': {'lt': LT_DATE}})", | |
| "s.aggs.bucket(", | |
| " \"hourly\", \"date_histogram\",", | |
| " interval=\"hour\", field=\"@timestamp\"", | |
| ")", | |
| "s.aggs['hourly'].bucket(", | |
| " \"prediction\", \"moving_avg\",", | |
| " buckets_path=\"active_users\",", | |
| " model=\"holt_winters\", window=90 * 24,", | |
| " minimize=True, settings={'period': 7 * 24},", | |
| " predict=14 * 24,", | |
| ")", | |
| "s.aggs['hourly'].metric(\"active_users\", \"cardinality\", field=\"distinct_id\")", | |
| "ESPlot(s[:0]).show(exclude=\"*.doc_count\")" | |
| ] | |
| }, | |
| "lineCount": 15, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codeF65PyF", | |
| "input": { | |
| "body": [ | |
| "s = Search().params(request_timeout=300).index('mixpanel-*')", | |
| "s.aggs.bucket(\"daily\", \"date_histogram\", interval=\"day\", field=\"@timestamp\")", | |
| "s.aggs['daily'].bucket(\"hourly\", \"date_histogram\", interval=\"hour\", field=\"@timestamp\")", | |
| "s.aggs['daily']['hourly'].metric(\"hau\", \"cardinality\", field=\"distinct_id\")", | |
| "s.aggs['daily'].metric(\"day_stats\", \"extended_stats_bucket\", buckets_path=\"hourly>hau\")", | |
| "ESPlot(s[:0]).show(only='*.day_stats.*')" | |
| ] | |
| }, | |
| "lineCount": 6, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codedSAbli", | |
| "input": { | |
| "body": [ | |
| "s = Search().params(request_timeout=300).index('mixpanel-*')", | |
| "s = s.query('range', **{\"@timestamp\": {\"gte\": \"2015-12-01\", \"lt\": \"2015-12-11\"}})", | |
| "s.aggs.bucket(\"region\", \"terms\", field=\"$region\").bucket(\"city\", \"terms\", field=\"$city\", size=5)", | |
| "s.aggs['region'].bucket(", | |
| " \"hourly\", \"date_histogram\",", | |
| " interval=\"hour\", field=\"@timestamp\"", | |
| ").metric(", | |
| " \"unique_users\", \"cardinality\", field=\"distinct_id\"", | |
| ")", | |
| "s.aggs['region']['city'].bucket(", | |
| " \"hourly\", \"date_histogram\",", | |
| " interval=\"hour\", field=\"@timestamp\"", | |
| ").metric(", | |
| " \"active_users\", \"cardinality\", field=\"distinct_id\"", | |
| ")", | |
| "plot = ESPlot(s[:0])" | |
| ] | |
| }, | |
| "lineCount": 16, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codeiuZMNX", | |
| "input": { | |
| "body": [ | |
| "plot.show(only=\"*.*.*.*.*\")" | |
| ] | |
| }, | |
| "lineCount": 1, | |
| "output": {}, | |
| "type": "code" | |
| }, | |
| { | |
| "evaluator": "Python3", | |
| "evaluatorReader": true, | |
| "id": "codeceYZJY", | |
| "input": { | |
| "body": [ | |
| "plot.show(only=\"region.Chelyabinsk.#.value\")" | |
| ] | |
| }, | |
| "lineCount": 1, | |
| "output": {}, | |
| "type": "code" | |
| } | |
| ], | |
| "evaluators": [ | |
| { | |
| "name": "Python3", | |
| "plugin": "Python3", | |
| "setup": "%matplotlib inline\nimport numpy\nimport matplotlib\nfrom matplotlib import pylab, mlab, pyplot\nnp = numpy\nplt = pyplot\nfrom IPython.display import display\nfrom IPython.core.pylabtools import figsize, getfigs\nfrom pylab import *\nfrom numpy import *\n", | |
| "view": { | |
| "cm": { | |
| "mode": "python" | |
| } | |
| } | |
| } | |
| ], | |
| "metadata": { | |
| "publication-id": "566c36fb-a7c0-4061-b1ca-0b2f38528ea8" | |
| }, | |
| "namespace": {} | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment