Skip to content

Instantly share code, notes, and snippets.

@qix
Last active August 29, 2015 14:21
Show Gist options
  • Save qix/7b8122539c9b4969c35b to your computer and use it in GitHub Desktop.
Save qix/7b8122539c9b4969c35b to your computer and use it in GitHub Desktop.

== Simple cluster setup ==

  • One druid broker: druid-broker1
  • One historical node: druid-historical3
  • One realtime node: druid-realtime2

Configuration details at the bottom

== Running against druid-broker1: value occasionally changes by about 300000 ==

# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971361}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2689553}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2689555}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971454}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971455}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971458}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2689563}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971460}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2689563}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2689564}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971483}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971483}}]
# ./run.sh druid-broker1
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2971485}}]

== Running against druid-realtime2: value increases steadily with new data ==

# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643302}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643305}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643309}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643312}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643314}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643314}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643316}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643318}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643319}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643320}}]
# ./run.sh druid-realtime2
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":643321}}]

== Running against druid-historical3 gives expected values ==

# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
# ./run.sh druid-historical3
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]

== Running all from a single script (all results within a few seconds) ==

realtime
[{"timestamp":"2015-05-19T04:48:38.000Z","result":{"Count":645347}}]
historical
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2398455}}]
broker x 10
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2691663}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2691663}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2973560}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2973560}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2973560}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2973561}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2691664}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2973561}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2691664}}]
[{"timestamp":"2015-01-22T14:25:34.000Z","result":{"Count":2691664}}]

historical + realtime = 3043802

== Count query that is being run ==

{
  "queryType": "timeseries",
  "dataSource": "allFeedFeatures",
  "granularity": "all",
  "intervals": [
    "2010-01-01/2017-01-01"
  ],
  "filter": null,
  "dimensions": [],
  "metrics": [],
  "aggregations": [
    {
      "type": "longSum",
      "name": "Count",
      "fieldName": "Count"
    }
  ],
  "postAggregations": [],
  "pagingSpec": {
    "threshold": 5,
    "pagingIdentifiers": {}
  }
}

== Run script for reference ==

#!/bin/bash
# Usage: ./run.sh <druid-host>
#
# POSTs the query stored in count.json (read from the current directory) to
# the Druid query endpoint at http://<druid-host>:8080/druid/v2/ and prints
# the raw JSON response on stdout.

# Without a host the URL would collapse to "http://:8080/druid/v2/", so fail
# early with a usage message instead.
if [ -z "$1" ]; then
  echo "usage: $0 <druid-host>" >&2
  exit 1
fi

# --show-error keeps --silent from hiding connection failures.
# Reading the body with @count.json (rather than piping through cat) makes
# curl itself report a missing query file instead of posting an empty body.
curl \
  --silent \
  --show-error \
  -X POST \
  -H 'content-type: application/json' \
  --data-binary @count.json \
  "http://$1:8080/druid/v2/"

== Realtime ingestion spec ==

[
  {
    "dataSchema": {
      "dataSource": "allFeedFeatures",
      "granularitySpec": {
        "queryGranularity": "NONE",
        "segmentGranularity": "DAY",
        "type": "uniform"
      },
      "metricsSpec": [
        {
          "name": "Count",
          "type": "count"
        },
        ... lots more counts and hyperUniques ...
      ],
      "parser": {
        "parseSpec": {
          "dimensionsSpec": {
            "dimensionExclusions": [],
            "dimensions": [
              "ActionId",
              "ApiKey",
              "ActionName",
              "Network",
              ...
            ],
            "spatialDimensions": []
          },
          "format": "json",
          "timestampSpec": {
            "column": "Timestamp",
            "format": "millis"
          }
        },
        "type": "string"
      }
    },
    "ioConfig": {
      "firehose": {
        "consumerProps": {
          "auto.commit.enable": "false",
          "auto.offset.reset": "largest",
          "fetch.message.max.bytes": "1000000",
          "group.id": "druidAllFeedFeatures",
          "zookeeper.connect": "consul1:2181,consul2:2181,consul3:2181,consul4:2181,consul5:2181/kafka",
          "zookeeper.connection.timeout.ms": "15000",
          "zookeeper.session.timeout.ms": "15000",
          "zookeeper.sync.time.ms": "5000"
        },
        "feed": "feedFeatures",
        "type": "kafka-0.8"
      },
      "plumber": {
        "type": "realtime"
      },
      "type": "realtime"
    },
    "tuningConfig": {
      "basePersistDirectory": "/data/druid/kafka-persist",
      "intermediatePersistPeriod": "PT10m",
      "maxRowsInMemory": 5000000,
      "rejectionPolicy": {
        "type": "serverTime"
      },
      "type": "realtime",
      "windowPeriod": "PT60m"
    }
  }
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment