@nickchappell
Created July 23, 2014 16:10
InfluxDB cluster configs and logs
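Config for influxdb1.local: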
# Welcome to the InfluxDB configuration file.
# If hostname (on the OS) doesn't return a name that can be resolved by the other
# systems in the cluster, you'll have to set the hostname to an IP or something
# that can be resolved here.
hostname = "influxdb1.local"
bind-address = "0.0.0.0"
# Once every 24 hours InfluxDB will report anonymous data to m.influxdb.com
# The data includes raft name (random 8 bytes), os, arch and version
# We don't track ip addresses of servers reporting. This is only used
# to track the number of instances running and the versions which
# is very helpful for us.
# Change this option to true to disable reporting.
reporting-disabled = false
[logging]
# logging level can be one of "debug", "info", "warn" or "error"
level = "info"
file = "/opt/influxdb/shared/log.txt" # stdout to log to standard out
# Configure the admin server
[admin]
port = 8083 # binding is disabled if the port isn't set
assets = "/opt/influxdb/current/admin"
# Configure the http api
[api]
port = 8086 # binding is disabled if the port isn't set
# ssl-port = 8084 # Ssl support is enabled if you set a port and cert
# ssl-cert = /path/to/cert.pem
# connections will timeout after this amount of time. Ensures that clients that misbehave
# and keep alive connections they don't use won't end up connecting a million times.
# However, if a request is taking longer than this to complete, that could be a problem.
read-timeout = "5s"
[input_plugins]
# Configure the graphite api
[input_plugins.graphite]
enabled = false
# port = 2003
# database = "" # store graphite data in this database
# udp_enabled = true # enable udp interface on the same port as the tcp interface
# Configure the udp api
[input_plugins.udp]
enabled = false
# port = 4444
# database = ""
# Configure multiple udp apis; each can write to a separate db. Just
# repeat the following section to enable multiple udp apis on
# different ports.
[[input_plugins.udp_servers]] # array of tables
enabled = false
# port = 5551
# database = "db1"
# Raft configuration
[raft]
# The raft port should be open between all servers in a cluster.
# However, this port shouldn't be accessible from the internet.
port = 8090
# Where the raft logs are stored. The user running InfluxDB will need read/write access.
dir = "/opt/influxdb/shared/data/raft"
# election-timeout = "1s"
[storage]
dir = "/opt/influxdb/shared/data/db"
# How many requests to potentially buffer in memory. If the buffer gets filled then writes
# will still be logged and once the local storage has caught up (or compacted) the writes
# will be replayed from the WAL
write-buffer-size = 10000
[cluster]
# A comma separated list of servers to seed
# this server. This is only relevant when the
# server is joining a new cluster. Otherwise
# the server will use the list of known servers
# prior to shutting down. Any server can be pointed to
# as a seed. It will find the Raft leader automatically.
# Here's an example. Note that the port on the host is the same as the raft port.
# seed-servers = ["hosta:8090","hostb:8090"]
# Replication happens over a TCP connection with a Protobuf protocol.
# This port should be reachable between all servers in a cluster.
# However, this port shouldn't be accessible from the internet.
protobuf_port = 8099
protobuf_timeout = "2s" # the write timeout on the protobuf conn any duration parseable by time.ParseDuration
protobuf_heartbeat = "200ms" # the heartbeat interval between the servers. must be parseable by time.ParseDuration
protobuf_min_backoff = "1s" # the minimum backoff after a failed heartbeat attempt
protobuf_max_backoff = "10s" # the maxmimum backoff after a failed heartbeat attempt
# How many write requests to potentially buffer in memory per server. If the buffer gets filled then writes
# will still be logged and once the server has caught up (or come back online) the writes
# will be replayed from the WAL
write-buffer-size = 10000
# the maximum number of responses to buffer from remote nodes. If the
# expected number of responses exceeds this number then querying will
# happen sequentially and the buffer size will be limited to this
# number
max-response-buffer-size = 100
# When queries get distributed out to shards, they go in parallel. This means that results can get buffered
# in memory since results will come in any order, but have to be processed in the correct time order.
# Setting this higher will give better performance, but you'll need more memory. Setting this to 1 will ensure
# that you don't need to buffer in memory, but you won't get the best performance.
concurrent-shard-query-limit = 10
[leveldb]
# Maximum mmap open files; this will affect the virtual memory used by
# the process
max-open-files = 40
# LRU cache size; LRU is used by leveldb to store contents of the
# uncompressed sstables. You can use `m` or `g` prefix for megabytes
# and gigabytes, respectively.
lru-cache-size = "200m"
# The default setting on this is 0, which means unlimited. Set this to something if you want to
# limit the max number of open files. max-open-files is per shard so this * that will be max.
max-open-shards = 0
# The default setting is 100. This option tells how many points will be fetched from LevelDB before
# they get flushed into the backend.
point-batch-size = 100
# The number of points to batch in memory before writing them to leveldb. Lowering this number will
# reduce the memory usage, but will result in slower writes.
write-batch-size = 5000000
# These options specify how data is sharded across the cluster. There are two
# shard configurations that have the same knobs: short term and long term.
# Any series that begins with a capital letter like Exceptions will be written
# into the long term storage. Any series beginning with a lower case letter
# like exceptions will be written into short term. The idea being that you
# can write high precision data into short term and drop it after a couple
# of days. Meanwhile, continuous queries can run downsampling on the short term
# data and write into the long term area.
[sharding]
# how many servers in the cluster should have a copy of each shard.
# this will give you high availability and scalability on queries
replication-factor = 1
[sharding.short-term]
# each shard will have this period of time. Note that it's best to have
# group by time() intervals on all queries be less than this setting. If they are,
# then the aggregate is calculated locally. Otherwise, all that data gets sent
# over the network when doing a query.
duration = "7d"
# split will determine how many shards to split each duration into. For example,
# if we created a shard for 2014-02-10 and split was set to 2, then two shards
# would be created that have the data for 2014-02-10. By default, data will
# be split into those two shards deterministically by hashing the (database, series)
# tuple. That means that data for a given series will be written to a single shard,
# making querying efficient. That can be overridden with the next option.
split = 1
# You can override the split behavior to have the data for series that match a
# given regex be randomly distributed across the shards for a given interval.
# You can use this if you have a hot spot for a given time series writing more
# data than a single server can handle. Most people won't have to resort to this
# option. Also note that using this option means that queries will have to send
# all data over the network so they won't be as efficient.
# split-random = "/^hf.*/"
[sharding.long-term]
duration = "30d"
split = 1
# split-random = "/^Hf.*/"
[wal]
dir = "/opt/influxdb/shared/data/wal"
flush-after = 1000 # the number of writes after which wal will be flushed, 0 for flushing on every write
bookmark-after = 1000 # the number of writes after which a bookmark will be created
# the number of writes after which an index entry is created pointing
# to the offset of the first request, defaults to 1k
index-after = 1000
# the number of requests per log file; once this many requests have come in, a
# new log file will be created
requests-per-logfile = 10000
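
To confirm a node has actually bound the ports from the config above (8083 admin, 8086 HTTP API, 8090 raft, 8099 protobuf replication), a quick check on each host could look like the following; the port list is simply copied from the config, and the output is omitted here:

root@influxdb1: ~ # netstat -tlnp | grep -E ':(8083|8086|8090|8099)'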
root@influxdb1: ~ # cat /opt/influxdb/shared/log.txt
[2014/07/23 09:06:41 PDT] [INFO] (main.setupLogging:53) Redirectoring logging to /opt/influxdb/shared/log.txt
[2014/07/23 09:06:41 PDT] [INFO] (main.main:121) Starting Influx Server 0.7.3 bound to 0.0.0.0...
[2014/07/23 09:06:41 PDT] [INFO] (server.NewServer:38) Opening database at /opt/influxdb/shared/data/db
[2014/07/23 09:06:41 PDT] [INFO] (wal.NewWAL:40) Opening wal in /opt/influxdb/shared/data/wal
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.NewRaftServer:79) Using /dev/urandom to initialize the raft server name
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.NewRaftServer:104) Setting raft name to 1de2b3b0513f7a8b
[2014/07/23 09:06:41 PDT] [INFO] (api/http.(*HttpServer).EnableSsl:62) Ssl will be disabled since the ssl port or certificate path weren't set
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.(*RaftServer).Serve:513) Initializing Raft HTTP server
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.(*RaftServer).Serve:524) Raft Server Listening at 0.0.0.0:8090
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.(*RaftServer).startRaft:353) Initializing Raft Server: http://influxdb1:8090
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.(*RaftServer).startRaft:381) Starting as new Raft leader...
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.(*RaftServer).raftEventHandler:415) (raft:1de2b3b0513f7a8b) Selected as leader. Starting leader loop.
[2014/07/23 09:06:41 PDT] [INFO] (coordinator.(*InfluxJoinCommand).Apply:246) Adding new server to the cluster config 1de2b3b0513f7a8b
[2014/07/23 09:06:41 PDT] [INFO] (cluster.(*ClusterConfiguration).AddPotentialServer:245) Added server to cluster config: 1, http://influxdb1:8090, influxdb1:8099
[2014/07/23 09:06:41 PDT] [INFO] (cluster.(*ClusterConfiguration).AddPotentialServer:246) Checking whether this is the local server local: influxdb1:8099, new: influxdb1:8099
[2014/07/23 09:06:41 PDT] [INFO] (cluster.(*ClusterConfiguration).AddPotentialServer:255) Added the local server
[2014/07/23 09:06:41 PDT] [INFO] (server.(*Server).ListenAndServe:88) Waiting for local server to be added
[2014/07/23 09:06:41 PDT] [INFO] (wal.(*WAL).SetServerId:109) Setting server id to 1 and recovering
[2014/07/23 09:06:46 PDT] [INFO] (server.(*Server).ListenAndServe:120) Recovering from log...
[2014/07/23 09:06:46 PDT] [INFO] (cluster.NewWriteBuffer:30) local: Initializing write buffer with buffer size of 10000
[2014/07/23 09:06:46 PDT] [INFO] (cluster.(*ClusterConfiguration).RecoverFromWAL:1064) Waiting for servers to recover
[2014/07/23 09:06:46 PDT] [INFO] (coordinator.(*ProtobufServer).ListenAndServe:62) ProtobufServer listening on 0.0.0.0:8099
[2014/07/23 09:06:46 PDT] [INFO] (cluster.func·004:1046) Recovering local server
[2014/07/23 09:06:46 PDT] [INFO] (cluster.(*ClusterConfiguration).recover:1072) No shards to recover for 1
[2014/07/23 09:06:46 PDT] [INFO] (cluster.func·004:1048) Recovered local server
[2014/07/23 09:06:46 PDT] [INFO] (server.(*Server).ListenAndServe:125) recovered
[2014/07/23 09:06:46 PDT] [INFO] (coordinator.(*CoordinatorImpl).ConnectToProtobufServers:1014) Connecting to other nodes in the cluster
[2014/07/23 09:06:46 PDT] [INFO] (server.(*Server).ListenAndServe:131) Starting admin interface on port 8083
[2014/07/23 09:06:46 PDT] [WARN] (server.(*Server).ListenAndServe:148) Cannot start udp server. please check your configuration
[2014/07/23 09:06:46 PDT] [WARN] (server.(*Server).ListenAndServe:148) Cannot start udp server. please check your configuration
[2014/07/23 09:06:46 PDT] [INFO] (server.(*Server).ListenAndServe:169) Starting Http Api server on port 8086
[2014/07/23 09:06:46 PDT] [INFO] (server.(*Server).reportStats:207) Reporting stats: &influxdb.Series{Name:"reports", Columns:[]string{"os", "arch", "id", "version"}, Points:[][]interface {}{[]interface {}{"linux", "amd64", "1de2b3b0513f7a8b", "0.7.3"}}}
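Config for influxdb2.local: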
# Welcome to the InfluxDB configuration file.
# If hostname (on the OS) doesn't return a name that can be resolved by the other
# systems in the cluster, you'll have to set the hostname to an IP or something
# that can be resolved here.
hostname = "influxdb2.local"
bind-address = "0.0.0.0"
# Once every 24 hours InfluxDB will report anonymous data to m.influxdb.com
# The data includes raft name (random 8 bytes), os, arch and version
# We don't track ip addresses of servers reporting. This is only used
# to track the number of instances running and the versions which
# is very helpful for us.
# Change this option to true to disable reporting.
reporting-disabled = false
[logging]
# logging level can be one of "debug", "info", "warn" or "error"
level = "info"
file = "/opt/influxdb/shared/log.txt" # stdout to log to standard out
# Configure the admin server
[admin]
port = 8083 # binding is disabled if the port isn't set
assets = "/opt/influxdb/current/admin"
# Configure the http api
[api]
port = 8086 # binding is disabled if the port isn't set
# ssl-port = 8084 # Ssl support is enabled if you set a port and cert
# ssl-cert = /path/to/cert.pem
# connections will timeout after this amount of time. Ensures that clients that misbehave
# and keep alive connections they don't use won't end up connecting a million times.
# However, if a request is taking longer than this to complete, that could be a problem.
read-timeout = "5s"
[input_plugins]
# Configure the graphite api
[input_plugins.graphite]
enabled = false
# port = 2003
# database = "" # store graphite data in this database
# udp_enabled = true # enable udp interface on the same port as the tcp interface
# Configure the udp api
[input_plugins.udp]
enabled = false
# port = 4444
# database = ""
# Configure multiple udp apis; each can write to a separate db. Just
# repeat the following section to enable multiple udp apis on
# different ports.
[[input_plugins.udp_servers]] # array of tables
enabled = false
# port = 5551
# database = "db1"
# Raft configuration
[raft]
# The raft port should be open between all servers in a cluster.
# However, this port shouldn't be accessible from the internet.
port = 8090
# Where the raft logs are stored. The user running InfluxDB will need read/write access.
dir = "/opt/influxdb/shared/data/raft"
# election-timeout = "1s"
[storage]
dir = "/opt/influxdb/shared/data/db"
# How many requests to potentially buffer in memory. If the buffer gets filled then writes
# will still be logged and once the local storage has caught up (or compacted) the writes
# will be replayed from the WAL
write-buffer-size = 10000
[cluster]
# A comma separated list of servers to seed
# this server. This is only relevant when the
# server is joining a new cluster. Otherwise
# the server will use the list of known servers
# prior to shutting down. Any server can be pointed to
# as a seed. It will find the Raft leader automatically.
# Here's an example. Note that the port on the host is the same as the raft port.
seed-servers = ["influxdb1.local"]
# Replication happens over a TCP connection with a Protobuf protocol.
# This port should be reachable between all servers in a cluster.
# However, this port shouldn't be accessible from the internet.
protobuf_port = 8099
protobuf_timeout = "2s" # the write timeout on the protobuf conn; any duration parseable by time.ParseDuration
protobuf_heartbeat = "200ms" # the heartbeat interval between the servers; must be parseable by time.ParseDuration
protobuf_min_backoff = "1s" # the minimum backoff after a failed heartbeat attempt
protobuf_max_backoff = "10s" # the maximum backoff after a failed heartbeat attempt
# How many write requests to potentially buffer in memory per server. If the buffer gets filled then writes
# will still be logged and once the server has caught up (or come back online) the writes
# will be replayed from the WAL
write-buffer-size = 10000
# the maximum number of responses to buffer from remote nodes. If the
# expected number of responses exceeds this number then querying will
# happen sequentially and the buffer size will be limited to this
# number
max-response-buffer-size = 100
# When queries get distributed out to shards, they go in parallel. This means that results can get buffered
# in memory since results will come in any order, but have to be processed in the correct time order.
# Setting this higher will give better performance, but you'll need more memory. Setting this to 1 will ensure
# that you don't need to buffer in memory, but you won't get the best performance.
concurrent-shard-query-limit = 10
[leveldb]
# Maximum mmap open files; this will affect the virtual memory used by
# the process
max-open-files = 40
# LRU cache size; LRU is used by leveldb to store contents of the
# uncompressed sstables. You can use `m` or `g` prefix for megabytes
# and gigabytes, respectively.
lru-cache-size = "200m"
# The default setting on this is 0, which means unlimited. Set this to something if you want to
# limit the max number of open files. max-open-files is per shard so this * that will be max.
max-open-shards = 0
# The default setting is 100. This option tells how many points will be fetched from LevelDB before
# they get flushed into the backend.
point-batch-size = 100
# The number of points to batch in memory before writing them to leveldb. Lowering this number will
# reduce the memory usage, but will result in slower writes.
write-batch-size = 5000000
# These options specify how data is sharded across the cluster. There are two
# shard configurations that have the same knobs: short term and long term.
# Any series that begins with a capital letter like Exceptions will be written
# into the long term storage. Any series beginning with a lower case letter
# like exceptions will be written into short term. The idea being that you
# can write high precision data into short term and drop it after a couple
# of days. Meanwhile, continuous queries can run downsampling on the short term
# data and write into the long term area.
[sharding]
# how many servers in the cluster should have a copy of each shard.
# this will give you high availability and scalability on queries
replication-factor = 1
[sharding.short-term]
# each shard will have this period of time. Note that it's best to have
# group by time() intervals on all queries be less than this setting. If they are,
# then the aggregate is calculated locally. Otherwise, all that data gets sent
# over the network when doing a query.
duration = "7d"
# split will determine how many shards to split each duration into. For example,
# if we created a shard for 2014-02-10 and split was set to 2, then two shards
# would be created that have the data for 2014-02-10. By default, data will
# be split into those two shards deterministically by hashing the (database, series)
# tuple. That means that data for a given series will be written to a single shard,
# making querying efficient. That can be overridden with the next option.
split = 1
# You can override the split behavior to have the data for series that match a
# given regex be randomly distributed across the shards for a given interval.
# You can use this if you have a hot spot for a given time series writing more
# data than a single server can handle. Most people won't have to resort to this
# option. Also note that using this option means that queries will have to send
# all data over the network so they won't be as efficient.
# split-random = "/^hf.*/"
[sharding.long-term]
duration = "30d"
split = 1
# split-random = "/^Hf.*/"
[wal]
dir = "/opt/influxdb/shared/data/wal"
flush-after = 1000 # the number of writes after which wal will be flushed, 0 for flushing on every write
bookmark-after = 1000 # the number of writes after which a bookmark will be created
# the number of writes after which an index entry is created pointing
# to the offset of the first request, defaults to 1k
index-after = 1000
# the number of requests per log file; once this many requests have come in, a
# new log file will be created
requests-per-logfile = 10000
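
The comment in the [cluster] section above notes that a seed entry uses the host's raft port (e.g. "hosta:8090"). A seed-servers line written in that form, as a sketch only (not copied from the config above), would be:

seed-servers = ["influxdb1.local:8090"]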
root@influxdb2: ~ # cat /opt/influxdb/shared/log.txt
[2014/07/23 09:09:03 PDT] [INFO] (main.setupLogging:53) Redirectoring logging to /opt/influxdb/shared/log.txt
[2014/07/23 09:09:03 PDT] [INFO] (main.main:121) Starting Influx Server 0.7.3 bound to 0.0.0.0...
[2014/07/23 09:09:03 PDT] [INFO] (server.NewServer:38) Opening database at /opt/influxdb/shared/data/db
[2014/07/23 09:09:03 PDT] [INFO] (wal.NewWAL:40) Opening wal in /opt/influxdb/shared/data/wal
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.NewRaftServer:79) Using /dev/urandom to initialize the raft server name
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.NewRaftServer:104) Setting raft name to b953bb9e7196f7d4
[2014/07/23 09:09:03 PDT] [INFO] (api/http.(*HttpServer).EnableSsl:62) Ssl will be disabled since the ssl port or certificate path weren't set
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.(*RaftServer).Serve:513) Initializing Raft HTTP server
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.(*RaftServer).Serve:524) Raft Server Listening at 0.0.0.0:8090
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.(*RaftServer).startRaft:353) Initializing Raft Server: http://influxdb2:8090
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.(*RaftServer).startRaft:381) Starting as new Raft leader...
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.(*RaftServer).raftEventHandler:415) (raft:b953bb9e7196f7d4) Selected as leader. Starting leader loop.
[2014/07/23 09:09:03 PDT] [INFO] (coordinator.(*InfluxJoinCommand).Apply:246) Adding new server to the cluster config b953bb9e7196f7d4
[2014/07/23 09:09:03 PDT] [INFO] (cluster.(*ClusterConfiguration).AddPotentialServer:245) Added server to cluster config: 1, http://influxdb2:8090, influxdb2:8099
[2014/07/23 09:09:03 PDT] [INFO] (cluster.(*ClusterConfiguration).AddPotentialServer:246) Checking whether this is the local server local: influxdb2:8099, new: influxdb2:8099
[2014/07/23 09:09:03 PDT] [INFO] (cluster.(*ClusterConfiguration).AddPotentialServer:255) Added the local server
[2014/07/23 09:09:03 PDT] [INFO] (server.(*Server).ListenAndServe:88) Waiting for local server to be added
[2014/07/23 09:09:03 PDT] [INFO] (wal.(*WAL).SetServerId:109) Setting server id to 1 and recovering
[2014/07/23 09:09:08 PDT] [INFO] (server.(*Server).ListenAndServe:120) Recovering from log...
[2014/07/23 09:09:08 PDT] [INFO] (cluster.NewWriteBuffer:30) local: Initializing write buffer with buffer size of 10000
[2014/07/23 09:09:08 PDT] [INFO] (cluster.(*ClusterConfiguration).RecoverFromWAL:1064) Waiting for servers to recover
[2014/07/23 09:09:08 PDT] [INFO] (coordinator.(*ProtobufServer).ListenAndServe:62) ProtobufServer listening on 0.0.0.0:8099
[2014/07/23 09:09:08 PDT] [INFO] (cluster.func·004:1046) Recovering local server
[2014/07/23 09:09:08 PDT] [INFO] (cluster.(*ClusterConfiguration).recover:1072) No shards to recover for 1
[2014/07/23 09:09:08 PDT] [INFO] (cluster.func·004:1048) Recovered local server
[2014/07/23 09:09:08 PDT] [INFO] (server.(*Server).ListenAndServe:125) recovered
[2014/07/23 09:09:08 PDT] [INFO] (coordinator.(*CoordinatorImpl).ConnectToProtobufServers:1014) Connecting to other nodes in the cluster
[2014/07/23 09:09:08 PDT] [INFO] (server.(*Server).ListenAndServe:131) Starting admin interface on port 8083
[2014/07/23 09:09:08 PDT] [WARN] (server.(*Server).ListenAndServe:148) Cannot start udp server. please check your configuration
[2014/07/23 09:09:08 PDT] [WARN] (server.(*Server).ListenAndServe:148) Cannot start udp server. please check your configuration
[2014/07/23 09:09:08 PDT] [INFO] (server.(*Server).ListenAndServe:169) Starting Http Api server on port 8086
[2014/07/23 09:09:08 PDT] [INFO] (server.(*Server).reportStats:207) Reporting stats: &influxdb.Series{Name:"reports", Columns:[]string{"os", "arch", "id", "version"}, Points:[][]interface {}{[]interface {}{"linux", "amd64", "b953bb9e7196f7d4", "0.7.3"}}}
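
Both nodes expose the HTTP API on port 8086 per the [api] section. As a rough smoke test against either node, something like the commands below could be used. This assumes the pre-0.9 /db/.../series JSON API and the default root/root admin credentials; the database name "mydb" and series name "test_metric" are made up for illustration:

root@influxdb1: ~ # curl -X POST 'http://influxdb1.local:8086/db?u=root&p=root' -d '{"name": "mydb"}'
root@influxdb1: ~ # curl -X POST 'http://influxdb1.local:8086/db/mydb/series?u=root&p=root' -d '[{"name": "test_metric", "columns": ["value"], "points": [[42]]}]'
root@influxdb1: ~ # curl -G 'http://influxdb1.local:8086/db/mydb/series?u=root&p=root' --data-urlencode 'q=select * from test_metric'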