#
# Config file for collectd(1).
# Please read collectd.conf(5) for a list of options.
# http://collectd.org/
#
##############################################################################
# Global                                                                     #
#----------------------------------------------------------------------------#
# Global settings for the daemon.                                            #
##############################################################################
#Hostname "localhost"
#FQDNLookup true
#BaseDir "/var/lib/collectd"
#PIDFile "/var/run/collectd.pid"
#PluginDir "/usr/lib64/collectd"
#TypesDB "/usr/share/collectd/types.db"
#----------------------------------------------------------------------------#
# When enabled, plugins are loaded automatically with the default options    #
# when an appropriate <Plugin ...> block is encountered.                     #
# Disabled by default.                                                       #
#----------------------------------------------------------------------------#
#AutoLoadPlugin false
#----------------------------------------------------------------------------#
# When enabled, internal statistics are collected, using "collectd" as the   #
# plugin name.                                                               #
# Disabled by default.                                                       #
#----------------------------------------------------------------------------#
#CollectInternalStats false
#----------------------------------------------------------------------------#
# Interval at which to query values. This may be overwritten on a per-plugin #
# base by using the 'Interval' option of the LoadPlugin block:               #
#   <LoadPlugin foo>                                                         #
#     Interval 60                                                            #
#   </LoadPlugin>                                                            #
#----------------------------------------------------------------------------#
#Interval 10
#MaxReadInterval 86400
#Timeout 2
#ReadThreads 5
#WriteThreads 5
# Limit the size of the write queue. Default is no limit. Setting up a limit is
# recommended for servers handling a high volume of traffic.
#WriteQueueLimitHigh 1000000
#WriteQueueLimitLow 800000
##############################################################################
# Logging                                                                    #
#----------------------------------------------------------------------------#
# Plugins which provide logging functions should be loaded first, so log     #
# messages generated when loading or configuring other plugins can be        #
# accessed.                                                                  #
##############################################################################
LoadPlugin syslog
LoadPlugin logfile
<Plugin logfile>
  LogLevel "info"
  File "/var/log/collectd.log"
  Timestamp true
  PrintSeverity true
</Plugin>
##############################################################################
# LoadPlugin section                                                         #
##############################################################################
LoadPlugin cpu
LoadPlugin df
LoadPlugin disk
LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin processes
LoadPlugin write_prometheus
LoadPlugin zookeeper
LoadPlugin java
LoadPlugin exec
##############################################################################
# Plugin configuration                                                       #
#----------------------------------------------------------------------------#
# In this section configuration stubs for each plugin are provided. A desc-  #
# ription of those options is available in the collectd.conf(5) manual page. #
##############################################################################
<Plugin df>
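  # Report filesystem usage for the data mount point only; with the default
  # IgnoreSelected false, all other mount points are ignored.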
  MountPoint "<DATA_ROOT>"
</Plugin>
<Plugin disk>
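  # Collect I/O statistics only for the two devices listed below; device
  # names are resolved through udev's DEVNAME attribute.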
  Disk "<DISK_ONE>"
  Disk "<DISK_TWO>"
  IgnoreSelected false
  UseBSDName false
  UdevNameAttr "DEVNAME"
</Plugin>
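# The exec plugin runs the script below as user "equalum" and reads values in
# collectd's plain-text protocol from its stdout. A minimal sketch of such a
# script in Python (the script path and the /data/kafka directory are
# hypothetical; COLLECTD_HOSTNAME and COLLECTD_INTERVAL are set by collectd
# for exec'd programs):
#
#   #!/usr/bin/env python3
#   import os
#   import subprocess
#   import time
#
#   host = os.environ.get("COLLECTD_HOSTNAME", "localhost")
#   interval = int(float(os.environ.get("COLLECTD_INTERVAL", "10")))
#   while True:
#       # du -sk prints the directory's used size in kilobytes
#       used_kb = subprocess.check_output(["du", "-sk", "/data/kafka"]).split()[0].decode()
#       print('PUTVAL "%s/exec-kafka_disk/gauge-used_kb" interval=%d N:%s'
#             % (host, interval, used_kb), flush=True)
#       time.sleep(interval)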
<Plugin exec>
  Exec "equalum" "<KAFKA_DISK_SCRIPT>"
</Plugin>
<Plugin interface>
  Interface "eth0"
  IgnoreSelected false
  ReportInactive true
  UniqueName false
</Plugin>
<Plugin processes>
  CollectFileDescriptor true
  CollectContextSwitch true
  CollectMemoryMaps true
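  # Each ProcessMatch groups every process whose command line matches the
  # given regular expression under the supplied name.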
  ProcessMatch "spark" "(executor.id|EqlJobServer|deploy.master.Master|deploy.history.History|deploy.worker.Worker)"
  ProcessMatch "spark-executors" "executor.id"
  ProcessMatch "spark-driver" "EqlJobServer"
  ProcessMatch "spark-master" "deploy.master.Master"
  ProcessMatch "spark-worker" "deploy.worker.Worker"
  ProcessMatch "spark-other" "(deploy.master.Master|deploy.history.History|deploy.worker.Worker)"
  ProcessMatch "kafka" "-Dname=kafka_broker"
  ProcessMatch "zk" "-Dname=zookeeper"
  ProcessMatch "engine" "-Dname=eq_engine"
  ProcessMatch "agent" "-Dname=eq_agent"
  ProcessMatch "hdfs" "-Dhadoop"
  ProcessMatch "Transformer" "-DeqTransformer"
  ProcessMatch "hdfs-nameNode" "NameNode"
  ProcessMatch "hdfs-dataNode" "DataNode"
  ProcessMatch "hdfs-journalNode" "JournalNode"
  ProcessMatch "hdfs-zkfc" "DFSZKFailoverController"
  ProcessMatch "prometheus" "/prometheus/"
  ProcessMatch "pushgateway" "/pushgateway/"
  ProcessMatch "grafana" "grafana-server"
</Plugin>
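# write_prometheus starts an embedded HTTP server; Prometheus can scrape all
# collected metrics from http://<HOST>:9103/metrics.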
<Plugin write_prometheus>
  Port "9103"
</Plugin>
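# The zookeeper plugin reads runtime statistics via ZooKeeper's "mntr"
# four-letter-word command on the client port configured below.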
<Plugin zookeeper>
  Host "<HOST>"
  Port "2222"
</Plugin>
<Plugin "java">
  JVMARG "-Djava.class.path=/usr/share/collectd/java/collectd-api.jar:/usr/share/collectd/java/generic-jmx.jar"
  LoadPlugin "org.collectd.java.GenericJMX"
  <Plugin "GenericJMX">
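    # Each MBean block defines which JMX attributes to read and how to map
    # them to collectd values; the Connection blocks further below choose
    # which JVMs to query and which MBean definitions to collect from them.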
    <MBean "memory">
      ObjectName "java.lang:type=MemoryPool,*"
      InstancePrefix "memory_pool-"
      InstanceFrom "name"
      <Value>
        Type "memory"
        Table true
        Attribute "Usage"
      </Value>
    </MBean>
    <MBean "kafka_UnderReplicatedPartitions">
      ObjectName "kafka.server:type=ReplicaManager,name=UnderReplicatedPartitions"
      InstancePrefix "UnderReplicatedPartitions"
      <Value>
        Type "gauge"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "kafka_ActiveControllerCount">
      ObjectName "kafka.controller:type=KafkaController,name=ActiveControllerCount"
      InstancePrefix "ActiveControllerCount"
      <Value>
        Type "gauge"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "kafka_OfflinePartitionsCount">
      ObjectName "kafka.controller:type=KafkaController,name=OfflinePartitionsCount"
      InstancePrefix "OfflinePartitionsCount"
      <Value>
        Type "gauge"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "kafka_LeaderElections">
      ObjectName "kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs"
      InstancePrefix "LeaderElections"
      <Value>
        Type "counter"
        Table false
        Attribute "Count"
      </Value>
    </MBean>
    <MBean "kafka_UncleanLeaderElections">
      ObjectName "kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec"
      InstancePrefix "UncleanLeaderElections"
      <Value>
        Type "counter"
        Table false
        Attribute "Count"
      </Value>
    </MBean>
    <MBean "kafka_TotalTime">
      ObjectName "kafka.network:type=RequestMetrics,name=TotalTimeMs,request=*"
      InstancePrefix "TotalTime"
      <Value>
        Type "counter"
        Table false
        InstanceFrom "request"
        Attribute "Count"
      </Value>
    </MBean>
    <MBean "kafka_Throughput">
      ObjectName "kafka.server:type=BrokerTopicMetrics,name=Bytes*PerSec"
      InstancePrefix "Throughput"
      <Value>
        Type "counter"
        Table false
        InstanceFrom "name"
        Attribute "Count"
      </Value>
    </MBean>
    <MBean "kafka_Purgatory">
      ObjectName "kafka.server:type=DelayedOperationPurgatory,name=NumDelayedOperations,delayedOperation=*"
      InstancePrefix "Purgatory"
      <Value>
        Type "gauge"
        Table false
        InstanceFrom "delayedOperation"
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "kafka_Connections">
      ObjectName "kafka.server:type=socket-server-metrics,listener=PLAINTEXT,networkProcessor=*"
      InstancePrefix "Connections"
      <Value>
        Type "gauge"
        Table false
        InstanceFrom "networkProcessor"
        Attribute "connection-count"
      </Value>
    </MBean>
    <MBean "kafka_BytesOutPerTopic">
      ObjectName "kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec,topic=*"
      InstancePrefix "BytesOutPerTopic"
      <Value>
        Type "counter"
        Table false
        InstanceFrom "topic"
        Attribute "Count"
      </Value>
    </MBean>
    <MBean "kafka_TopicSize">
      ObjectName "kafka.log:type=Log,name=Size,topic=*,partition=*"
      InstancePrefix "TopicSize"
      InstanceFrom "topic"
      <Value>
        Type "gauge"
        Table false
        InstanceFrom "partition"
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "kafka_TopicStartOffset">
      ObjectName "kafka.log:type=Log,name=LogStartOffset,topic=*,partition=*"
      InstancePrefix "TopicStartOffset"
      InstanceFrom "topic"
      <Value>
        Type "gauge"
        Table false
        InstanceFrom "partition"
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "kafka_TopicEndOffset">
      ObjectName "kafka.log:type=Log,name=LogEndOffset,topic=*,partition=*"
      InstancePrefix "TopicEndOffset"
      InstanceFrom "topic"
      <Value>
        Type "gauge"
        Table false
        InstanceFrom "partition"
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "spark_failedStages">
      ObjectName "metrics:name=*DAGScheduler.stage.failedStages"
      InstancePrefix "FailedStages"
      <Value>
        Type "gauge"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "spark_runningStages">
      ObjectName "metrics:name=*DAGScheduler.stage.runningStages"
      InstancePrefix "RunningStages"
      <Value>
        Type "gauge"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "spark_waitingStages">
      ObjectName "metrics:name=*DAGScheduler.stage.waitingStages"
      InstancePrefix "WaitingStages"
      <Value>
        Type "gauge"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "spark_activeJobs">
      ObjectName "metrics:name=*DAGScheduler.job.activeJobs"
      InstancePrefix "ActiveJobs"
      <Value>
        Type "gauge"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "spark_allJobs">
      ObjectName "metrics:name=*DAGScheduler.job.allJobs"
      InstancePrefix "AllJobs"
      <Value>
        Type "counter"
        Table false
        Attribute "Value"
      </Value>
    </MBean>
    <MBean "spark_messageProcessingTime">
      ObjectName "metrics:name=*DAGScheduler.messageProcessingTime"
      InstancePrefix "MessageProcessingTime"
      <Value>
        Type "counter"
        Table false
        Attribute "Count"
      </Value>
    </MBean>
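    # Each Connection below requires JMX remote access to be enabled on the
    # target JVM, e.g. with flags along these lines (an unauthenticated,
    # non-SSL setup; adjust to your security requirements):
    #   -Dcom.sun.management.jmxremote
    #   -Dcom.sun.management.jmxremote.port=<KAFKA_JMX_PORT>
    #   -Dcom.sun.management.jmxremote.authenticate=false
    #   -Dcom.sun.management.jmxremote.ssl=false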
    <Connection>
      Host "<HOST>"
      ServiceURL "service:jmx:rmi:///jndi/rmi://<HOST>:<SPARK_JMX_PORT>/jmxrmi"
      InstancePrefix "spark-"
      Collect "spark_failedStages"
      Collect "spark_runningStages"
      Collect "spark_waitingStages"
      Collect "spark_activeJobs"
      Collect "spark_allJobs"
      Collect "spark_messageProcessingTime"
    </Connection>
    <Connection>
      Host "<HOST>"
      ServiceURL "service:jmx:rmi:///jndi/rmi://<HOST>:<ENGINE_JMX_PORT>/jmxrmi"
      InstancePrefix "engine-"
      Collect "memory"
    </Connection>
    <Connection>
      Host "<HOST>"
      ServiceURL "service:jmx:rmi:///jndi/rmi://<HOST>:<KAFKA_JMX_PORT>/jmxrmi"
      InstancePrefix "kafka-"
      Collect "memory"
      Collect "kafka_UnderReplicatedPartitions"
      Collect "kafka_ActiveControllerCount"
      Collect "kafka_OfflinePartitionsCount"
      Collect "kafka_LeaderElections"
      Collect "kafka_UncleanLeaderElections"
      Collect "kafka_TotalTime"
      Collect "kafka_Throughput"
      Collect "kafka_Purgatory"
      Collect "kafka_Connections"
      Collect "kafka_BytesOutPerTopic"
      Collect "kafka_TopicSize"
      Collect "kafka_TopicStartOffset"
      Collect "kafka_TopicEndOffset"
    </Connection>
    <Connection>
      Host "<HOST>"
      ServiceURL "service:jmx:rmi:///jndi/rmi://<HOST>:<ZK_JMX_PORT>/jmxrmi"
      InstancePrefix "zk-"
      Collect "memory"
    </Connection>
  </Plugin>
</Plugin>
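# Pull in any additional configuration fragments from the drop-in directory.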
Include "/etc/collectd.d"