Use a c3.xlarge or c3.2xlarge instance to host the ElasticSearch server. We need a fair bit of RAM and computing power, as well as two SSD (instance-store) disks for storage. Choose an existing IAM role or create a new one so that ElasticSearch can seamlessly discover other cluster nodes hosted on EC2. Also, don't forget to add both SSDs to the list of block devices (/dev/sdb and /dev/sdc).
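As an illustration only (the AMI id, key name, security group and IAM role name below are placeholders, not values from this guide), launching such an instance from the AWS CLI could look roughly like this:
# sketch: launch a c3.2xlarge with both ephemeral SSDs mapped and an IAM role attached
aws ec2 run-instances \
  --image-id ami-xxxxxxxx \
  --instance-type c3.2xlarge \
  --key-name my-key \
  --security-groups elastic-logs \
  --iam-instance-profile Name=elastic-logs-role \
  --block-device-mappings '[{"DeviceName":"/dev/sdb","VirtualName":"ephemeral0"},{"DeviceName":"/dev/sdc","VirtualName":"ephemeral1"}]'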
First we need to increase the system-wide file descriptor limits. To apply the change temporarily (it will reset on the next reboot), execute from bash:
sysctl -w fs.file-max=100000
sysctl -w vm.max_map_count=262144
To make it permanent, i.e. preserved across reboots, we need to edit the /etc/sysctl.conf file using:
sudoedit /etc/sysctl.conf
and add the following entries:
fs.file-max = 100000
vm.max_map_count = 262144
Now we need to apply the changes with
sysctl -p
and verify that everything went well by executing
cat /proc/sys/fs/file-max
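Both values can also be checked in one go with:
sysctl fs.file-max vm.max_map_count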
Next, raise the per-user limits in /etc/security/limits.conf. Execute the following lines from bash:
echo "* soft nofile 100000" | sudo tee -a /etc/security/limits.conf
echo "* hard nofile 100000" | sudo tee -a /etc/security/limits.conf
echo "* soft memlock unlimited" | sudo tee -a /etc/security/limits.conf
echo "* hard memlock unlimited" | sudo tee -a /etc/security/limits.conf
Verify the hard and soft limits with (you need to log out and back in for them to apply):
ulimit -Hn
ulimit -Sn
Disable swap so that ElasticSearch memory is never swapped out:
sudo swapoff -a
For a temporary solution (resets after the next boot) use
sysctl -w vm.swappiness=1
or, to make it permanent, edit the /etc/sysctl.conf file using
sudoedit /etc/sysctl.conf
and add:
vm.swappiness = 1
Next, create a RAID 0 array from the two ephemeral SSDs and mount it (run these as root):
mount_point=/media/ephemeral0
umount $mount_point
mdadm --stop /dev/md0 /dev/md127
yes | mdadm --create /dev/md0 --level=stripe --raid-devices=2 /dev/sdb /dev/sdc
mkfs.ext4 /dev/md0
mount -t ext4 /dev/md0 $mount_point
Add the following to /etc/fstab to make the RAID array available after reboot:
/dev/md127 /media/ephemeral0 ext4 defaults,nofail,comment=cloudconfig 0 2
Check with:
sudo mount -a
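The array and the mounted filesystem can be double-checked with, for example:
cat /proc/mdstat
sudo mdadm --detail /dev/md0
df -h /media/ephemeral0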
Optionally, create a shell script that does all of the above:
#!/bin/bash
# Recreate the RAID 0 array on the two ephemeral SSDs and mount it.
if [[ $(whoami) != root ]]; then
    echo "need root privileges"
    exit 1
fi
mount_point=/media/ephemeral0
umount $mount_point
# Stop any existing arrays (the kernel typically renames md0 to md127 after a reboot)
mdadm --stop /dev/md0 /dev/md127
yes | mdadm --create /dev/md0 --level=0 --raid-devices=2 /dev/sdb /dev/sdc
mkfs.ext4 /dev/md0
mount -t ext4 /dev/md0 $mount_point
chmod 777 $mount_point
# Point the cloud-init fstab entry for /dev/sdb at the RAID device instead
sed -i.bak 's,/dev/sdb,/dev/md127,g' /etc/fstab
Install haproxy and hatop (a console monitoring tool for haproxy):
sudo yum install -y haproxy
wget http://hatop.googlecode.com/files/hatop-0.7.7.tar.gz
tar xvf hatop-0.7.7.tar.gz && cd hatop-0.7.7
sudo install -m 755 bin/hatop /usr/local/bin
sudo install -m 644 man/hatop.1 /usr/local/share/man/man1
sudo gzip /usr/local/share/man/man1/hatop.1
Once haproxy is configured, hatop can be started with: hatop -s /var/lib/haproxy/stats
Configure haproxy logging through rsyslog. Edit the /etc/rsyslog.conf file with
sudoedit /etc/rsyslog.conf
Uncomment the following lines (enables UDP syslog listening on port 514):
#$ModLoad imudp
#$UDPServerRun 514
Add the following line (restricts listening to the loopback interface):
$UDPServerAddress 127.0.0.1
Configure local2 events to go to the /var/log/haproxy.log file by creating a new file and adding the following to it:
sudoedit /etc/rsyslog.d/haproxy.conf
local2.* /var/log/haproxy.log
Restart the rsyslog service with: sudo service rsyslog restart
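To confirm that the local2 routing rule works, one quick check is to emit a test message and look at the log file (this exercises the local2 rule, not the UDP listener itself):
logger -p local2.info "haproxy rsyslog test"
sudo tail -n 5 /var/log/haproxy.log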
Back up the original haproxy configuration and open it for editing:
sudo cp /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.orig
sudoedit /etc/haproxy/haproxy.cfg
This is how the haproxy configuration should look:
global
    chroot /var/lib/haproxy
    pidfile /var/run/haproxy.pid
    maxconn 4096
    user haproxy
    group haproxy
    nbproc 1
    log 127.0.0.1 local2
    daemon
    stats socket /var/lib/haproxy/stats mode 777 level admin

defaults
    mode http
    log global
    option httplog
    option dontlognull
    option http-server-close
    option forwardfor except 127.0.0.0/8
    option redispatch
    retries 3
    timeout http-request 10s
    timeout queue 1m
    timeout connect 5s
    timeout client 90s
    timeout server 90s
    timeout http-keep-alive 10s
    timeout check 10s
    maxconn 3000

userlist internal_users
    user app insecure-password xxxxx

userlist external_users
    user ext-user insecure-password xxxxxx

frontend elastic_internal
    bind *:9665
    acl auth_okay http_auth(internal_users)
    http-request auth realm local if !auth_okay
    default_backend elastic

frontend elastic_external
    bind *:9666
    acl auth_okay http_auth(external_users)
    http-request auth realm local if !auth_okay
    default_backend elastic

frontend elastic_kibana_external
    bind *:9667
    acl auth_okay http_auth(external_users)
    http-request auth realm local if !auth_okay
    default_backend elastic_kibana

backend elastic
    stats enable
    balance roundrobin
    option httpchk GET /
    option redispatch
    server elastic1 0.0.0.0:9200 check inter 30s

backend elastic_kibana
    stats enable
    balance roundrobin
    option httpchk GET /
    option redispatch
    server elastic_kibana1 0.0.0.0:5601 check inter 30s
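Before starting the service, the configuration can be syntax-checked with:
sudo haproxy -c -f /etc/haproxy/haproxy.cfg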
Start haproxy as a service using:
sudo service haproxy start
Make haproxy start after reboot using:
sudo chkconfig --add haproxy
sudo chkconfig haproxy on
If there is no logging, check whether the haproxy user (or whichever user the haproxy daemon runs as) owns /var/log/haproxy.log.
Note the file and folder locations used by haproxy:
log: /var/log/haproxy.log
admin socket: /var/lib/haproxy/stats
hatop: hatop -s /var/lib/haproxy/stats
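Once ElasticSearch is running behind the proxy, a quick illustrative check of the internal frontend and its basic auth (the credentials are the placeholders from the userlist above) is:
curl -u app:xxxxx http://localhost:9665/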
There are two ways to install elasticsearch: from a source archive or by adding the elasticsearch yum repository.
For the first option, download the source archive using:
wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.4.4.zip
sudo unzip elasticsearch-1.4.4.zip -d /usr/local/elasticsearch && cd /usr/local/elasticsearch/elasticsearch-1.4.4
Install service wrapper using:
wget https://github.com/elasticsearch/elasticsearch-servicewrapper/zipball/master && unzip master
mv elastic-elasticsearch-servicewrapper-8513436/service/ /usr/local/elasticsearch/elasticsearch-1.4.4/bin
Set elasticsearch home folder in elasticsearch.conf:
cd /usr/local/elasticsearch/elasticsearch-1.4.4
vim bin/service/elasticsearch.conf
set.default.ES_HOME=/usr/local/elasticsearch/elasticsearch-1.4.4
Change permissions on the elasticsearch64 binary:
chmod a+x bin/service/elasticsearch64
Install elasticsearch service:
sudo bin/service/elasticsearch64 install
Run ElasticSearch as a service:
sudo bin/service/elasticsearch64 start
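Once started, it can be verified that the node is up by querying the REST API locally:
curl 'http://localhost:9200/?pretty'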
Alternatively, install elasticsearch by adding its repository to yum. First import the GPG key:
sudo rpm --import https://packages.elasticsearch.org/GPG-KEY-elasticsearch
Create the repo file using:
sudoedit /etc/yum.repos.d/elasticsearch.repo
and add to it the following content:
[elasticsearch-1.4]
name=Elasticsearch repository for 1.4.x packages
baseurl=http://packages.elasticsearch.org/elasticsearch/1.4/centos
gpgcheck=1
gpgkey=http://packages.elasticsearch.org/GPG-KEY-elasticsearch
enabled=1
Install elasticsearch:
sudo yum install -y elasticsearch
Run elasticsearch after reboot:
sudo chkconfig --add elasticsearch
sudo chkconfig elasticsearch on
Install the AWS cloud plugin (needed for EC2 discovery) together with the head and HQ monitoring plugins:
cd /usr/share/elasticsearch
sudo bin/plugin -i elasticsearch/elasticsearch-cloud-aws/2.4.1
sudo bin/plugin -i mobz/elasticsearch-head
sudo bin/plugin -i royrusso/elasticsearch-HQ
Add to .bashrc (set the heap to half of the available memory on the EC2 instance):
export ES_HEAP_SIZE=7680m
Reload .bashrc and create a backup copy of elasticsearch.yml:
. ~/.bashrc
sudo cp /etc/elasticsearch/elasticsearch.yml /etc/elasticsearch/elasticsearch.yml.orig
Modify elasticsearch.yml:
path:
  data: /media/ephemeral0/data
  work: /media/ephemeral0/work
  logs: /media/ephemeral0/logs
bootstrap.mlockall: true
index.store.type: mmapfs
http.compression: true
transport.tcp.compress: true
script.disable_dynamic: false
cluster.name: elastic-logs
discovery:
  type: ec2
  ec2:
    groups: elastic-logs
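After restarting elasticsearch, it is worth checking that memory locking actually took effect (look for mlockall: true in the output; if it is false, revisit the memlock limits above):
curl 'http://localhost:9200/_nodes/process?pretty'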
Install the Marvel monitoring plugin:
cd /usr/share/elasticsearch
sudo bin/plugin -i elasticsearch/marvel/latest
Using our only ElasticSearch instance to collect Marvel data would put too much load on it; it is recommended to use a separate ES installation for this purpose. Add the following line to elasticsearch.yml to disable the Marvel agent:
marvel.agent.enabled: false
It is highly recommended to install Kibana on a separate EC2 instance.
cd ~
wget https://download.elasticsearch.org/kibana/kibana/kibana-4.0.1-linux-x64.tar.gz
tar xzvf kibana-4.0.1-linux-x64.tar.gz && cd kibana-4.0.1-linux-x64
bin/kibana
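bin/kibana runs in the foreground; as a rough sketch (not the only option), it can be kept running in the background with nohup:
nohup bin/kibana > kibana.log 2>&1 &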
Create an index template on elasticsearch such as the one below. Note that in production we should use e.g. 6 shards and 1 replica, so be careful and don't forget to change the template definitions when setting up the production cluster. We should also try out different values for the following settings (check http://blog.sematext.com/2013/07/08/elasticsearch-refresh-interval-vs-indexing-performance/):
indices.memory.index_buffer_size
index.translog.flush_threshold_ops
index.refresh_interval
This needs to be executed against the ElasticSearch REST API:
PUT /_template/logstash-runner
{
  "template": "logstash-runner-*",
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0,
    "index.refresh_interval": "5s"
  },
  "mappings": {
    "logs": {
      "properties": {
        "host": {
          "type": "string",
          "index": "not_analyzed"
        },
        "time_reported": {
          "type": "date",
          "format": "date_time"
        },
        "id": {
          "type": "long"
        },
        "task": {
          "type": "string",
          "index": "not_analyzed"
        },
        "url": {
          "type": "string",
          "index": "not_analyzed"
        },
        "time": {
          "type": "date",
          "format": "yyyy-MM-dd'T'HH:mm:ss"
        }
      }
    }
  }
}
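The installed template can be verified with, for example:
curl 'http://localhost:9200/_template/logstash-runner?pretty'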
The alternative to creating the index template by executing the above script against elasticsearch is to create a template configuration file. This means creating a templates folder beneath /etc/elasticsearch (the default config path; see the Elasticsearch configuration documentation for path details), and creating a logstash_runner.json file containing the following:
{
  "logstash-runner": {
    "template": "logstash-runner-*",
    "settings": {
      "number_of_shards": 1,
      "number_of_replicas": 0,
      "index.refresh_interval": "5s"
    },
    "mappings": {
      "logs": {
        "properties": {
          "host": {
            "type": "string",
            "index": "not_analyzed"
          },
          "time_reported": {
            "type": "date",
            "format": "date_time"
          },
          "id": {
            "type": "long"
          },
          "task": {
            "type": "string",
            "index": "not_analyzed"
          },
          "url": {
            "type": "string",
            "index": "not_analyzed"
          },
          "time": {
            "type": "date",
            "format": "yyyy-MM-dd'T'HH:mm:ss"
          }
        }
      }
    }
  }
}
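A minimal sketch of putting that file in place, assuming the default /etc/elasticsearch config path (the JSON content is the block above):
sudo mkdir -p /etc/elasticsearch/templates
sudoedit /etc/elasticsearch/templates/logstash_runner.json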
We need to modify the logstash configuration at /etc/logstash/conf.d/my.conf on each EC2 instance that sends logs. Use something like the following configuration:
input {
  file {
    path => "/var/log/my.log"
    start_position => "beginning"
    sincedb_path => "/var/log/logstash/.sincedb"
    codec => json {
      charset => "UTF-8"
    }
  }
}

filter {
  if [message][url] =~ /.+/ {
    mutate { add_field => { "url" => "%{[message][url]}" } }
  }
  if [message][time] =~ /.+/ {
    mutate { add_field => { "time" => "%{[message][time]}" } }
  }
  mutate {
    remove_field => ["path"]
    rename => ["timeReported", "time_reported"]
    add_field => { "id" => "%{[message][id]}" }
    add_field => { "task" => "%{[message][task]}" }
    remove_field => ["message"]
  }
}

output {
  elasticsearch_http {
    host => "111.222.101.202"
    port => 9665
    index => "logstash-runner-%{+YYYY-MM-dd}"
    index_type => "logs"
    manage_template => false
    user => "runner"
    password => "xxxxxx"
  }
}
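Once logstash is restarted, a rough way to verify that documents are arriving (host, port and credentials are the placeholders used above) is:
curl -u runner:xxxxxx 'http://111.222.101.202:9665/logstash-runner-*/_count?pretty'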