Skip to content

Instantly share code, notes, and snippets.

@gardart
Created August 12, 2016 18:41
Show Gist options
  • Save gardart/49e33566abaec73e73953ad91f80c784 to your computer and use it in GitHub Desktop.
Save gardart/49e33566abaec73e73953ad91f80c784 to your computer and use it in GitHub Desktop.
input {
  # Read every file under /tmp/weather from its first line and tag the
  # resulting events as "weather-data" so the filter and output sections
  # can select them.
  file {
    # Example file generation.
    # NOTE: the command below was hard-wrapped and is truncated at the end of
    # the first two lines in this copy; it is kept for reference only. Every
    # continuation line must stay commented out or the config will not parse.
    # $ curl "http://brunnur.vedur.is/athuganir/athtafla/2016071410.html" 2>/dev/null | grep -i -e '</\?TABLE\|</\?TD\|</\?TR' | tr -d '\n' | sed 's
    # /Ig' | sed 's/<\/\?\(TABLE\|TR\)[^>]*>//Ig' | sed 's/^<T[DH][^>]*>\|<\/\?T[DH][^>]*>$//Ig' | sed 's/<\/T[DH][^>]*><T[DH][^>]*>/,/Ig' | sed 's/<[
    # ed 's/^[\ \t]*//g' | sed '/^\s*$/d' | sed 's/^/2016071410,/' > /tmp/weather/data7.csv
    #
    path => "/tmp/weather/*"
    # Without this the file input only picks up lines appended after startup.
    start_position => "beginning"
    type => "weather-data"
  }
}
filter {
  # Only events tagged by the weather file input are processed here.
  if [type] == "weather-data" {
    # Keyed SHA1 hash stored in event metadata; the output section uses it
    # as the Elasticsearch document id. No `source` is configured, so the
    # plugin's default source field is what gets hashed.
    fingerprint {
      method => "SHA1"
      key => "H#83hjd93hH"
      target => "[@metadata][fingerprint]"
    }

    # Split the comma-separated line into named fields.
    csv {
      separator => ","
      columns => [
        "observation_date", "station_id", "name", "altitude",
        "T", "TN", "TX", "D", "F", "FX", "FG", "RH", "P"
      ]
    }

    if [station_id] != "station_id" {
      # Data row: discard bookkeeping fields that are not needed downstream.
      mutate {
        remove_field => [ "message", "host", "@version" ]
      }

      # Cast the parsed string columns to numeric types.
      mutate {
        convert => {
          "altitude" => "integer"
          "T" => "float"
          "TN" => "float"
          "TX" => "float"
          "D" => "integer"
          "F" => "float"
          "FX" => "float"
          "FG" => "float"
          "RH" => "integer"
          "P" => "float"
        }
      }

      # Parse observation_date (yyyyMMddHH) as the event time, then drop the
      # raw column once parsing has succeeded.
      date {
        match => [ "observation_date", "yyyyMMddHH" ]
        remove_field => ["observation_date"]
      }
    } else {
      # Header row: the station_id column holds its own column name.
      drop { }
    }
  }
}
output {
  # Ship weather events to Elasticsearch and echo them to the console.
  if [type] == "weather-data" {
    elasticsearch {
      hosts => ["localhost:9200"]
      index => "weather-data-1"
      # Uses the fingerprint computed in the filter section as the document
      # id, so an event with the same fingerprint maps to the same document.
      document_id => "%{[@metadata][fingerprint]}"
    }
    # Print each event in rubydebug format for inspection.
    stdout {
      codec => rubydebug
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment