Created
February 14, 2020 16:35
-
-
Save spinscale/417e2e2c1a44b617a16f85c7f461d46d to your computer and use it in GitHub Desktop.
Use filebeat to read CSV data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Data is from https://www.sciencedirect.com/science/article/pii/S2352340918315191 | |
# Events are read from standard input, one event per line.
filebeat.inputs: | |
- type: stdin | |
# Replace an index template that is already installed, so that the extra
# field declared below is picked up on every run.
setup.template.overwrite: true | |
# Declare arrival_date (built by the script processor further down) as a
# `date` field in the index template.
setup.template.append_fields: | |
- name: arrival_date | |
type: date | |
# Processor chain applied to every line read from stdin.
processors: | |
# Decode the CSV text held in `message`. Source and target are both `message`
# and overwrite_keys is true, so the raw line is replaced by the decoded
# values, which extract_array below addresses by position.
- decode_csv_fields: | |
fields: | |
message: "message" | |
separator: "," | |
ignore_missing: false | |
overwrite_keys: true | |
trim_leading_space: true | |
fail_on_error: true | |
# Remove fields that are not part of the CSV data
# (presumably Filebeat's own metadata - TODO confirm).
- drop_fields: | |
fields: [ "log", "host", "ecs", "input", "agent" ] | |
# Give every CSV column a name: `<target field>: <zero-based column index>`.
- extract_array: | |
field: message | |
mappings: | |
is_canceled: 0 | |
lead_time: 1 | |
arrival_date_year: 2 | |
arrival_date_month: 3 | |
arrival_date_week_number: 4 | |
arrival_date_day_of_month: 5 | |
stays_in_weekend_nights: 6 | |
stays_in_week_nights: 7 | |
adults: 8 | |
children: 9 | |
babies: 10 | |
meal: 11 | |
country: 12 | |
market_segment: 13 | |
distribution_channel: 14 | |
is_repeated_guest: 15 | |
previous_cancellations: 16 | |
previous_bookings_not_canceled: 17 | |
reserved_room_type: 18 | |
assigned_room_type: 19 | |
booking_changes: 20 | |
deposit_type: 21 | |
# NOTE(review): presumably called handling_agent rather than agent to stay
# clear of the `agent` field dropped above - confirm.
handling_agent: 22 | |
company: 23 | |
days_in_waiting_list: 24 | |
customer_type: 25 | |
adr: 26 | |
required_car_parking_spaces: 27 | |
total_of_special_requests: 28 | |
reservation_status: 29 | |
reservation_status_date: 30 | |
# The decoded array is no longer needed once the named fields exist.
- drop_fields: | |
fields: [ "message" ] | |
# The literal string "NULL" marks an absent company; drop the field in that case.
- drop_fields: | |
when: | |
equals: | |
company: "NULL" | |
fields: [ "company" ] | |
# Same treatment for an absent handling agent.
- drop_fields: | |
when: | |
equals: | |
handling_agent: "NULL" | |
fields: [ "handling_agent" ] | |
# Inline JavaScript that cleans up the extracted fields. The script is given
# as a YAML folded scalar (`>`), so its line breaks depend on indentation.
- script: | |
lang: javascript | |
id: my_filter | |
source: > | |
function process(event) {
    // Normalises one hotel-booking row after the CSV columns have been mapped
    // to named string fields:
    //   - cancels the event when it is the CSV header line
    //   - removes a non-numeric "NA" children value
    //   - trims string fields that are padded with whitespace
    //   - turns the is_canceled / is_repeated_guest flags into booleans
    //   - merges arrival year / month name / day into arrival_date
    //     (yyyy-MM-dd) and removes the individual arrival_date_* fields
    //
    // event: script-processor event exposing Get / Put / Delete / Cancel.
    // Returns nothing; the event is modified in place.
    //
    // NOTE(review): this script is embedded in a YAML folded scalar
    // (`source: >`), which joins consecutive lines at the base indentation
    // into a single line. Helpers are therefore nested inside process() and
    // every body line stays indented, so a `//` comment can never swallow the
    // code that follows it. Syntax is kept at ES5 level for the embedded
    // interpreter.

    var MONTH_NUMBERS = {
        January: '01',
        February: '02',
        March: '03',
        April: '04',
        May: '05',
        June: '06',
        July: '07',
        August: '08',
        September: '09',
        October: '10',
        November: '11',
        December: '12'
    };
    var PADDED_FIELDS = ['assigned_room_type', 'deposit_type', 'reserved_room_type', 'meal'];
    var FLAG_FIELDS = ['is_canceled', 'is_repeated_guest'];
    var DATE_PART_FIELDS = [
        'arrival_date_day_of_month',
        'arrival_date_month',
        'arrival_date_year',
        'arrival_date_week_number'
    ];

    // Returns the field as a trimmed string, or null when the field is absent.
    function readText(field) {
        var value = event.Get(field);
        if (value === null || value === undefined) {
            return null;
        }
        return String(value).trim();
    }

    // The flags arrive as "0"/"1"; "true" is still accepted so that input
    // which already carries textual booleans keeps working.
    function isFlagSet(text) {
        return text === '1' || text === 'true';
    }

    // Builds yyyy-MM-dd, or returns null when a part is missing or malformed,
    // so that no unparsable value reaches the date-mapped arrival_date field.
    function buildArrivalDate(year, monthName, dayOfMonth) {
        if (year === null || monthName === null || dayOfMonth === null) {
            return null;
        }
        if (!/^\d{4}$/.test(year) || !/^\d{1,2}$/.test(dayOfMonth)) {
            return null;
        }
        if (!Object.prototype.hasOwnProperty.call(MONTH_NUMBERS, monthName)) {
            return null;
        }
        var day = dayOfMonth.length === 1 ? '0' + dayOfMonth : dayOfMonth;
        return year + '-' + MONTH_NUMBERS[monthName] + '-' + day;
    }

    var i;

    // identify a header line and drop the event
    if (readText('is_canceled') === 'IsCanceled') {
        event.Cancel();
        return;
    }

    // special case, children can be "NA", but we want this to be a number
    if (readText('children') === 'NA') {
        event.Delete('children');
    }

    // trim some strings that are known to be padded; absent fields are skipped
    for (i = 0; i < PADDED_FIELDS.length; i++) {
        var trimmed = readText(PADDED_FIELDS[i]);
        if (trimmed !== null) {
            event.Put(PADDED_FIELDS[i], trimmed);
        }
    }

    // never forget the booleans! (an absent flag becomes false)
    for (i = 0; i < FLAG_FIELDS.length; i++) {
        event.Put(FLAG_FIELDS[i], isFlagSet(readText(FLAG_FIELDS[i])));
    }

    // proper date construction
    var arrivalDate = buildArrivalDate(
        readText('arrival_date_year'),
        readText('arrival_date_month'),
        readText('arrival_date_day_of_month')
    );
    if (arrivalDate !== null) {
        event.Put('arrival_date', arrivalDate);
    }

    // the parts are removed even if no date could be built, which keeps the
    // set of fields on every document the same
    for (i = 0; i < DATE_PART_FIELDS.length; i++) {
        event.Delete(DATE_PART_FIELDS[i]);
    }
}
# TODO construct proper date | |
# NOTE(review): the script processor above already writes arrival_date as
# yyyy-MM-dd, so this TODO may be stale - confirm before removing it.
# Cast the listed string fields to integers. A failed conversion does not fail
# the event (fail_on_error: false) and absent fields are skipped
# (ignore_missing: true).
# NOTE(review): arrival_date_day_of_month and arrival_date_week_number are
# deleted by the script processor above, so their entries here find nothing to
# convert. lead_time and adr are not listed and therefore stay strings -
# confirm that this is intended.
- convert: | |
fail_on_error: false | |
ignore_missing: true | |
fields: | |
- { from : "stays_in_weekend_nights", type : "integer" } | |
- { from : "days_in_waiting_list", type : "integer" } | |
- { from : "stays_in_week_nights", type : "integer" } | |
- { from : "required_car_parking_spaces", type : "integer" } | |
- { from : "total_of_special_requests", type : "integer" } | |
- { from : "children", type : "integer" } | |
- { from : "previous_cancellations", type : "integer" } | |
- { from : "booking_changes", type : "integer" } | |
- { from : "adults", type : "integer" } | |
- { from : "arrival_date_day_of_month", type : "integer" } | |
- { from : "previous_bookings_not_canceled", type : "integer" } | |
- { from : "arrival_date_week_number", type : "integer" } | |
- { from : "babies", type : "integer" } | |
# Send the events to a local Elasticsearch node.
output.elasticsearch.hosts: "localhost:9200" | |
# Uncomment to pretty-print events to the console instead (for debugging).
#output.console.pretty: true |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
Could you please help me with the following case?
Case: a .csv file has 3 columns, and one of the columns contains multi-line values (the lines inside the value are separated by Enter/newline characters). How can I get this data into the correct rows and columns?