# Embulk configuration: the input is the standard output of `ruby test.rb`,
# parsed as CSV, and the parsed records are written to stdout.
in:
  type: command
  command: ruby test.rb
  parser:
    type: csv
    charset: UTF-8
    newline: CRLF
    delimiter: ','
    quote: '"'
    escape: '"'
    null_string: 'NULL'
    trim_if_not_quoted: false
    # NOTE(review): the command output is a single stream, so presumably only
    # its very first line is skipped; the first row of the first file appears
    # to be missing from the preview output -- confirm this is intended.
    skip_header_lines: 1
    allow_extra_columns: false
    allow_optional_columns: false
    columns:
    # The first three columns are the values test.rb derives from the file
    # name (e.g. 20160929_xxx_yyy.csv); the rest come from the file content.
    - {name: date, type: timestamp, format: '%Y%m%d'}
    - {name: xxx, type: string}
    - {name: yyy, type: string}
    - {name: id, type: string}
    - {name: account, type: string}
    - {name: time, type: string}
    - {name: purchase, type: string}
    - {name: comment, type: string}
out: {type: stdout}
- 実際にはファイルを開いた際にヘッダなどを取り除く処理を行う
#!/usr/bin/env ruby
# Prints every line of each CSV file in the current directory, prefixed with
# columns derived from the file name: "20160929_xxx_yyy.csv" contributes the
# prefix "20160929,xxx,yyy," to each of its lines.

# Writes the lines of all files matching +pattern+ to +out+, each prefixed
# with the file's base name (".csv" removed, underscores turned into commas).
#
# @param pattern [String] glob pattern selecting the input files
# @param out [IO] destination stream; anything responding to +puts+
# @return [void]
def emit_with_filename_columns(pattern = "*.csv", out = $stdout)
  # "*.csv" matches the same files as the former "**.csv" (a "**" that is not
  # followed by "/" is an ordinary "*"). Sort explicitly so the output order
  # does not depend on the file system on Ruby versions before 3.0.
  Dir.glob(pattern).sort.each do |file|
    meta = File.basename(file, ".csv").tr("_", ",")
    # Each line keeps its own line terminator, so puts adds none for it.
    File.foreach(file) do |line|
      out.puts "#{meta},#{line}"
    end
  end
end

emit_with_filename_columns
20160929_xxx_yyy.csv,20160930_xxx_yyy.csv
1,32864,2015-01-27 19:23:49,20150127,embulk
2,14824,2015-01-27 19:01:23,20150127,embulk jruby
3,27559,2015-01-28 02:20:02,20150128,"Embulk ""csv"" parser plugin"
4,11270,2015-01-29 11:54:36,20150129,NULL
embulk preview conf.yml
embulk preview -G conf.yml
2016-09-30 22:02:01.334 +0900: Embulk v0.8.13
2016-09-30 22:02:02.800 +0900 [INFO] (0001:preview): Loaded plugin embulk-input-command (0.1.4)
2016-09-30 22:02:02.846 +0900 [INFO] (0001:preview): Running command [sh, -c, ruby test.rb]
*************************** 1 ***************************
date (timestamp) : 2016-09-29 00:00:00 UTC
xxx ( string) : xxx
yyy ( string) : yyy
id ( string) : 2
account ( string) : 14824
time ( string) : 2015-01-27 19:01:23
purchase ( string) : 20150127
comment ( string) : embulk jruby
*************************** 2 ***************************
date (timestamp) : 2016-09-29 00:00:00 UTC
xxx ( string) : xxx
yyy ( string) : yyy
id ( string) : 3
account ( string) : 27559
time ( string) : 2015-01-28 02:20:02
purchase ( string) : 20150128
comment ( string) : Embulk "csv" parser plugin
*************************** 3 ***************************
date (timestamp) : 2016-09-29 00:00:00 UTC
xxx ( string) : xxx
yyy ( string) : yyy
id ( string) : 4
account ( string) : 11270
time ( string) : 2015-01-29 11:54:36
purchase ( string) : 20150129
comment ( string) :
*************************** 4 ***************************
date (timestamp) : 2016-09-30 00:00:00 UTC
xxx ( string) : xxx
yyy ( string) : yyy
id ( string) : 1
account ( string) : 32864
time ( string) : 2015-01-27 19:23:49
purchase ( string) : 20150127
comment ( string) : embulk
*************************** 5 ***************************
date (timestamp) : 2016-09-30 00:00:00 UTC
xxx ( string) : xxx
yyy ( string) : yyy
id ( string) : 2
account ( string) : 14824
time ( string) : 2015-01-27 19:01:23
purchase ( string) : 20150127
comment ( string) : embulk jruby
*************************** 6 ***************************
date (timestamp) : 2016-09-30 00:00:00 UTC
xxx ( string) : xxx
yyy ( string) : yyy
id ( string) : 3
account ( string) : 27559
time ( string) : 2015-01-28 02:20:02
purchase ( string) : 20150128
comment ( string) : Embulk "csv" parser plugin
*************************** 7 ***************************
date (timestamp) : 2016-09-30 00:00:00 UTC
xxx ( string) : xxx
yyy ( string) : yyy
id ( string) : 4
account ( string) : 11270
time ( string) : 2015-01-29 11:54:36
purchase ( string) : 20150129
comment ( string) :