Created
December 24, 2010 20:31
-
-
Save kejadlen/754490 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# An experiment with using the normal parser instead of the scanner. | |
# Ragel source for a CSV parser built as a regular state machine (using
# entering/leaving actions) rather than a Ragel scanner. Run it through
# Ragel's Ruby backend to produce the Ruby class.
class AlphaCSV

  %%{
    machine alphacsv;

    # Character classes, following the RFC 4180 grammar.
    textdata = 0x20..0x21 | 0x23..0x2b | 0x2d..0x7e;
    cr = 0x0d;
    lf = 0x0a;
    comma = 0x2c;
    dquote = 0x22;
    # Accepts both CRLF and a bare LF as the record separator.
    crlf = cr? lf;

    # Unquoted field: remember where it starts, push the slice when leaving.
    non_escaped = textdata* > { ts = fpc } % { current_line << data[ts...fpc].pack('c*') };
    # Quoted field: every doubled quote ("") inside the field stands for one
    # literal quote, so gsub (not sub) is needed to un-escape all of them.
    escaped = dquote (textdata | comma | cr | lf | dquote{2})* > { ts = fpc } dquote % { current_line << data[ts...fpc-1].pack('c*').gsub('""', '"') };

    field = escaped | non_escaped;
    # When a record is left, move the collected fields into the result.
    record = (field (comma field)*) % { csv << current_line; current_line = [] };

    main := record (crlf record)* crlf?;
  }%%

  %% write data;

  class << self
    # Parses +data+ (a String) and returns an Array of rows, each row being
    # an Array of field Strings.
    def parse data
      # The machine operates on an array of byte values.
      data = data.unpack('c*')
      csv = []
      current_line = []

      %% write init;

      eof = pe

      %% write exec;

      # Drop a trailing row that consists of a single empty field.
      csv.pop if csv.last == ['']
      csv
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# line 1 "alphacsv.rl" | |
# file = [header CRLF] record *(CRLF record) [CRLF] | |
# header = name *(COMMA name) | |
# record = field *(COMMA field) | |
# name = field | |
# field = (escaped / non-escaped) | |
# escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE | |
# non-escaped = *TEXTDATA | |
# COMMA = %x2C | |
# CR = %x0D ;as per section 6.1 of RFC 2234 [2] | |
# DQUOTE = %x22 ;as per section 6.1 of RFC 2234 [2] | |
# LF = %x0A ;as per section 6.1 of RFC 2234 [2] | |
# CRLF = CR LF ;as per section 6.1 of RFC 2234 [2] | |
# TEXTDATA = %x20-21 / %x23-2B / %x2D-7E | |
# CSV parser driven by a Ragel-generated scanner (generated from alphacsv.rl).
#
# The transition tables and the body of +parse+ are Ragel output; they are
# kept as generated. The only hand-applied change is that quoted fields are
# un-escaped with +gsub+ instead of +sub+, so that *every* doubled quote ("")
# in a field becomes a single quote, not just the first one. Keep alphacsv.rl
# in sync when regenerating this file.
class AlphaCSV

  # line 35 "alphacsv.rl"


  # line 24 "alphacsv.rb"
  class << self
    attr_accessor :_alphacsv_actions
    private :_alphacsv_actions, :_alphacsv_actions=
  end
  self._alphacsv_actions = [
    0, 1, 2, 1, 5, 1, 6, 1,
    7, 1, 8, 1, 9, 2, 0, 1,
    2, 3, 4
  ]

  class << self
    attr_accessor :_alphacsv_key_offsets
    private :_alphacsv_key_offsets, :_alphacsv_key_offsets=
  end
  self._alphacsv_key_offsets = [
    0, 0, 1, 6, 12, 18
  ]

  class << self
    attr_accessor :_alphacsv_trans_keys
    private :_alphacsv_trans_keys, :_alphacsv_trans_keys=
  end
  self._alphacsv_trans_keys = [
    10, 10, 13, 34, 32, 126, 10, 13,
    34, 44, 32, 126, 32, 33, 35, 43,
    45, 126, 34, 0
  ]

  class << self
    attr_accessor :_alphacsv_single_lengths
    private :_alphacsv_single_lengths, :_alphacsv_single_lengths=
  end
  self._alphacsv_single_lengths = [
    0, 1, 3, 4, 0, 1
  ]

  class << self
    attr_accessor :_alphacsv_range_lengths
    private :_alphacsv_range_lengths, :_alphacsv_range_lengths=
  end
  self._alphacsv_range_lengths = [
    0, 0, 1, 1, 3, 0
  ]

  class << self
    attr_accessor :_alphacsv_index_offsets
    private :_alphacsv_index_offsets, :_alphacsv_index_offsets=
  end
  self._alphacsv_index_offsets = [
    0, 0, 2, 7, 13, 17
  ]

  class << self
    attr_accessor :_alphacsv_trans_targs
    private :_alphacsv_trans_targs, :_alphacsv_trans_targs=
  end
  self._alphacsv_trans_targs = [
    3, 0, 2, 2, 5, 2, 3, 3,
    1, 2, 3, 4, 0, 4, 4, 4,
    3, 2, 3, 3, 3, 3, 0
  ]

  class << self
    attr_accessor :_alphacsv_trans_actions
    private :_alphacsv_trans_actions, :_alphacsv_trans_actions=
  end
  self._alphacsv_trans_actions = [
    5, 0, 0, 0, 16, 0, 11, 5,
    0, 0, 3, 0, 0, 0, 0, 0,
    9, 0, 7, 11, 9, 7, 0
  ]

  class << self
    attr_accessor :_alphacsv_to_state_actions
    private :_alphacsv_to_state_actions, :_alphacsv_to_state_actions=
  end
  self._alphacsv_to_state_actions = [
    0, 0, 0, 13, 0, 0
  ]

  class << self
    attr_accessor :_alphacsv_from_state_actions
    private :_alphacsv_from_state_actions, :_alphacsv_from_state_actions=
  end
  self._alphacsv_from_state_actions = [
    0, 0, 0, 1, 0, 0
  ]

  class << self
    attr_accessor :_alphacsv_eof_trans
    private :_alphacsv_eof_trans, :_alphacsv_eof_trans=
  end
  self._alphacsv_eof_trans = [
    0, 0, 20, 0, 21, 22
  ]

  class << self
    attr_accessor :alphacsv_start
  end
  self.alphacsv_start = 3;
  class << self
    attr_accessor :alphacsv_first_final
  end
  self.alphacsv_first_final = 3;
  class << self
    attr_accessor :alphacsv_error
  end
  self.alphacsv_error = 0;

  class << self
    attr_accessor :alphacsv_en_main
  end
  self.alphacsv_en_main = 3;


  # line 38 "alphacsv.rl"

  class << self
    # Parses +data+ (a String) into an Array of rows, each row an Array of
    # field Strings. Fields are separated by commas, rows by LF or CR LF.
    # A field wrapped in double quotes may contain commas, CR, LF and doubled
    # quotes (""), each of which yields one literal quote.
    #
    # No exception is raised for input the machine cannot match: the scanner
    # enters its error state (cs == 0), the loop stops, and whatever was
    # collected up to that point is returned.
    def parse data
      # The machine compares integer byte values, not characters.
      data = data.unpack('c*')
      csv = []
      current_line = []


      # line 150 "alphacsv.rb"
      begin
        p ||= 0
        pe ||= data.length
        cs = alphacsv_start
        ts = nil
        te = nil
        act = 0
      end

      # line 47 "alphacsv.rl"

      # The whole input is available up front, so end-of-buffer is end-of-file.
      eof = pe


      # line 165 "alphacsv.rb"
      begin
        _klen, _trans, _keys, _acts, _nacts = nil
        _goto_level = 0
        _resume = 10
        _eof_trans = 15
        _again = 20
        _test_eof = 30
        _out = 40
        while true
          _trigger_goto = false
          if _goto_level <= 0
            if p == pe
              _goto_level = _test_eof
              next
            end
            if cs == 0
              _goto_level = _out
              next
            end
          end
          if _goto_level <= _resume
            _acts = _alphacsv_from_state_actions[cs]
            _nacts = _alphacsv_actions[_acts]
            _acts += 1
            while _nacts > 0
              _nacts -= 1
              _acts += 1
              case _alphacsv_actions[_acts - 1]
              when 2 then
                # line 1 "NONE"
                begin
                  ts = p
                end
                # line 199 "alphacsv.rb"
              end # from state action switch
            end
            if _trigger_goto
              next
            end
            _keys = _alphacsv_key_offsets[cs]
            _trans = _alphacsv_index_offsets[cs]
            _klen = _alphacsv_single_lengths[cs]
            _break_match = false

            begin
              if _klen > 0
                _lower = _keys
                _upper = _keys + _klen - 1

                loop do
                  break if _upper < _lower
                  _mid = _lower + ( (_upper - _lower) >> 1 )

                  if data[p] < _alphacsv_trans_keys[_mid]
                    _upper = _mid - 1
                  elsif data[p] > _alphacsv_trans_keys[_mid]
                    _lower = _mid + 1
                  else
                    _trans += (_mid - _keys)
                    _break_match = true
                    break
                  end
                end # loop
                break if _break_match
                _keys += _klen
                _trans += _klen
              end
              _klen = _alphacsv_range_lengths[cs]
              if _klen > 0
                _lower = _keys
                _upper = _keys + (_klen << 1) - 2
                loop do
                  break if _upper < _lower
                  _mid = _lower + (((_upper-_lower) >> 1) & ~1)
                  if data[p] < _alphacsv_trans_keys[_mid]
                    _upper = _mid - 2
                  elsif data[p] > _alphacsv_trans_keys[_mid+1]
                    _lower = _mid + 2
                  else
                    _trans += ((_mid - _keys) >> 1)
                    _break_match = true
                    break
                  end
                end # loop
                break if _break_match
                _trans += _klen
              end
            end while false
          end
          if _goto_level <= _eof_trans
            cs = _alphacsv_trans_targs[_trans]
            if _alphacsv_trans_actions[_trans] != 0
              _acts = _alphacsv_trans_actions[_trans]
              _nacts = _alphacsv_actions[_acts]
              _acts += 1
              while _nacts > 0
                _nacts -= 1
                _acts += 1
                case _alphacsv_actions[_acts - 1]
                when 3 then
                  # line 1 "NONE"
                  begin
                    te = p+1
                  end
                when 4 then
                  # line 30 "alphacsv.rl"
                  begin
                    act = 1; end
                when 5 then
                  # line 32 "alphacsv.rl"
                  begin
                    te = p+1
                  end
                when 6 then
                  # line 33 "alphacsv.rl"
                  begin
                    te = p+1
                    begin csv << current_line; current_line = [] end
                  end
                when 7 then
                  # line 30 "alphacsv.rl"
                  begin
                    te = p
                    # Quoted field: strip the surrounding quotes and turn every
                    # doubled quote into a single one (gsub, not sub).
                    p = p - 1; begin current_line << data[ts+1...te-1].pack('c*').gsub('""', '"') end
                  end
                when 8 then
                  # line 31 "alphacsv.rl"
                  begin
                    te = p
                    p = p - 1; begin current_line << data[ts...te].pack('c*') end
                  end
                when 9 then
                  # line 1 "NONE"
                  begin
                    case act
                    when 0 then
                      begin begin
                        cs = 0
                        _trigger_goto = true
                        _goto_level = _again
                        break
                      end
                      end
                    when 1 then
                      # Backtrack to the end of the last complete quoted field
                      # and emit it, un-escaping every doubled quote.
                      begin begin p = ((te))-1; end
                        current_line << data[ts+1...te-1].pack('c*').gsub('""', '"') end
                    end
                  end
                  # line 314 "alphacsv.rb"
                end # action switch
              end
            end
            if _trigger_goto
              next
            end
          end
          if _goto_level <= _again
            _acts = _alphacsv_to_state_actions[cs]
            _nacts = _alphacsv_actions[_acts]
            _acts += 1
            while _nacts > 0
              _nacts -= 1
              _acts += 1
              case _alphacsv_actions[_acts - 1]
              when 0 then
                # line 1 "NONE"
                begin
                  ts = nil; end
              when 1 then
                # line 1 "NONE"
                begin
                  act = 0
                end
                # line 339 "alphacsv.rb"
              end # to state action switch
            end
            if _trigger_goto
              next
            end
            if cs == 0
              _goto_level = _out
              next
            end
            p += 1
            if p != pe
              _goto_level = _resume
              next
            end
          end
          if _goto_level <= _test_eof
            if p == eof
              if _alphacsv_eof_trans[cs] > 0
                _trans = _alphacsv_eof_trans[cs] - 1;
                _goto_level = _eof_trans
                next;
              end
            end
          end
          if _goto_level <= _out
            break
          end
        end
      end

      # line 51 "alphacsv.rl"

      # The last row has no terminating line break to flush it; add it here.
      csv << current_line unless current_line.empty?
      csv
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ragel source for the scanner-based CSV parser. Run it through Ragel's Ruby
# backend to produce the Ruby class.
class AlphaCSV

  %%{
    machine alphacsv;

    # Character classes, following the RFC 4180 grammar.
    textdata = 0x20..0x21 | 0x23..0x2b | 0x2d..0x7e;
    cr = 0x0d;
    lf = 0x0a;
    comma = 0x2c;
    dquote = 0x22;
    # Accepts both CRLF and a bare LF as the record separator.
    crlf = cr? lf;

    non_escaped = textdata*;
    escaped = dquote (textdata | comma | cr | lf | dquote{2})* dquote;

    main := |*
      # Quoted field: strip the surrounding quotes; every doubled quote ("")
      # stands for one literal quote, so gsub (not sub) un-escapes all of them.
      escaped => { current_line << data[ts+1...te-1].pack('c*').gsub('""', '"') };
      non_escaped => { current_line << data[ts...te].pack('c*') };
      comma;
      # A line break ends the current row.
      crlf => { csv << current_line; current_line = [] };
    *|;
  }%%

  %% write data;

  class << self
    # Parses +data+ (a String) and returns an Array of rows, each row being
    # an Array of field Strings.
    def parse data
      # The machine operates on an array of byte values.
      data = data.unpack('c*')
      csv = []
      current_line = []

      %% write init;

      eof = pe

      %% write exec;

      # The last row has no terminating line break to flush it; add it here.
      csv << current_line unless current_line.empty?
      csv
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'minitest/autorun' | |
require 'alphacsv' | |
# Unit tests for AlphaCSV.parse.
#
# Current Minitest releases name the base class Minitest::Test; the legacy
# MiniTest::Unit::TestCase is used only where the new name is not defined.
class TestCSV < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)
  def test_empty
    assert_equal [], AlphaCSV.parse('')
  end

  def test_single_element
    assert_equal [%w[ aaa ]], AlphaCSV.parse('aaa')
  end

  def test_multiple_elements
    assert_equal [%w[ aaa bbb ]], AlphaCSV.parse('aaa,bbb')
  end

  def test_multiple_lines
    assert_equal [%w[ aaa ], %w[ bbb ]], AlphaCSV.parse("aaa\r\nbbb")
  end

  # A bare LF (without a preceding CR) also separates rows.
  def test_bare_lf
    assert_equal [%w[ aaa ], %w[ bbb ]], AlphaCSV.parse("aaa\nbbb")
  end

  # Inputs modelled on the RFC grammar: trailing line break, no trailing line
  # break, quoted fields, a line break inside quotes, and an escaped quote.
  def test_rfc
    assert_equal [%w[ aaa bbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse("aaa,bbb,ccc\r\nzzz,yyy,xxx\r\n")
    assert_equal [%w[ aaa bbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse("aaa,bbb,ccc\r\nzzz,yyy,xxx")
    assert_equal [%w[ aaa bbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse(%Q("aaa","bbb","ccc"\r\nzzz,yyy,xxx))
    assert_equal [%W[ aaa b\r\nbb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse(%Q("aaa","b\r\nbb","ccc"\r\nzzz,yyy,xxx))
    assert_equal [%w[ aaa b"bb ccc], %w[ zzz yyy xxx]], AlphaCSV.parse(%Q("aaa","b""bb","ccc"\r\nzzz,yyy,xxx))
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment