Created
September 4, 2011 03:47
-
-
Save bytesource/1192215 to your computer and use it in GitHub Desktop.
Simple CSV Parser vs. CSV Gem Benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri' | |
require 'nokogiri' | |
require 'parslet' | |
require 'csv' | |
# -------------------------------------------------- | |
# Auxiliary code | |
class String
  # Strips one enclosing double quote from each end of the string and
  # collapses every doubled quote ("") into a single quote.
  #
  # The anchors are \A and \z (whole string) rather than ^ and $ (every
  # line), so a quoted field that spans several lines only loses its outer
  # quotes and not quotes that happen to sit at an interior line boundary.
  #
  # Returns a new String; the receiver is not modified.
  def unquote
    gsub(/\A"|"\z/, '').gsub('""', '"')
  end
end
# Runs the given block (if any) and prints the elapsed time in seconds.
#
# The duration is taken from the monotonic clock, so it is not distorted
# when the system wall clock is adjusted while the block is running.
#
# Returns nil (the result of puts).
def time
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  yield if block_given?
  puts Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
end
# -------------------------------------------------- | |
# Fetching test data | |
# Download the page and parse it while the connection/tempfile is open.
# URI.open is used because open-uri no longer hooks Kernel#open for URLs
# on current Rubies; the block form closes the IO once parsing is done.
doc = URI.open('https://github.com/circle/fastercsv/blob/master/test/test_data.csv') do |io|
  Nokogiri::HTML(io)
end
# Rebuild the CSV text: one "\n"-terminated line per matched element.
data = doc.css('pre div.line').map { |line| "#{line.content}\n" }.join
# -------------------------------------------------- | |
# CSV parser | |
# Parslet grammar for CSV text.
#
# A file is one or more records, each terminated by a line break. Every
# record is tagged :row in the parse tree and every field :column.
class Parser < Parslet::Parser
  rule(:file)        { (record.as(:row) >> newline).repeat(1) }
  rule(:record)      { field.as(:column) >> (comma >> field.as(:column)).repeat }
  rule(:field)       { escaped | non_escaped }
  # Quoted field: may contain commas, line breaks and doubled quotes.
  rule(:escaped)     { d_quote >> (textdata | comma | cr | lf | d_quote >> d_quote).repeat >> d_quote }
  rule(:non_escaped) { textdata.repeat }
  # One or more characters that are not a comma, quote, CR or LF.
  rule(:textdata)    { ((comma | d_quote | cr | lf).absent? >> any).repeat(1) }
  # Accept CR LF (the RFC 4180 line ending) in addition to the forms that
  # were accepted before: a bare LF, or LF followed by CR.
  rule(:newline)     { (cr >> lf) | (lf >> cr.maybe) }
  rule(:lf)          { str("\n") }
  rule(:cr)          { str("\r") }
  rule(:d_quote)     { str('"') }
  rule(:comma)       { str(',') }
  root(:file)
end
# Converts the parse tree into plain Ruby values.
class Transformer < Parslet::Transform
  # A column whose subtree is an Array (the empty-match case, []) becomes
  # nil; any other column is stringified and unquoted.
  rule(:column => subtree(:field)) { field.is_a?(Array) ? nil : field.to_s.unquote }

  # A row is replaced by its (already transformed) subtree.
  rule(:row => subtree(:array)) { array }
end
# -------------------------------------------------- | |
# Benchmark | |
# CSV parser | |
# Time the full Parslet pipeline: parse the text, then transform the tree.
time { Transformer.new.apply(Parser.new.parse(data)) }
# => 33.771940832 | |
# CSV gem | |
# Time the CSV library on the same input.
time { CSV.parse(data) }
# => 0.040131919 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment