Skip to content

Instantly share code, notes, and snippets.

@kddnewton
Last active August 7, 2023 19:54
Show Gist options
  • Save kddnewton/09830f563682d3cd8672c64ce5b7016e to your computer and use it in GitHub Desktop.
Save kddnewton/09830f563682d3cd8672c64ce5b7016e to your computer and use it in GitHub Desktop.
Faster JSON parser in Ruby
# frozen_string_literal: true
require "bundler/inline"
gemfile do
source "https://rubygems.org"
gem "json_pure"
gem "benchmark-ips"
end
require "json/pure"
require "benchmark/ips"
# A recursive-descent JSON parser built directly on StringScanner.
#
# The whole document is scanned as binary (ASCII-8BIT) data; decoded strings
# are re-tagged as UTF-8 before they are returned. Besides standard JSON
# whitespace, `//` line comments and `/* ... */` block comments are skipped
# between tokens (see IGNORE).
#
# Supported options (all optional):
#   :array_class      class instantiated for JSON arrays (default Array)
#   :object_class     class instantiated for JSON objects (default Hash)
#   :decimal_class    class used for numbers with a fraction/exponent
#   :allow_nan        accept the bare tokens NaN, Infinity and -Infinity
#   :freeze           return deduplicated frozen strings
#   :symbolize_names  convert object keys to symbols
#   :max_nesting      maximum array/object depth (default 100; nil/0 disables)
#   :create_additions revive objects via JSON.create_id / json_create
#   :match_string     { pattern => class } map used with :create_additions
class FastJSON < StringScanner
  # Everything that may appear between tokens: line comments, C-style block
  # comments and runs of JSON whitespace. It can match the empty string, so
  # `skip(IGNORE)` never fails.
  IGNORE = %r(
    (?:
      //[^\n\r]*[\n\r]| # line comments
      /\* # c-style comments
      (?:
        [^*/]| # normal chars
        /[^*]| # slashes that do not start a nested comment
        \*[^/]| # asterisks that do not end this comment
        /(?=\*/) # single slash before this comment's end
      )*
      \*/ # the End of this comment
      |[ \t\r\n]+ # whitespaces: space, horizontal tab, lf, cr
    )*
  )mx

  # @param source [#to_str] the JSON document
  # @param opts [Hash, nil] parser options, see the class comment
  # @raise [TypeError] if +source+ does not respond to +to_str+
  # @raise [ArgumentError] if :symbolize_names and :create_additions are both set
  def initialize(source, opts = {})
    if source.respond_to?(:to_str)
      source = source.to_str
    else
      raise TypeError, "#{source.inspect} is not like a string"
    end

    # Scan raw bytes: transcode to UTF-8 first (String#encode returns a new
    # string, so the caller's object is not re-tagged), then treat as binary.
    if source.encoding != ::Encoding::ASCII_8BIT
      source = source.encode(::Encoding::UTF_8)
      source.force_encoding(::Encoding::ASCII_8BIT)
    end
    super(source)

    opts ||= {}
    @array_class = opts[:array_class] || Array
    @decimal_class = opts[:decimal_class]
    @object_class = opts[:object_class] || Hash
    @allow_nan = !!opts[:allow_nan]
    @freeze = !!opts[:freeze]
    @symbolize_names = !!opts[:symbolize_names]
    @current_nesting = 0
    # An explicit nil/false :max_nesting means "unlimited", stored as 0.
    @max_nesting = opts.key?(:max_nesting) ? (opts[:max_nesting] || 0) : 100
    @create_additions = opts.key?(:create_additions) ? !!opts[:create_additions] : false
    @create_id = @create_additions ? JSON.create_id : nil
    @match_string = opts[:match_string]

    if @symbolize_names && @create_additions
      raise ArgumentError, "options :symbolize_names and :create_additions cannot be used in conjunction"
    end
  end

  # Parses the whole source as a single JSON value.
  #
  # @return [Object] the decoded value
  # @raise [JSON::ParserError] on malformed input or trailing content
  # @raise [JSON::NestingError] when :max_nesting is exceeded
  def parse
    skip(IGNORE)
    value = parse_item
    skip(IGNORE)
    raise JSON::ParserError, "unexpected tokens after value" unless eos?
    value
  end

  private

  # Parses exactly one JSON value starting at the current scan position.
  # Kept as one method with `while true` loops on purpose: this class exists
  # to be benchmarked, and extra method/block dispatch is measurable here.
  def parse_item
    case
    when scan(/"((?:[^\x0-\x1f"\\]|\\["\\\/bfnrt]|\\u[0-9a-fA-F]{4}|\\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*)"/n)
      string = self[1]

      # Nothing to unescape, but the result must still be UTF-8 tagged and
      # honour :freeze like every other string.
      if string.empty?
        string.force_encoding(::Encoding::UTF_8)
        return @freeze ? -string : string
      end

      string.gsub!(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c|
        # Compare against strings rather than calling to_sym, so arbitrary
        # input bytes are never turned into symbols.
        case c[1]
        when "b"
          "\b"
        when "f"
          "\f"
        when "n"
          "\n"
        when "r"
          "\r"
        when "t"
          "\t"
        when "u" # \uXXXX
          # A run of consecutive \uXXXX escapes is decoded together as
          # UTF-16BE so that surrogate pairs combine into one character.
          bytes = String.new(encoding: Encoding::ASCII_8BIT)
          i = 0
          while c[i] == "\\" && c[i + 1] == "u"
            bytes << c[i + 2, 2].to_i(16) << c[i + 4, 2].to_i(16)
            i += 6
          end
          bytes.encode("utf-8", "utf-16be").force_encoding(::Encoding::ASCII_8BIT)
        else
          # \\, \", \/ and any other escaped byte stand for themselves.
          c[1]
        end
      end
      string.force_encoding(::Encoding::UTF_8)
      string = -string if @freeze

      if @create_additions && @match_string
        @match_string.each do |pattern, clazz|
          return clazz.json_create(string) if clazz.json_creatable? && string =~ pattern
        end
      end
      string
    when skip(/\{/)
      @current_nesting += 1
      if @max_nesting != 0 && @current_nesting > @max_nesting
        raise JSON::NestingError, "nesting of #{@current_nesting} is too deep"
      end

      values = @object_class.new
      skip(IGNORE)
      if skip(/\}/)
        @current_nesting -= 1
        return values
      end

      while true
        key = parse_item
        raise JSON::ParserError, "expected a string key" unless key.is_a?(String)
        skip(IGNORE)
        key = key.to_sym if @symbolize_names
        raise JSON::ParserError, "expected a ':' to follow the string key" unless skip(/:/)
        skip(IGNORE)
        values[key] = parse_item
        skip(IGNORE)

        case
        when skip(/\}/)
          @current_nesting -= 1
          if @create_additions && (clazz_name = values[@create_id])
            clazz = JSON.deep_const_get(clazz_name)
            values = clazz.json_create(values) if clazz && clazz.json_creatable?
          end
          return values
        when skip(/,/)
          skip(IGNORE)
        else
          raise JSON::ParserError, "expected ',' or '}' after object value"
        end
      end
    when skip(/\[/)
      @current_nesting += 1
      if @max_nesting != 0 && @current_nesting > @max_nesting
        raise JSON::NestingError, "nesting of #{@current_nesting} is too deep"
      end

      values = @array_class.new
      skip(IGNORE)
      if skip("]")
        @current_nesting -= 1
        return values
      end

      while true
        values << parse_item
        skip(IGNORE)

        case
        when skip("]")
          @current_nesting -= 1
          return values
        when skip(",")
          skip(IGNORE)
        else
          raise JSON::ParserError, "expected ',' or ']' after array value"
        end
      end
    when skip("true")
      true
    when skip("false")
      false
    when skip("null")
      nil
    when scan(/-?(?:0|[1-9]\d*)((?:\.\d+)?(?:[Ee][-+]?\d+)?)?/)
      # Group 1 holds the fraction/exponent. For a plain integer it is the
      # empty string (truthy in Ruby), so test for emptiness, not nil-ness.
      fraction = self[1]
      if fraction.nil? || fraction.empty?
        Integer(self[0])
      elsif @decimal_class
        # Only reference BigDecimal when the library has actually been loaded.
        if defined?(::BigDecimal) && @decimal_class == ::BigDecimal
          BigDecimal(self[0])
        else
          @decimal_class.new(self[0]) || Float(self[0])
        end
      else
        Float(self[0])
      end
    when @allow_nan && skip("NaN")
      Float::NAN
    when @allow_nan && scan(/(-?)Infinity/)
      # Group 1 is "" without a sign and "-" with one.
      self[1].empty? ? Float::INFINITY : -Float::INFINITY
    else
      raise JSON::ParserError, "unexpected token at #{pos}: '#{peek(1)}'"
    end
  end
end
# A stripped-down recursive-descent JSON parser built on StringScanner.
#
# It always builds plain Array/Hash results, skips only whitespace matched by
# `\s` between tokens (no comment support) and understands just two options:
#   :symbolize_names  convert object keys to symbols
#   :max_nesting      maximum array/object depth (default 100; nil/0 disables)
#
# The document is scanned as binary (ASCII-8BIT) data; decoded strings are
# re-tagged as UTF-8 before they are returned.
class FasterJSON < StringScanner
  # @param source [#to_str] the JSON document
  # @param opts [Hash, nil] parser options, see the class comment
  # @raise [TypeError] if +source+ does not respond to +to_str+
  def initialize(source, opts = {})
    if source.respond_to?(:to_str)
      source = source.to_str
    else
      raise TypeError, "#{source.inspect} is not like a string"
    end

    # Scan raw bytes: transcode to UTF-8 first (String#encode returns a new
    # string, so the caller's object is not re-tagged), then treat as binary.
    if source.encoding != ::Encoding::ASCII_8BIT
      source = source.encode(::Encoding::UTF_8)
      source.force_encoding(::Encoding::ASCII_8BIT)
    end
    super(source)

    opts ||= {}
    @symbolize_names = !!opts[:symbolize_names]
    @current_nesting = 0
    # An explicit nil/false :max_nesting means "unlimited", stored as 0.
    @max_nesting = opts.key?(:max_nesting) ? (opts[:max_nesting] || 0) : 100
  end

  # Parses the whole source as a single JSON value.
  #
  # @return [Object] the decoded value
  # @raise [JSON::ParserError] on malformed input or trailing content
  # @raise [JSON::NestingError] when :max_nesting is exceeded
  def parse
    skip(/\s*/)
    value = parse_item
    skip(/\s*/)
    raise JSON::ParserError, "unexpected tokens after value" unless eos?
    value
  end

  private

  # Parses exactly one JSON value starting at the current scan position.
  # Kept as one method with `while true` loops on purpose: this class exists
  # to be benchmarked, and extra method/block dispatch is measurable here.
  def parse_item
    case
    when scan(/"((?:[^\x0-\x1f"\\]|\\["\\\/bfnrt]|\\u[0-9a-fA-F]{4}|\\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*)"/n)
      string = self[1]

      # Nothing to unescape, but the result must still be tagged as UTF-8.
      return string.force_encoding(::Encoding::UTF_8) if string.empty?

      string.gsub!(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c|
        # Compare against strings rather than calling to_sym, so arbitrary
        # input bytes are never turned into symbols.
        case c[1]
        when "b"
          "\b"
        when "f"
          "\f"
        when "n"
          "\n"
        when "r"
          "\r"
        when "t"
          "\t"
        when "u" # \uXXXX
          # A run of consecutive \uXXXX escapes is decoded together as
          # UTF-16BE so that surrogate pairs combine into one character.
          bytes = String.new(encoding: Encoding::ASCII_8BIT)
          i = 0
          while c[i] == "\\" && c[i + 1] == "u"
            bytes << c[i + 2, 2].to_i(16) << c[i + 4, 2].to_i(16)
            i += 6
          end
          bytes.encode("utf-8", "utf-16be").force_encoding(::Encoding::ASCII_8BIT)
        else
          # \\, \", \/ and any other escaped byte stand for themselves.
          c[1]
        end
      end
      string.force_encoding(::Encoding::UTF_8)
      string
    when skip("{")
      @current_nesting += 1
      if @max_nesting != 0 && @current_nesting > @max_nesting
        raise JSON::NestingError, "nesting of #{@current_nesting} is too deep"
      end

      values = {}
      skip(/\s*/)
      if skip("}")
        @current_nesting -= 1
        return values
      end

      while true
        key = parse_item
        # Validate before symbolizing: a Symbol would fail the String check,
        # and non-string keys (nil, numbers, ...) do not respond to to_sym.
        raise JSON::ParserError, "expected a string key" unless key.is_a?(String)
        key = key.to_sym if @symbolize_names
        raise JSON::ParserError, "expected a ':' to follow the string key" unless skip(/\s*:\s*/)
        values[key] = parse_item
        skip(/\s*/)

        case
        when skip(",")
          skip(/\s*/)
        when skip("}")
          @current_nesting -= 1
          return values
        else
          raise JSON::ParserError, "expected ',' or '}' after object value"
        end
      end
    when skip("[")
      @current_nesting += 1
      if @max_nesting != 0 && @current_nesting > @max_nesting
        raise JSON::NestingError, "nesting of #{@current_nesting} is too deep"
      end

      values = []
      skip(/\s*/)
      if skip("]")
        @current_nesting -= 1
        return values
      end

      while true
        values << parse_item
        skip(/\s*/)

        case
        when skip(",")
          skip(/\s*/)
        when skip("]")
          @current_nesting -= 1
          return values
        else
          raise JSON::ParserError, "expected ',' or ']' after array value"
        end
      end
    when skip("true")
      true
    when skip("false")
      false
    when skip("null")
      nil
    when scan(/-?(?:0|[1-9]\d*)((?:\.\d+)?(?:[Ee][-+]?\d+)?)?/)
      # Group 1 holds the fraction/exponent. For a plain integer it is the
      # empty string (truthy in Ruby), so test for emptiness, not nil-ness.
      fraction = self[1]
      fraction.nil? || fraction.empty? ? Integer(self[0]) : Float(self[0])
    else
      raise JSON::ParserError, "unexpected token at #{pos}: '#{peek(1)}'"
    end
  end
end
# Fixture covering every JSON value kind, including an escaped control
# character and empty as well as populated containers.
fixture = [true, false, nil, 3, "hi\u0000", [], ["a", "b", "c"], {}, { "a" => 1, "b" => 2, "c" => 3 }]
source = JSON.pretty_generate(fixture)

# Sanity check: each candidate must agree with the json_pure reference parser
# before any timing is taken.
[FastJSON, FasterJSON].each do |candidate|
  raise if JSON::Pure::Parser.new(source).parse != candidate.new(source).parse
end

# Each parser gets its own literal block so that all three are measured
# through separate, monomorphic call sites.
Benchmark.ips do |bench|
  bench.report("json_pure") { JSON::Pure::Parser.new(source).parse }
  bench.report("fast_json") { FastJSON.new(source).parse }
  bench.report("faster_json") { FasterJSON.new(source).parse }
  bench.compare!
end
Warming up --------------------------------------
json_pure 4.302k i/100ms
fast_json 8.027k i/100ms
faster_json 10.110k i/100ms
Calculating -------------------------------------
json_pure 42.378k (± 2.1%) i/s - 215.100k in 5.078163s
fast_json 78.217k (± 4.1%) i/s - 393.323k in 5.038024s
faster_json 97.307k (± 3.5%) i/s - 495.390k in 5.097498s
Comparison:
faster_json: 97306.6 i/s
fast_json: 78216.6 i/s - 1.24x slower
json_pure: 42377.7 i/s - 2.30x slower
@eregon
Copy link

eregon commented Feb 25, 2023

Results for TruffleRuby and CRuby 3.2.0 with YJIT on my machine (CPU frequency boost disabled, using the performance governor):

ruby 3.2.0 (2022-12-25 revision a528908271) +YJIT [x86_64-linux]
Warming up --------------------------------------
           json_pure     1.677k i/100ms
           fast_json     3.084k i/100ms
         faster_json     3.781k i/100ms
Calculating -------------------------------------
           json_pure     16.840k (± 0.3%) i/s -     85.527k in   5.078829s
           fast_json     30.732k (± 0.4%) i/s -    154.200k in   5.017587s
         faster_json     37.753k (± 0.2%) i/s -    189.050k in   5.007561s

Comparison:
         faster_json:    37753.0 i/s
           fast_json:    30732.4 i/s - 1.23x  slower
           json_pure:    16840.1 i/s - 2.24x  slower

truffleruby 23.0.0-dev-6e40a2aa, like ruby 3.1.3, GraalVM EE JVM [x86_64-linux]
Warming up --------------------------------------
           json_pure   323.000  i/100ms
           fast_json   103.000  i/100ms
         faster_json     2.760k i/100ms
Calculating -------------------------------------
           json_pure     40.701k (±24.1%) i/s -    185.402k in   4.991525s
           fast_json     56.975k (±18.1%) i/s -    267.697k in   4.993331s
         faster_json     63.914k (± 8.7%) i/s -    317.400k in   5.034377s

Comparison:
         faster_json:    63914.4 i/s
           fast_json:    56974.7 i/s - same-ish: difference falls within error
           json_pure:    40701.5 i/s - 1.57x  slower

So significantly faster than json_pure for both, great!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment