Skip to content

Instantly share code, notes, and snippets.

@RoyalIcing
Last active December 2, 2023 01:34
Show Gist options
  • Save RoyalIcing/bc37ac2bbae15ab77e20a9ea6dd96860 to your computer and use it in GitHub Desktop.
Save RoyalIcing/bc37ac2bbae15ab77e20a9ea6dd96860 to your computer and use it in GitHub Desktop.
Lemire’s Parsing 8-bit integers quickly in WebAssembly via Orb. https://lemire.me/blog/2023/11/28/parsing-8-bit-integers-quickly/
defmodule ParseU8 do
@moduledoc """
https://lemire.me/blog/2023/11/28/parsing-8-bit-integers-quickly/
"""
# Orb lets you write WebAssembly with friendly Elixir syntax: https://github.com/RoyalIcing/Orb
use Orb
Memory.pages(1)
wasm_mode(U32)
defw parse_uint8_naive(str: I32.String, len: I32), {I32, I32},
i: I32,
r: I32,
d: I32,
n: I32 do
r = len &&& 0x3
loop EachChar do
d = str[at!: i] - ?0
if d > 9 do
0
0
return()
end
n = n * 10 + d
i = i + 1
EachChar.continue(if: i < r)
end
n < 256 &&& len !== 0 &&& len < 4
n
end
defw parse_uint8_fastswar(str: I32.String, len: I32), {I32, I32}, digits: I32, n: I32 do
if len === 0 or len > 3 do
0
0
return()
end
digits =
Memory.load!(I32, str)
# Loads as little-endian
|> I32.xor(0x30303030)
|> I32.shl((4 - len) * 8)
n =
digits
|> I32.mul(0x640A01)
|> I32.shr_u(24)
|> I32.band(0x000000FF)
# Check are valid digits
I32.eqz((digits ||| 0x06060606 + digits) &&& 0xF0F0F0F0)
# Check is <= 255 (after converting to big-endian)
|> I32.band(
I32.rotl(digits |> I32.band(0xFF00FF00), 8)
|> I32.or(I32.rotr(digits |> I32.band(0x00FF00FF), 8)) <= 0x020505
)
n
end
end
(module $ParseU8
(memory (export "memory") 1)
(func $parse_uint8_naive (export "parse_uint8_naive") (param $str i32) (param $len i32) (result i32 i32)
(local $i i32)
(local $r i32)
(local $d i32)
(local $n i32)
(i32.and (local.get $len) (i32.const 3))
(local.set $r)
(loop $EachChar
(i32.sub (i32.load8_u (i32.add (local.get $str) (local.get $i))) (i32.const 48))
(local.set $d)
(i32.gt_u (local.get $d) (i32.const 9))
(if
(then
(i32.const 0)
(i32.const 0)
return
)
)
(i32.add (i32.mul (local.get $n) (i32.const 10)) (local.get $d))
(local.set $n)
(i32.add (local.get $i) (i32.const 1))
(local.set $i)
(i32.lt_u (local.get $i) (local.get $r))
(br_if $EachChar)
)
(i32.and (i32.and (i32.lt_u (local.get $n) (i32.const 256)) (i32.ne (local.get $len) (i32.const 0))) (i32.lt_u (local.get $len) (i32.const 4)))
(local.get $n)
)
(func $parse_uint8_fastswar (export "parse_uint8_fastswar") (param $str i32) (param $len i32) (result i32 i32)
(local $digits i32)
(local $n i32)
(i32.or (i32.eqz (local.get $len)) (i32.gt_u (local.get $len) (i32.const 3)))
(if
(then
(i32.const 0)
(i32.const 0)
return
)
)
(i32.shl (i32.xor (i32.load (local.get $str)) (i32.const 808464432)) (i32.mul (i32.sub (i32.const 4) (local.get $len)) (i32.const 8)))
(local.set $digits)
(i32.and (i32.shr_u (i32.mul (local.get $digits) (i32.const 6556161)) (i32.const 24)) (i32.const 255))
(local.set $n)
(i32.and (i32.eqz (i32.and (i32.or (local.get $digits) (i32.add (i32.const 101058054) (local.get $digits))) (i32.const 4042322160))) (i32.le_u (i32.or (i32.rotl (i32.and (local.get $digits) (i32.const 4278255360)) (i32.const 8)) (i32.rotr (i32.and (local.get $digits) (i32.const 16711935)) (i32.const 8))) (i32.const 132357)))
(local.get $n)
)
)
defmodule ParseU8Test do
use ExUnit.Case, async: true
alias OrbWasmtime.Instance
def naive(str) when is_binary(str) do
i = Instance.run(ParseU8)
Instance.write_memory(i, 0x100, [0, 0, 0, 0])
Instance.write_memory(i, 0x100, str |> :binary.bin_to_list())
Instance.call(i, :parse_uint8_naive, 0x100, byte_size(str))
end
def naive(strings) when is_list(strings) do
multi(strings, :parse_uint8_naive)
end
def fastswar(str) when is_binary(str) do
i = Instance.run(ParseU8)
Instance.write_memory(i, 0x100, [0, 0, 0, 0])
Instance.write_memory(i, 0x100, str |> :binary.bin_to_list())
Instance.call(i, :parse_uint8_fastswar, 0x100, byte_size(str))
end
def fastswar(strings) do
multi(strings, :parse_uint8_fastswar)
end
defp multi(strings, func)
when func in ~w(parse_uint8_naive parse_uint8_fastswar)a do
i = Instance.run(ParseU8)
Stream.map(strings, fn str ->
Instance.write_memory(i, 0x100, [0, 0, 0, 0])
Instance.write_memory(i, 0x100, str |> :binary.bin_to_list())
# Instance.write_string_nul_terminated(i, 0x100, str)
{str, Instance.call(i, func, 0x100, byte_size(str))}
end)
end
test "naive" do
IO.puts(ParseU8.to_wat())
{1, 0} = naive("0")
{1, 1} = naive("1")
{1, 7} = naive("7")
{1, 9} = naive("9")
{1, 12} = naive("12")
{1, 88} = naive("88")
{1, 99} = naive("99")
{1, 120} = naive("120")
{1, 255} = naive("255")
{0, 256} = naive("256")
for result <- naive(for i <- 0..255, do: "#{i}") do
assert {str, {1, i}} = result
assert {i, ""} = Integer.parse(str)
end
for result <- naive(for i <- 256..9999, do: "#{i}") do
assert {_, {0, _}} = result
end
end
defp fuzz_chars() do
0..0xFFFFFF
|> Stream.map(fn b -> <<b::integer-size(32)>> end)
end
test "fastswar" do
{1, 0} = fastswar("0")
{1, 1} = fastswar("1")
{1, 7} = fastswar("7")
{1, 9} = fastswar("9")
{1, 99} = fastswar("99")
{1, 120} = fastswar("120")
{1, 255} = fastswar("255")
{0, _} = fastswar("256")
{0, _} = fastswar("257")
{0, _} = fastswar("258")
{0, _} = fastswar("abc")
{0, _} = fastswar("\0")
{0, _} = fastswar("\0\0\0\0")
for result <- fastswar(for i <- 0..255, do: "#{i}") do
assert {str, {1, i}} = result
assert {i, ""} = Integer.parse(str)
end
for result <- fastswar(for i <- 256..9999, do: "#{i}") do
assert {_, {0, _}} = result
end
fuzz_chars()
|> Stream.take_every(13..17 |> Enum.random())
|> Stream.chunk_every(1024)
|> Stream.each(fn batch ->
fastswar(batch) |> Enum.each(fn result ->
assert {str, result} = result
case Integer.parse(str) do
{i, ""} ->
assert {1, i} = result
:error ->
assert {0, _} = result
end
end)
end)
|> Stream.run()
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment