defmodule Chunktolines do
  @moduledoc """
  Benchmark candidates for rebuilding lines from a stream of binary chunks.

  Every `test_*` function takes an enumerable of binaries whose boundaries may
  fall anywhere (including in the middle of a line), reassembles the lines and
  sums the leading integer of each line with `stream_sum/1`.

  A final line that is not terminated by a newline is emitted once the input
  is exhausted, so no number is silently dropped at the end of the input.
  """

  ## 1. regex split (preserving "\n")

  @doc """
  Sums the numbers in `enum`, splitting the buffered data with a look-behind
  regex so that every complete line keeps its trailing newline.
  """
  @spec test_1_regex_split(Enumerable.t()) :: integer()
  def test_1_regex_split(enum) do
    enum
    |> chunks_to_lines(fn chunk, pending ->
      split_off_tail(Regex.split(~r/(?<=\n)/, pending <> chunk))
    end)
    |> stream_sum()
  end

  ## 2. recursive

  @doc """
  Sums the numbers in `enum`, scanning every chunk byte by byte with
  `next_line/2`.
  """
  @spec test_2_recursive(Enumerable.t()) :: integer()
  def test_2_recursive(enum) do
    enum
    |> chunks_to_lines(&next_line/2)
    |> stream_sum()
  end

  @doc """
  Splits `chunk` into lines, continuing the partial line `current_line` left
  over from the previous chunk.

  Returns `{complete_lines, partial_line}`. The complete lines are in input
  order and do not include the newline; `partial_line` is whatever follows the
  last newline in `chunk` (possibly an empty binary).
  """
  @spec next_line(binary(), binary()) :: {[binary()], binary()}
  def next_line(chunk, current_line \\ ""), do: next_line(chunk, current_line, [])

  @doc """
  Worker for `next_line/2`; `lines` holds the completed lines in reverse order.
  """
  @spec next_line(binary(), binary(), [binary()]) :: {[binary()], binary()}
  # Matching raw bytes instead of UTF-8 codepoints: the byte 0x0A never occurs
  # inside a multibyte UTF-8 sequence, and a chunk boundary may cut a character
  # in half, which a codepoint match would reject with a FunctionClauseError.
  def next_line(<<?\n, rest::binary>>, current_line, lines) do
    next_line(rest, "", [current_line | lines])
  end

  def next_line(<<byte, rest::binary>>, current_line, lines) do
    next_line(rest, <<current_line::binary, byte>>, lines)
  end

  def next_line(<<>>, current_line, lines), do: {Enum.reverse(lines), current_line}

  ## 3. String.split (losing "\n")

  @doc """
  Sums the numbers in `enum`, splitting the buffered data with
  `String.split/2`; the emitted lines no longer carry their newline.
  """
  @spec test_3_string_split(Enumerable.t()) :: integer()
  def test_3_string_split(enum) do
    enum
    |> chunks_to_lines(fn chunk, pending ->
      split_off_tail(String.split(pending <> chunk, "\n"))
    end)
    |> stream_sum()
  end

  @doc """
  Parses the leading integer of every line in `enum` and returns the total.

  Raises `MatchError` when a line does not start with an integer (for example
  a blank line), because `Integer.parse/1` returns `:error` for it.
  """
  @spec stream_sum(Enumerable.t()) :: integer()
  def stream_sum(enum) do
    enum
    |> Stream.map(fn line ->
      # Assertive match: a line without a leading integer is treated as a bug
      # in the input data rather than something to skip silently.
      {num, _rest} = Integer.parse(line)
      num
    end)
    |> Enum.sum()
  end

  # Turns a stream of chunks into a stream of lines.
  #
  # `split_fun` receives `(chunk, pending)` and must return
  # `{complete_lines, new_pending}`. The `:eof` marker appended after the last
  # chunk gives the reducer one extra call in which the buffered, unterminated
  # final line is flushed; without it that line would stay in the accumulator
  # and never be emitted.
  defp chunks_to_lines(enum, split_fun) do
    enum
    |> Stream.concat([:eof])
    |> Stream.transform("", fn
      :eof, "" -> {[], ""}
      :eof, pending -> {[pending], ""}
      chunk, pending -> split_fun.(chunk, pending)
    end)
  end

  # Separates the last element (the still-incomplete line) from the complete
  # lines in front of it. `parts` is never empty: both split functions return
  # at least one element.
  defp split_off_tail(parts) do
    [tail | complete_reversed] = Enum.reverse(parts)
    {Enum.reverse(complete_reversed), tail}
  end
end
# Benchmark driver: runs the line-based baseline and the three
# chunk-reassembly strategies from Chunktolines over the same input file.
#
# "0_lines" lets File.stream!/3 do the line splitting; the other jobs read the
# file in 2048-byte chunks and rebuild the lines themselves.
#
# NOTE(review): `File.stream!(path, modes, line_or_bytes)` is the argument
# order used by Elixir releases before 1.16; newer releases appear to prefer
# `File.stream!(path, line_or_bytes, modes)` and warn on this form. Kept as-is
# so the script still runs on older releases — confirm the target version.
numbers_small_lines = File.stream!("numbers_small.txt", [], :line)
numbers_small_chunks = File.stream!("numbers_small.txt", [], 2048)

Benchee.run(
  %{
    "0_lines" => fn -> Chunktolines.stream_sum(numbers_small_lines) end,
    "1_regex_split" => fn -> Chunktolines.test_1_regex_split(numbers_small_chunks) end,
    "2_recursive" => fn -> Chunktolines.test_2_recursive(numbers_small_chunks) end,
    "3_string_split" => fn -> Chunktolines.test_3_string_split(numbers_small_chunks) end
  },
  # Measure each job for 10 seconds.
  time: 10
)