Created
June 5, 2024 17:04
-
-
Save billdueber/94b4a1315473411799a584c43f7ed1b2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies for the benchmark. benchmark-ips, marc and zinzout are gems;
# the JSONL reader is loaded by a "../lib" path (resolved by `require`
# against the current working directory, not this file's location).
require "benchmark/ips"
require "marc"
require "../lib/marc/jsonl_reader"
require "zinzout"

# Input opened via Zinzout.zin below to obtain one sample marc-in-json record.
source_file = "/Users/dueberb/devel/mlibrary/data/search_full_bibs/all.jsonl.gz"

# File handed to MARC::Reader in the benchmark; the commented-out block
# below shows how it was generated.
temp_file = "/tmp/500k.bat"

# NOTE(review): Pathname is not referenced anywhere in the visible script.
require "pathname"
# One-time fixture generation (left commented out): copy 500_000 records from
# source_file into temp_file via MARC::Writer, ignoring MARC::Exception.
#
# Zinzout.zin(source_file) do |source|
#   iter = MARC::JSONLReader.new(source).each
#   writer = MARC::Writer.new(temp_file)
#   500_000.times do
#     begin
#       writer.write(iter.next)
#     rescue MARC::Exception
#     end
#   end
#   writer.close
# end
# Parse the first line of the source file once; the resulting hash is reused
# on every iteration of the "With a decode" benchmark report.
# NOTE(review): the handle returned by Zinzout.zin is never explicitly closed
# here — acceptable for a one-shot script, but confirm if this is reused.
sample_marcinjson_hash = JSON.parse(Zinzout.zin(source_file).first)
# Perform a small, fixed amount of per-record work so the benchmark measures
# more than bare iteration.
#
# @param r an object responding to `[]` (tag lookup) and `fields(tag)`;
#   in the benchmark this is the record yielded by MARC::Reader
# @return [Array<Array>] for each "035" field, the values of its
#   subfields whose code is "a"
def do_a_tiny_amount_of_work(r)
  # Computed only for their cost; the results are intentionally unused.
  _title = r["245"].to_s
  _id = r["001"].to_s.gsub(/\D/, "")

  r.fields("035").map do |f|
    f.subfields.select { |sf| sf.code == "a" }.map { |sf| sf.value }
  end
end
# Compare reading a record and doing a little work on it against the same
# thing plus one extra MARC::Record.new_from_hash call per iteration.
Benchmark.ips do |x|
  x.config(time: 20, warmup: 5)

  # A single iterator is shared by both reports, so each report pulls its
  # records from the same stream rather than starting from the beginning.
  binary_iterator = MARC::Reader.new(temp_file).each

  x.report("Just read") do
    r = binary_iterator.next
    do_a_tiny_amount_of_work(r)
  end

  x.report("With a decode") do
    r = binary_iterator.next
    # Built and discarded: stands in for the extra cost of constructing a
    # record from a marc-in-json hash on every read.
    _r = MARC::Record.new_from_hash(sample_marcinjson_hash)
    do_a_tiny_amount_of_work(r)
  end

  x.compare!
end
# Numbers when we assume that building a marc-in-json-hash in the reader
# is just as expensive as building a whole record
#
# Calculating -------------------------------------
#            Just read      6.947k (± 9.8%) i/s -    137.856k in  20.031108s
#        With a decode      5.867k (± 8.4%) i/s -    116.772k in  20.039091s
#
# Comparison:
#            Just read:     6946.7 i/s
#        With a decode:     5867.0 i/s - same-ish: difference falls within error
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment