Last active
May 9, 2020 12:52
-
-
Save sharpobject/d3b51d41b20517f648d8e7f34fdf23a4 to your computer and use it in GitHub Desktop.
reverse-complement lua program for The Computer Language Benchmarks Game
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- The Computer Language Benchmarks Game | |
-- https://salsa.debian.org/benchmarksgame-team/benchmarksgame/ | |
-- contributed by Mike Pall (with ideas from Rici Lake) | |
-- modified for 5.3 by Robin | |
-- multithread by sharpobject | |
-- Process role.
-- arg[2] = worker id if we are a worker, absent if we are the main process.
local is_worker = arg[2]
-- Only the worker whose id is "1" writes each result as soon as it is
-- computed; every other worker queues its results until it is told "done".
local always_output = is_worker == "1"

-- Job protocol, as read by a worker from its stdin. Each job is:
--   line 1 = size in bytes of the input chunk that follows
--   line 2 = offset within a line at which this chunk's output starts
--   line 3 = byte position in stdout at which this chunk's output starts
--   then exactly <size> bytes of input
-- Jobs repeat back to back; a line containing only "done" ends the stream.

-- Library functions and file methods are fetched once into locals and are
-- always called with an explicit handle, e.g. fread(stdin, "*l").
local sub = string.sub
local stdin = io.stdin
local stdout = io.stdout
local fread = stdin.read
local fwrite = stdout.write
local fflush = stdout.flush
local fclose = stdout.close
if is_worker then | |
-- Complement lookup table that doubles as a reverse-complement cache.
--
-- Single-character keys give the complement of one nucleotide code (always
-- upper case, whatever the case of the input); "" and "\n" map to "" so that
-- line breaks vanish from the result.
--
-- Any longer key is resolved on first use by __index as
--   revcomp(rest) .. complement(first char)
-- i.e. the reverse complement of the whole key, and is then stored in the
-- table so later lookups of the same string are plain table hits.
local iubc = setmetatable({
  A="T", C="G", B="V", D="H", K="M", R="Y",
  a="T", c="G", b="V", d="H", k="M", r="Y",
  T="A", G="C", V="B", H="D", M="K", Y="R", U="A",
  t="A", g="C", v="B", h="D", m="K", y="R", u="A",
  N="N", S="S", W="W", n="N", s="S", w="W",
  [""] = "", ["\n"] = "",
}, { __index = function(t, s)
  -- A missing key of length 1 is a character with no complement. Recursing
  -- on it would look up the same key again and never terminate, so fail
  -- with a message that names the offending character instead.
  if #s < 2 then
    error("iubc: no complement defined for "..string.format("%q", s))
  end
  local r = t[sub(s, 2)]..t[sub(s, 1, 1)]
  t[s] = r
  return r
end })
local concat = table.concat
local fseek = stdout.seek

--- Reverse-complements one chunk of sequence text.
--
-- The chunk is processed back to front in 61-byte windows. Each window is
-- looked up in `iubc` as fifteen 4-byte groups plus one single byte (last
-- group first) and is followed by one "\n" in the output. Newlines inside
-- the input disappear because `iubc` maps them to "".
--
-- @param s      input chunk (string)
-- @param offset number of virtual bytes assumed to follow `s`; windows are
--               aligned from position #s + offset, and reads past the end
--               of `s` simply yield shorter (or empty) groups
-- @param t      scratch table reused between calls; slots 1..n are
--               overwritten, anything beyond is ignored
-- @return       the reverse-complemented text as a single string
local function rev(s, offset, t)
  local n = 0
  local hi = #s + offset
  -- Full windows, starting with the one that ends at `hi`.
  -- The unrolled concatenation is deliberate: this is the hot loop.
  for i = hi - 60, 1, -61 do
    n = n + 1
    t[n] = iubc[sub(s, i+57, i+60)]..
           iubc[sub(s, i+53, i+56)]..
           iubc[sub(s, i+49, i+52)]..
           iubc[sub(s, i+45, i+48)]..
           iubc[sub(s, i+41, i+44)]..
           iubc[sub(s, i+37, i+40)]..
           iubc[sub(s, i+33, i+36)]..
           iubc[sub(s, i+29, i+32)]..
           iubc[sub(s, i+25, i+28)]..
           iubc[sub(s, i+21, i+24)]..
           iubc[sub(s, i+17, i+20)]..
           iubc[sub(s, i+13, i+16)]..
           iubc[sub(s, i+9, i+12)]..
           iubc[sub(s, i+5, i+8)]..
           iubc[sub(s, i+1, i+4)]..
           iubc[sub(s, i, i)].."\n"
  end
  -- Bytes 1 .. hi % 61 lie in front of the first full window. Emit them in
  -- groups of up to four, again back to front, with no trailing newline.
  for i = hi % 61, 1, -4 do
    local from = i - 3
    if from < 1 then
      from = 1
    end
    n = n + 1
    t[n] = iubc[sub(s, from, i)]
  end
  return concat(t, "", 1, n)
end
-- Worker main loop: read jobs from stdin until "done", reverse-complement
-- each chunk and write the result at the requested position in stdout.

local jobs = {}    -- deferred results, each {out_pos, text}
local nj = 0       -- number of entries in `jobs`
local t = {}       -- scratch buffer handed to rev() and reused for every job

-- Writes job[2] to stdout starting at byte position job[1].
-- NOTE(review): the result of the seek is not checked, so this assumes
-- stdout is seekable (e.g. redirected to a file) -- confirm for the
-- intended way of running the program.
local function do_write(job)
  fseek(stdout, "set", job[1])
  fwrite(stdout, job[2])
end

-- Converts one header line to a number. Fails with a protocol-specific
-- message (also when the line is nil because stdin hit end of file) rather
-- than relying on implicit string-to-number coercion.
local function header_number(line, what)
  return assert(tonumber(line), "worker: missing or malformed "..what)
end

local line = fread(stdin, "*l")
while line ~= "done" do
  local in_sz = header_number(line, "chunk size")
  local offset = header_number(fread(stdin, "*l"), "line offset")
  local out_pos = header_number(fread(stdin, "*l"), "output position")
  local chunk = fread(stdin, in_sz)
  assert(chunk and #chunk == in_sz, "worker: truncated input chunk")
  local text = rev(chunk, offset, t)
  if always_output then
    -- This worker writes every result as soon as it is ready.
    do_write({out_pos, text})
  else
    -- All other workers hold their results until the job stream ends.
    nj = nj + 1
    jobs[nj] = {out_pos, text}
  end
  line = fread(stdin, "*l")
end

-- Flush everything that was deferred, in the order it was received.
for i = 1, nj do
  do_write(jobs[i])
end
else | |
-- Main process: copy the input to stdout, then cut every sequence body into
-- jobs and hand them to the worker processes round-robin.

local N_WORKERS = 5
local LINE_BYTES = 61                     -- stride of one input line incl. "\n"
local WORKER_CHUNK = 30000 * LINE_BYTES   -- bytes of input per full-size job
local find = string.find

-- Start the workers: the same interpreter (arg[-1]) running the same script
-- (arg[0]) with the arguments "0" and the worker id, so the id ends up in
-- the child's arg[2].
-- NOTE(review): the command line is assembled without any shell quoting, so
-- an interpreter or script path containing spaces or shell metacharacters
-- would break it.
local workers = {}
for i = 1, N_WORKERS do
  workers[i] = assert(io.popen(arg[-1].." "..arg[0].." 0 "..i, "w"))
end

-- Emit the input unchanged first. The workers later seek within stdout and
-- overwrite the sequence bodies (see do_write in the worker branch).
local s = fread(stdin, "*a")
fwrite(stdout, s)
fflush(stdout)

local widx = 1           -- worker that receives the next job
local write_idxs = {}    -- output position per job, reused for each sequence
local lo = find(s, "\n", 1, true)
while lo do
  -- `hi` is the index just before the next ">" (or the last index of s).
  local hi = (find(s, ">", lo, true) or #s + 1) - 1
  -- [lo, hi) is a chunk to reverse complement
  -- (including a leading newline).
  -- Despite the fact that lo points to the newline in 1-indexed s,
  -- lo points to the character after the newline for the purpose of
  -- 0-indexed fseek.
  local span = hi - lo

  -- If the last line isn't the same length as the others,
  -- offset = the number of characters on the last line
  -- (including a leading newline).
  local offset = span % LINE_BYTES

  -- The final job takes whatever is left after the full-size ones.
  local last_chunk = span % WORKER_CHUNK
  if last_chunk == 0 then
    last_chunk = WORKER_CHUNK
  end
  -- With a partial final line, rev() drops that job's leading newline without
  -- emitting one in its place, so its output is one byte shorter than its
  -- input and everything written after it starts one byte earlier.
  local last_chunk_for_pos = last_chunk
  if offset ~= 0 then
    last_chunk_for_pos = last_chunk - 1
  end
  local nj = (span - last_chunk) // WORKER_CHUNK + 1

  -- Output positions run in the opposite order to the input slices: the
  -- last input slice is written first, at lo.
  local pos = lo
  for i = nj, 1, -1 do
    write_idxs[i] = pos
    pos = pos + (i == nj and last_chunk_for_pos or WORKER_CHUNK)
  end

  -- Send the jobs in input order. Only the final job gets line offset 0.
  local idx = lo
  for i = 1, nj do
    local worker = workers[widx]
    local size, line_offset = WORKER_CHUNK, offset
    if i == nj then
      size, line_offset = last_chunk, 0
    end
    fwrite(worker,
           size.."\n"..line_offset.."\n"..write_idxs[i].."\n",
           sub(s, idx, idx + size - 1))
    fflush(worker)
    idx = idx + size
    widx = (widx % N_WORKERS) + 1
  end

  -- hi+1 is the ">" of the next header; continue after that header's line.
  lo = find(s, "\n", hi + 2, true)
end

-- Tell every worker to finish, then close its pipe.
for i = 1, N_WORKERS do
  local worker = workers[i]
  fwrite(worker, "done\n")
  fclose(worker)
end
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment