Skip to content

Instantly share code, notes, and snippets.

@TransGirlCodes
Created April 25, 2022 14:16
Show Gist options
  • Save TransGirlCodes/85d60b4c8f7ff17d89aa9968e4ca275e to your computer and use it in GitHub Desktop.
Save TransGirlCodes/85d60b4c8f7ff17d89aa9968e4ca275e to your computer and use it in GitHub Desktop.
Benchmarking the canonical function for NTuple-based Kmers
### A Pluto.jl notebook ###
# v0.17.7
using Markdown
using InteractiveUtils
# ╔═╡ 47e088e3-00ae-46b2-b0ba-325e52156381
# Activate the project environment located at the path below before loading packages.
# NOTE(review): this is a machine-specific absolute path (presumably a local development
# checkout of Kmers.jl) — point it at your own checkout before running the notebook.
import Pkg; Pkg.activate("/Users/bward/repos/github/BioJulia/Kmers.jl")
# ╔═╡ ead5355a-0262-4617-8551-d6b1d21c62bc
using Kmers, BioSequences, BenchmarkTools, DataFrames, Gadfly
# ╔═╡ ed5f28f2-002d-486f-87fc-c3a5e4fce772
# Benchmark `canonical` (not defined in this file; it comes from the packages loaded by
# the `using` line) on random DNA k-mers of length K = 31, 63, 127, 201, 301 and 501.
# The resulting trials are bound to `a` through `f`, one per notebook cell.
# The random k-mer `m` is created in `setup`, not in the benchmarked expression itself.
# NOTE(review): BenchmarkTools runs `setup` once per sample rather than once per
# evaluation, so the same `m` may be reused for several evaluations — confirm this does
# not bias the comparison between implementations.
a = @benchmark canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{31})))
# ╔═╡ 89723451-2bd2-4f62-97c3-371303861dda
b = @benchmark canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{63})))
# ╔═╡ 880ba600-2d78-4c92-8986-265c82dbf692
c = @benchmark canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{127})))
# ╔═╡ bc707264-802c-4637-965d-2f4ce2d9729f
d = @benchmark canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{201})))
# ╔═╡ fb456d3f-7ea9-4a5a-889d-a81b2713ef67
e = @benchmark canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{301})))
# ╔═╡ b3eed7f6-0f5b-4cce-9f76-11c8b9d368ee
f = @benchmark canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{501})))
# ╔═╡ f100f892-2a52-4186-87ef-5856d2ab6481
# Summary table for the `canonical` benchmarks: one row per benchmarked k-mer length,
# holding the `.time` of the minimum, median and maximum estimate of each trial.
canonical_tbl = let trials = (a, b, c, d, e, f)
    # Apply one summary statistic to every trial and keep the `.time` of each estimate.
    times(stat) = [stat(trial).time for trial in trials]
    DataFrame(
        min = times(minimum),
        med = times(median),
        max = times(maximum),
        K = [31, 63, 127, 201, 301, 501],
        # NOTE(review): N presumably is the number of tuple elements backing a k-mer of
        # the matching K — confirm against Kmers.jl.
        N = [1, 2, 4, 7, 10, 16],
        # Label used to tell the implementations apart once the tables are stacked.
        fun = fill("canonical", length(trials)),
    )
end
# ╔═╡ 3de72748-385c-42a9-ad44-f018f4de6e62
"""
    canonical2(m::Kmer)

Return `m` unchanged when `iscanonical(m)` is true; otherwise return
`reverse_complement(m)`.
"""
@inline function canonical2(m::Kmer)
    if iscanonical(m)
        return m
    end
    return reverse_complement(m)
end
# ╔═╡ 3befddb7-dec5-42c7-be58-40d01053d57a
# Benchmark `canonical2` on random DNA k-mers of length K = 31, 63, 127, 201, 301 and
# 501. The resulting trials are bound to `A` through `F`, one per notebook cell.
# The random k-mer `m` is created in `setup`, not in the benchmarked expression itself.
# NOTE(review): BenchmarkTools runs `setup` once per sample rather than once per
# evaluation, so the same `m` may be reused for several evaluations; `canonical2`
# branches on `iscanonical(m)`, so confirm that reuse does not flatter it.
A = @benchmark canonical2(m) setup=(m=rand(Kmers.kmertype(DNAKmer{31})))
# ╔═╡ 85209877-a4af-47fb-a208-d8d4ca6f022d
B = @benchmark canonical2(m) setup=(m=rand(Kmers.kmertype(DNAKmer{63})))
# ╔═╡ 4ed5afaa-3bb7-4c3b-9cc4-6a2e563d4368
C = @benchmark canonical2(m) setup=(m=rand(Kmers.kmertype(DNAKmer{127})))
# ╔═╡ 9cb44d49-1c13-49b1-8c18-dc50f5252ecd
D = @benchmark canonical2(m) setup=(m=rand(Kmers.kmertype(DNAKmer{201})))
# ╔═╡ 66528c01-f6bd-483d-9a47-cfdfb5fc3be7
E = @benchmark canonical2(m) setup=(m=rand(Kmers.kmertype(DNAKmer{301})))
# ╔═╡ fad40869-4bb7-4611-843b-490e723be9da
F = @benchmark canonical2(m) setup=(m=rand(Kmers.kmertype(DNAKmer{501})))
# ╔═╡ b89726c8-c774-4175-9f7d-be59b244b367
# Summary table for the `canonical2` benchmarks: one row per benchmarked k-mer length,
# holding the `.time` of the minimum, median and maximum estimate of each trial.
canonical2_tbl = let trials = (A, B, C, D, E, F)
    # Apply one summary statistic to every trial and keep the `.time` of each estimate.
    times(stat) = [stat(trial).time for trial in trials]
    DataFrame(
        min = times(minimum),
        med = times(median),
        max = times(maximum),
        K = [31, 63, 127, 201, 301, 501],
        # NOTE(review): N presumably is the number of tuple elements backing a k-mer of
        # the matching K — confirm against Kmers.jl.
        N = [1, 2, 4, 7, 10, 16],
        # Label used to tell the implementations apart once the tables are stacked.
        fun = fill("canonical2", length(trials)),
    )
end
"""
    clever_canonical(m::Kmer{A,K,N}) where {A,K,N}

Return the canonical form of `m`, choosing the strategy from the type parameter `N`:

- `N < 4`: take `min(m, reverse_complement(m))`.
- otherwise: return `m` when `iscanonical(m)` is true, else `reverse_complement(m)`.
"""
@inline function clever_canonical(m::Kmer{A,K,N}) where {A,K,N}
    # `N` comes from the argument's type, so this test does not depend on the value of `m`.
    if N >= 4
        return iscanonical(m) ? m : reverse_complement(m)
    end
    revcomp = reverse_complement(m)
    return min(m, revcomp)
end
# Benchmark `clever_canonical` on random DNA k-mers of length K = 31, 63, 127, 201, 301
# and 501. The resulting trials are bound to `cA` through `cF`.
# The random k-mer `m` is created in `setup`, not in the benchmarked expression itself.
# NOTE(review): unlike the earlier benchmark runs, these statements are not preceded by
# `# ╔═╡` cell markers; Pluto presumably expects one expression per cell — confirm the
# notebook still loads as intended.
cA = @benchmark clever_canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{31})))
cB = @benchmark clever_canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{63})))
cC = @benchmark clever_canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{127})))
cD = @benchmark clever_canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{201})))
cE = @benchmark clever_canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{301})))
cF = @benchmark clever_canonical(m) setup=(m=rand(Kmers.kmertype(DNAKmer{501})))
# Summary table for the `clever_canonical` benchmarks: one row per benchmarked k-mer
# length, holding the `.time` of the minimum, median and maximum estimate of each trial.
clever_tbl = let trials = (cA, cB, cC, cD, cE, cF)
    # Apply one summary statistic to every trial and keep the `.time` of each estimate.
    times(stat) = [stat(trial).time for trial in trials]
    DataFrame(
        min = times(minimum),
        med = times(median),
        max = times(maximum),
        K = [31, 63, 127, 201, 301, 501],
        # NOTE(review): N presumably is the number of tuple elements backing a k-mer of
        # the matching K — confirm against Kmers.jl.
        N = [1, 2, 4, 7, 10, 16],
        # Label used to tell the implementations apart once the tables are stacked.
        fun = fill("clever_canonical", length(trials)),
    )
end
# ╔═╡ 8eda9be0-d604-45ac-b3a1-3fc14f3c0103
# Stack the three per-implementation summary tables row-wise into a single table; the
# `fun` column of each table identifies which implementation a row belongs to.
combined_tbl = reduce(vcat, [canonical_tbl, canonical2_tbl, clever_tbl])
# ╔═╡ 847b357d-9c1a-4495-afd2-986e4a5ca37a
# Scatter plot of the minimum time (`:min`) against `:N`, one color per `:fun` value.
plot(combined_tbl, x = :N, y = :min, color = :fun, Geom.point)
# ╔═╡ 72f3cd71-95a9-4a91-b568-6824891bacef
# Same scatter plot for the median time (`:med`).
plot(combined_tbl, x = :N, y = :med, color = :fun, Geom.point)
# Same scatter plot for the maximum time (`:max`).
# NOTE(review): this call has no `# ╔═╡` cell marker of its own and therefore shares the
# cell above; Pluto presumably expects one expression per cell — confirm the notebook
# still loads and shows both plots.
plot(combined_tbl, x = :N, y = :max, color = :fun, Geom.point)
# ╔═╡ Cell order:
# ╠═47e088e3-00ae-46b2-b0ba-325e52156381
# ╠═ead5355a-0262-4617-8551-d6b1d21c62bc
# ╠═ed5f28f2-002d-486f-87fc-c3a5e4fce772
# ╠═89723451-2bd2-4f62-97c3-371303861dda
# ╠═880ba600-2d78-4c92-8986-265c82dbf692
# ╠═bc707264-802c-4637-965d-2f4ce2d9729f
# ╠═fb456d3f-7ea9-4a5a-889d-a81b2713ef67
# ╠═b3eed7f6-0f5b-4cce-9f76-11c8b9d368ee
# ╠═f100f892-2a52-4186-87ef-5856d2ab6481
# ╠═3de72748-385c-42a9-ad44-f018f4de6e62
# ╠═3befddb7-dec5-42c7-be58-40d01053d57a
# ╠═85209877-a4af-47fb-a208-d8d4ca6f022d
# ╠═4ed5afaa-3bb7-4c3b-9cc4-6a2e563d4368
# ╠═9cb44d49-1c13-49b1-8c18-dc50f5252ecd
# ╠═66528c01-f6bd-483d-9a47-cfdfb5fc3be7
# ╠═fad40869-4bb7-4611-843b-490e723be9da
# ╠═b89726c8-c774-4175-9f7d-be59b244b367
# ╠═8eda9be0-d604-45ac-b3a1-3fc14f3c0103
# ╠═847b357d-9c1a-4495-afd2-986e4a5ca37a
# ╠═72f3cd71-95a9-4a91-b568-6824891bacef
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment