Skip to content

Instantly share code, notes, and snippets.

@bicycle1885
Created September 14, 2016 04:23
Show Gist options
  • Save bicycle1885/472c3a7a176d7ebc1a439f312057b7f4 to your computer and use it in GitHub Desktop.
Save bicycle1885/472c3a7a176d7ebc1a439f312057b7f4 to your computer and use it in GitHub Desktop.
Nested Containment List
# Nested Containment List
# =======================
#
# Alekseyenko, A. V., & Lee, C. J. (2007). "Nested Containment List (NCList): A
# new algorithm for accelerating interval query of genome alignment and interval
# databases." Bioinformatics, 23(11), 1386–1393.
# doi:10.1093/bioinformatics/btl647
# An interval type (inclusive).
immutable Interval{T}
start::T
stop::T
end
function Base.isless{T}(i1::Interval{T}, i2::Interval{T})
return isless(i1.start, i2.start) || (i1.start == i2.start && isless(i1.stop, i2.stop))
end
# The Nested Containment List (NCList) type.
type NCList{T<:Interval}
# intervals
data::Vector{T}
# contained intervals
children::Vector{NCList{T}}
end
function NCList{T}(intervals::AbstractVector{T})
if !issorted(intervals)
error("not sorted")
end
data = T[]
children = NCList{T}[]
i = 1
while i ≤ endof(intervals)
j = i
while j + 1 ≤ endof(intervals) && intervals[j+1].start == intervals[i].start
j += 1
end
nested_intervals = T[]
while i ≤ endof(intervals) && intervals[i].stop ≤ intervals[j].stop
if i != j
push!(nested_intervals, intervals[i])
end
i += 1
end
push!(data, intervals[j])
push!(children, NCList(nested_intervals))
end
@assert length(data) == length(children)
return NCList(data, children)
end
function overlap{T}(nclist::NCList{T}, interval::T)
ret = T[]
data = nclist.data
i = searchsortedfirst(data, interval, lt=(x, y) -> isless(x.stop, y.start))
while i ≤ endof(data) && data[i].start ≤ interval.stop
push!(ret, data[i])
append!(ret, overlap(nclist.children[i], interval))
i += 1
end
return ret
end
function load_gff3_intervals(filepath, chrom)
intervals = Interval{Int}[]
open(filepath) do input
for line in eachline(input)
c, _, _, start, stop, = split(line, '\t')
if c == chrom
push!(intervals, Interval(parse(Int, start), parse(Int, stop)))
end
end
end
sort!(intervals)
return intervals
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment