Skip to content

Instantly share code, notes, and snippets.

@oxinabox
Created March 9, 2017 16:10
Show Gist options
  • Save oxinabox/c8ef9efa9c3d7630c876dd42797bba74 to your computer and use it in GitHub Desktop.
Save oxinabox/c8ef9efa9c3d7630c876dd42797bba74 to your computer and use it in GitHub Desktop.
An Iterator for Sliding Windows. Probably originally based on something from Word2Vec.jl
"""
    SlidingWindow(ws, lsize, rsize)

Iterator over fixed-size windows taken from the stream `ws`.
Each window has `lsize` slots on the left, one centre slot, and `rsize`
slots on the right.
"""
type SlidingWindow
    ws              # source being windowed over; normally a word stream
    lsize::Int64    # number of slots to the left of the centre slot
    rsize::Int64    # number of slots to the right of the centre slot
end
# Iterator trait: the number of windows is not known in advance.
function Base.iteratorsize(::Type{SlidingWindow})
    return Base.SizeUnknown()
end
# Total number of slots in one window: the left context, the centre slot,
# and the right context.
function window_length(window)
    left, right = window.lsize, window.rsize
    return left + 1 + right
end
# Build the initial iteration state `(words, ws_state)`.
#
# `words` starts as a buffer of `window_length(window)` empty strings and is
# preloaded with the first `window_length(window) - 1` elements of the stream,
# so that the first call to `next` completes a full window.
function Base.start(window::SlidingWindow)
    ws_state = start(window.ws)
    words = String["" for ii in 1:window_length(window)]
    for ii in 1:window_length(window)-1
        # A stream shorter than one window has nothing more to give: stop here
        # instead of calling `next` on an exhausted stream. `done` on the
        # returned state is then true, so the iteration is simply empty.
        done(window.ws, ws_state) && break
        _, (words, ws_state) = next(window, (words, ws_state))
    end
    return (words, ws_state)
end
# Iteration is finished exactly when the wrapped stream has nothing left to
# read; only the stream half of `state` is consulted.
function Base.done(window::SlidingWindow, state)
    _, stream_state = state
    #CHECKME: Is this getting all the words I want at the end?
    return done(window.ws, stream_state)
end
# Advance the window by one element of the underlying stream.
#
# `state` is a `(words, ws_state)` pair: the current buffer and the state of
# the wrapped stream. Returns `(window_contents, new_state)`, where
# `window_contents` is the shifted buffer with empty entries removed and
# `new_state` holds the shifted buffer and the advanced stream state.
function Base.next(window::SlidingWindow, state)
    (words, ws_state) = state
    next_word, ws_state = next(window.ws, ws_state)
    # Shift on a copy: slicing allocates a fresh vector, so the buffer held by
    # the caller's `state` is left untouched and that state can be reused.
    words = words[2:end] #PREMOPT: could use a circular buffer
    push!(words, next_word)
    # Empty entries are dropped from the returned window only; the state keeps
    # them so the buffer length stays fixed.
    return (filter(w -> !isempty(w), words), (words, ws_state))
end
"""
    sliding_window(words; lsize=5, rsize=5)

Wrap `words` in a `SlidingWindow` with `lsize` slots of left context and
`rsize` slots of right context (both default to 5).
"""
sliding_window(words; lsize=5, rsize=5) = SlidingWindow(words, lsize, rsize)
using FactCheck
# Checks the window contents for two different context sizes.
# NOTE(review): `words_of` and `data` are not defined in this snippet; they
# must be provided by the surrounding test setup.
facts("Should get a sliding window") do
    word_stream = words_of(data)

    # One slot of context on each side -> three-word windows.
    narrow = collect(sliding_window(word_stream, lsize=1, rsize=1))
    @fact narrow[1] --> String["the", "king", "and"]
    @fact narrow[2] --> String["king", "and", "his"]

    # Two slots of context on each side -> five-word windows.
    wide = collect(sliding_window(word_stream, lsize=2, rsize=2))
    @fact wide[end] --> String["find", "the", "land", "of", "gold"]
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment