Skip to content

Instantly share code, notes, and snippets.

@maciejskorski
Last active April 4, 2020 18:52
Show Gist options
  • Save maciejskorski/4dcc81b0a995e0eaf701b2c36b8895a1 to your computer and use it in GitHub Desktop.
Save maciejskorski/4dcc81b0a995e0eaf701b2c36b8895a1 to your computer and use it in GitHub Desktop.
skipgram generator
from itertools import islice,chain
from collections import deque
def gen_skipgrams(itr, window=1, symmetric=False, Q=None):
    """Lazily yield skip-gram pairs from an iterable.

    Each item is paired with every item still held in the context buffer,
    i.e. with up to ``window`` items that immediately precede it. Pairs are
    emitted as ``(earlier, later)`` tuples in the order the later item is
    consumed.

    Args:
        itr: Any iterable of items (for example the words of one sentence).
        window: Maximum distance between the two members of a pair. Only
            used when no non-empty ``Q`` is supplied; must then be >= 1.
        symmetric: If true, the reversed pair ``(later, earlier)`` is
            yielded right after each ``(earlier, later)`` pair.
        Q: Optional non-empty ``deque`` holding context that precedes
            ``itr``. It is iterated and appended to in place, and its own
            ``maxlen`` (not ``window``) bounds the context. A missing or
            empty ``Q`` is replaced by a fresh ``deque(maxlen=window)``.

    Yields:
        2-tuples of items taken from the context buffer and ``itr``.

    Raises:
        ValueError: If a new buffer is needed and ``window`` is below 1.
            As this is a generator, the error surfaces on first iteration.
    """
    # Falsy check (rather than ``is None``) is kept on purpose: callers that
    # pass an empty deque have always received a fresh bounded buffer.
    if not Q:
        if window < 1:
            raise ValueError("window must be a positive integer")
        # Start empty: pre-filling the buffer with the first items would
        # skip the pairs among those items whenever window >= 3.
        Q = deque(maxlen=window)
    for item in itr:
        for context in Q:
            yield (context, item)
            if symmetric:
                yield (item, context)
        # Append only after pairing so an item is never paired with itself;
        # maxlen silently evicts the oldest context item.
        Q.append(item)
# Demo: treat the text as an iterable of sentences.
text = 'I eat oat flakes every day. This is my beloved breakfast. It is also very healthy. '
sentences = text.split('. ')
# Generate pairs sentence by sentence so that no pair crosses a sentence
# boundary. The window is passed by keyword: the previous call handed a
# deque() positionally, which landed in the `symmetric` parameter.
pairs = chain.from_iterable(
    gen_skipgrams(sentence.split(), window=1) for sentence in sentences
)
for pair in pairs:
    print(pair)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment