Last active
December 17, 2016 10:27
-
-
Save gruzovator/6afb687b8d854c5ac916b58968f683ac to your computer and use it in GitHub Desktop.
pagination for periodically updated collection of sorted docs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
""" | |
Pagination model for sorted docs collection, that is periodically updated. | |
The key ideas: | |
- all docs have sequential id | |
- docs are sorted by composite key (weight, sequential id) | |
- response to page request containse 'next_offset' (a composite key) that should be used to get next page | |
""" | |
import bisect | |
import time | |
from collections import namedtuple | |
from random import randint | |
from threading import Lock | |
from threading import Thread | |
class Doc(namedtuple('Doc', 'seq_id, weight')): # every doc has sequential id | |
def __repr__(self): | |
return '{w:%d, s:%d}' % (self.weight, self.seq_id) | |
def docs_source(docs_collection): | |
"""Docs source model | |
Should be run in thread. Generates docs with sequential id and weight attributes. | |
""" | |
seq_id = 0 | |
while True: | |
seq_id += 1 | |
# weight model (importance + freshness) | |
weight = randint(0, 10) # doc content weight emulation | |
weight += seq_id # time component of weight, recent docs are more valuable | |
docs_collection.add(Doc(seq_id, weight)) | |
time.sleep(randint(0, 5)) | |
class DocsCollection(object): | |
def __init__(self, maxsize=6): | |
self.sorted_keys = [] | |
self.docs = {} | |
self.maxsize = maxsize | |
self.__mtx = Lock() | |
def add(self, doc): | |
with self.__mtx: | |
key = (doc.weight, doc.seq_id) | |
assert key not in self.docs # keys are unique cause have a seq_id part | |
bisect.insort(self.sorted_keys, key) | |
self.docs[key] = doc | |
if len(self.sorted_keys) > self.maxsize: | |
key = self.sorted_keys.pop(0) | |
del self.docs[key] | |
def page(self, offset_key=None, limit=2): | |
""" Page request | |
:param offset_key: None for first page | |
:param limit: page docs number | |
:return: tuple of page docs, next offset key and all docs (to view current state) | |
""" | |
with self.__mtx: | |
page_docs = [] | |
next_offset = None | |
pos = bisect.bisect_left(self.sorted_keys, offset_key) if offset_key else 0 | |
try: | |
for i in range(pos, pos + limit): | |
page_docs.append(self.docs[self.sorted_keys[i]]) | |
except IndexError: | |
pass | |
try: | |
next_offset = self.sorted_keys[pos + limit] | |
except IndexError: | |
pass | |
all_docs = [self.docs[k] for k in self.sorted_keys] | |
return (page_docs, next_offset, all_docs) | |
if __name__ == '__main__': | |
docs = DocsCollection(6) | |
src_thread = Thread(target=docs_source, args=[docs]) | |
src_thread.daemon = True | |
src_thread.start() | |
# User reads docs | |
while True: | |
page_n = 0 | |
next_offset = None | |
while True: | |
page_docs, next_offset, all_docs = docs.page(next_offset) | |
page_n += 1 | |
print 'User reads docs on page', page_n | |
print '<ALL DOCS: %s>' % all_docs | |
print '<PAGE: %s>' % page_docs | |
print '<NEXT OFFSET: %s>' % str(next_offset) | |
print 'reading ...' | |
time.sleep(5) | |
if next_offset is None: | |
print 'No more docs. User press F5 or leaves the site' | |
break | |
else: | |
'User press "Next page"' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment