adjam · September 1, 2016 00:48
diff --git a/pager.py b/pager.py
 #!/usr/bin/python

 from random import randint

 class QueryResult(object):
    """Stand-in for the complete set of documents matching one query.

    The total hit count is drawn at random once, at construction time, from
    1..max_results inclusive; individual pages are synthesized on demand.
    """

    def __init__(self,query,max_results):
        self.count = randint(1,max_results)
        self.query = query

    def get_results(self,start,page_size=10):
        """Build the page of results that begins at zero-based offset ``start``.

        Returns a dict with the keys "query", "start", "numFound" and "docs".
        "docs" holds at most ``page_size`` entries and never runs past the
        total hit count; document numbers in ids and titles are one-based.
        """
        # the page ends at whichever comes first: the last hit or a full page
        stop = min(self.count, start + page_size)
        page = [{"id": "doc-{}".format(position+1),
                 "title" : "Title #{}".format(position+1)}
                for position in range(start, stop)]
        # shaped roughly like a page of results from Solr
        return { "query" : self.query, "start": start, "numFound": self.count, "docs": page }
        
 class MockServer(object):
    """Fake Solr endpoint that hands out results one page at a time.

    The first time a query is seen, a QueryResult with a random hit count
    (at most ``doc_count``) is created and remembered, so later pages of the
    same query report the same total.
    """

    def __init__(self,doc_count=5000):
        self.active_queries = {}
        self.doc_count = doc_count

    def query(self,query,start=0,page_size=10):
        """Return one page of results for ``query`` starting at offset ``start``."""
        matches = self.active_queries.get(query)
        if matches is None:
            # first sighting of this query: pin down its hit count for later pages
            matches = QueryResult(query,self.doc_count)
            self.active_queries[query] = matches
        return matches.get_results(start,page_size)

 # everything above this line is a simulation of a thing that delivers
 # results in chunks like a Solr server
            

 def iterate_results(query,page_size=10):
    """Yield each document matching ``query`` in turn, fetching page by page.

    query     -- query string handed to the server
    page_size -- number of documents requested per round trip (default 10)

    This is a generator: nothing is fetched until iteration starts.
    Raises ValueError, on first iteration, if page_size is less than 1.
    """
    if page_size < 1:
        raise ValueError("page_size must be at least 1, got {!r}".format(page_size))

    # with real Solr library, the next line would be where you establish
    # a connection to the server
    server = MockServer()
    # initial query; gives us number of hits and the first batch of results
    results = server.query(query,0,page_size)
    num_found = results['numFound']
    print("Initial query says {} hits for '{}'".format(num_found,query))
    while True:
        docs = results['docs']
        if not docs:
            # defensive: an empty page cannot advance the offset, so stop
            # instead of requesting the same page forever
            break
        for doc in docs:
            yield doc
        last_index = results['start'] + len(docs)
        if last_index >= num_found:
            # everything has been delivered; skip the pointless extra fetch
            break
        # honor the caller's page size on every follow-up request too
        results = server.query(query,last_index,page_size)

   
 # and now ... we iterate! 

 if __name__ == '__main__':
    import sys
    # invocation: ./pager.py [query] [page size]
    default_page_size = 10
    # an absent or empty first argument falls back to the stock query
    query = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else 'some_query'
    page_size = default_page_size
    if len(sys.argv) > 2:
        try:
            requested = int(sys.argv[2])
        except ValueError:
            # only a non-numeric argument is expected here; anything else
            # (e.g. KeyboardInterrupt) must not be swallowed
            sys.stderr.write("ignoring non-integer page size {!r}; using {}\n".format(sys.argv[2], default_page_size))
        else:
            if requested > 0:
                page_size = requested
            else:
                sys.stderr.write("ignoring non-positive page size {}; using {}\n".format(requested, default_page_size))

    result_modulus = 12
    last_doc = None

    print("printing every {}th result".format(result_modulus))
    for index, doc in enumerate(iterate_results(query,page_size)):
        if index % result_modulus == 0:
            print("\t{}".format(doc['title']))
        last_doc = doc
    # `doc` is never bound when the iterator is empty, so report that case
    # instead of failing with a NameError
    if last_doc is None:
        print("No documents returned")
    else:
        print("Last doc: {}".format(last_doc['title']))
	#!/usr/bin/python

	from random import randint

	class QueryResult(object):
	    """Mock representation of a set of matching results to a query.

	    The total hit count is drawn at random once, at construction time, from
	    1..max_results inclusive; individual pages are synthesized on demand.
	    """

	    def __init__(self,query,max_results):
	        self.query = query
	        self.count = randint(1,max_results)

	    def get_results(self,start,page_size=10):
	        """Build the page of results that begins at zero-based offset ``start``.

	        Returns a dict with the keys "query", "start", "numFound" and "docs".
	        "docs" holds at most ``page_size`` entries and never runs past the
	        total hit count; document numbers in ids and titles are one-based.
	        """
	        # the page ends at whichever comes first: the last hit or a full page
	        stop = min(self.count, start + page_size)
	        docs = [{"id": "doc-{}".format(position+1),
	                 "title" : "Title #{}".format(position+1)}
	                for position in range(start, stop)]
	        # this dict looks sort of like a page of results from Solr
	        return { "query" : self.query, "start": start, "numFound": self.count, "docs": docs }

	class MockServer(object):
	    """Mock Solr server; remembers doc counts for queries and can return results in a given batch size.

	    The first time a query is seen, a QueryResult with a random hit count
	    (at most ``doc_count``) is created and remembered, so later pages of the
	    same query report the same total.
	    """

	    def __init__(self,doc_count=5000):
	        self.doc_count = doc_count
	        self.active_queries = {}

	    def query(self,query,start=0,page_size=10):
	        """Return one page of results for ``query`` starting at offset ``start``."""
	        result = self.active_queries.get(query)
	        if result is None:
	            # first sighting of this query: pin down its hit count for later pages
	            result = QueryResult(query,self.doc_count)
	            self.active_queries[query] = result
	        return result.get_results(start,page_size)

	# everything above this line is a simulation of a thing that delivers
	# results in chunks like a Solr server


	def iterate_results(query,page_size=10):
	    """Yield each document matching ``query`` in turn, fetching page by page.

	    query     -- query string handed to the server
	    page_size -- number of documents requested per round trip (default 10)

	    This is a generator: nothing is fetched until iteration starts.
	    Raises ValueError, on first iteration, if page_size is less than 1.
	    """
	    if page_size < 1:
	        raise ValueError("page_size must be at least 1, got {!r}".format(page_size))

	    # with real Solr library, the next line would be where you establish
	    # a connection to the server
	    server = MockServer()
	    # initial query; gives us number of hits and the first batch of results
	    results = server.query(query,0,page_size)
	    num_found = results['numFound']
	    print("Initial query says {} hits for '{}'".format(num_found,query))
	    while True:
	        docs = results['docs']
	        if not docs:
	            # defensive: an empty page cannot advance the offset, so stop
	            # instead of requesting the same page forever
	            break
	        for doc in docs:
	            yield doc
	        last_index = results['start'] + len(docs)
	        if last_index >= num_found:
	            # everything has been delivered; skip the pointless extra fetch
	            break
	        # honor the caller's page size on every follow-up request too
	        results = server.query(query,last_index,page_size)


	# and now ... we iterate!

	if __name__ == '__main__':
	    import sys
	    # invocation: ./pager.py [query] [page size]
	    default_page_size = 10
	    # an absent or empty first argument falls back to the stock query
	    query = sys.argv[1] if len(sys.argv) > 1 and sys.argv[1] else 'some_query'
	    page_size = default_page_size
	    if len(sys.argv) > 2:
	        try:
	            requested = int(sys.argv[2])
	        except ValueError:
	            # only a non-numeric argument is expected here; anything else
	            # (e.g. KeyboardInterrupt) must not be swallowed
	            sys.stderr.write("ignoring non-integer page size {!r}; using {}\n".format(sys.argv[2], default_page_size))
	        else:
	            if requested > 0:
	                page_size = requested
	            else:
	                sys.stderr.write("ignoring non-positive page size {}; using {}\n".format(requested, default_page_size))

	    result_modulus = 12
	    last_doc = None

	    print("printing every {}th result".format(result_modulus))
	    for index, doc in enumerate(iterate_results(query,page_size)):
	        if index % result_modulus == 0:
	            print("\t{}".format(doc['title']))
	        last_doc = doc
	    # `doc` is never bound when the iterator is empty, so report that case
	    # instead of failing with a NameError
	    if last_doc is None:
	        print("No documents returned")
	    else:
	        print("Last doc: {}".format(last_doc['title']))