shentonfreude · December 7, 2015 20:39 · shentonfreude · Dec 7, 2015
diff --git a/cloudsearch_page_size.py b/cloudsearch_page_size.py
 #!/usr/bin/env python3
 """
 We want to query for a page of 'size' N results and return them, along with
 'next' and 'previous' links. The CloudSearch 'start' offsets are based on the
 page (for our app, starting at 1) and the size. So with size=100: page=1 has
 start=0, page=2 has start=100, page=10 has start=900. We don't want to return
 'previous' if we're at page=1, and don't want to return 'next' if it's beyond
 the maximum results 'found'. So if size=100 and found=543, the last page=6
 (holding the final 43 results) and there is no page=7. More generally:
 lastpage = ceil(found / size).
 """

 import boto3

 # Module-level client for the CloudSearch domain's search endpoint; it is
 # the default client used by get_results_page().
 cs = boto3.client(
     'cloudsearchdomain',
     region_name='us-east-1',
     endpoint_url='https://search-mydomain.us-east-1.cloudsearch.amazonaws.com',
 )


 def get_results_page(query, page=1, size=3, client=None):
    """Fetch one page of search hits plus 'previous'/'next' page numbers.

    Pages are 1-based for callers, while the CloudSearch 'start' offset is
    0-based.

    Args:
        query: Search string passed through to the ``search`` call.
        page: 1-based page number to fetch.
        size: Number of hits per page.
        client: Optional object exposing a boto3-style
            ``search(query=..., start=..., size=...)`` method. Defaults to
            the module-level ``cs`` client.

    Returns:
        A dict with keys:
          'previous' -- previous page number, or None when there is none.
          'next'     -- next page number, or None when there is none.
          'found'    -- total hit count reported by the search (0 when
                        page < 1, because no search is performed).
          'results'  -- the hit ids joined with ' | ', or an empty list when
                        the requested page is out of range.
    """
    # A page below 1 would produce a negative 'start', which the search
    # call rejects; skip the request and point the caller at page 1.
    if page < 1:
        return {'previous': None, 'next': 1, 'found': 0, 'results': []}
    if client is None:
        client = cs
    # page=1 is our app's first page, but the 'start' offset is 0-based.
    start = (page - 1) * size
    ret = client.search(query=query, start=start, size=size)
    found = ret['hits']['found']
    # Ceiling division kept in integer arithmetic (no float rounding).
    lastpage = (found + size - 1) // size
    if page > lastpage:
        # Past the end we still learn 'found'; link back to the real last
        # page, or to nothing at all when the query matched no documents.
        return {'previous': lastpage or None, 'next': None,
                'found': found, 'results': []}
    # Every page after the first has a predecessor (page 2 included).
    prevpage = page - 1 if page > 1 else None
    nextpage = page + 1 if page < lastpage else None
    ids = ' | '.join(h['id'] for h in ret['hits']['hit'])
    # Progress trace consumed when the module is run as a script.
    print('<{}] {} ({}) [{}> {}'.format(
        prevpage or 'X', page, start, nextpage or 'X', ids))
    return {'previous': prevpage,
            'next': nextpage,
            'found': found,
            'results': ids,
            }

 # Page through the results from page 1 until a page reports no 'next'.
 page = 1
 res = get_results_page('nasa', page)
 while res['next']:
    page += 1
    res = get_results_page('nasa', page)

 # A page far past the end: no results, but 'previous' names a page that
 # can be fetched instead.
 res = get_results_page('nasa', page=1000)
 print('Get page too far: {}'.format(res))
 prev_res = get_results_page('nasa', page=res['previous'])
 print('Get prev page: {}'.format(prev_res))

 # A page below 1: no search is made, and 'next' points at page 1.
 res = get_results_page('nasa', page=0)
 print('Get page too low: {}'.format(res))
 next_res = get_results_page('nasa', page=res['next'])
 print('Get next page: {}'.format(next_res))
	#!/usr/bin/env python3
	"""
	We want to query for a page of 'size' N results and return them, along with
	'next' and 'previous' links. The CloudSearch 'start' offsets are based on the
	page (for our app, starting at 1) and the size. So with size=100: page=1 has
	start=0, page=2 has start=100, page=10 has start=900. We don't want to return
	'previous' if we're at page=1, and don't want to return 'next' if it's beyond
	the maximum results 'found'. So if size=100 and found=543, the last page=6
	(holding the final 43 results) and there is no page=7. More generally:
	lastpage = ceil(found / size).
	"""

	import boto3

	# Module-level client for the CloudSearch domain's search endpoint; it is
	# the client used by get_results_page().
	cs = boto3.client(
	    'cloudsearchdomain',
	    region_name='us-east-1',
	    endpoint_url='https://search-mydomain.us-east-1.cloudsearch.amazonaws.com',
	)


	def get_results_page(query, page=1, size=3, client=None):
	    """Fetch one page of search hits plus 'previous'/'next' page numbers.

	    Pages are 1-based for callers, while the CloudSearch 'start' offset is
	    0-based.

	    Args:
	        query: Search string passed through to the ``search`` call.
	        page: 1-based page number to fetch.
	        size: Number of hits per page.
	        client: Optional object exposing a boto3-style
	            ``search(query=..., start=..., size=...)`` method. Defaults to
	            the module-level ``cs`` client.

	    Returns:
	        A dict with keys:
	          'previous' -- previous page number, or None when there is none.
	          'next'     -- next page number, or None when there is none.
	          'found'    -- total hit count reported by the search (0 when
	                        page < 1, because no search is performed).
	          'results'  -- the hit ids joined with ' | ', or an empty list
	                        when the requested page is out of range.
	    """
	    # A page below 1 would produce a negative 'start', which the search
	    # call rejects; skip the request and point the caller at page 1.
	    if page < 1:
	        return {'previous': None, 'next': 1, 'found': 0, 'results': []}
	    if client is None:
	        client = cs
	    # page=1 is our app's first page, but the 'start' offset is 0-based.
	    start = (page - 1) * size
	    ret = client.search(query=query, start=start, size=size)
	    found = ret['hits']['found']
	    # Ceiling division kept in integer arithmetic (no float rounding).
	    lastpage = (found + size - 1) // size
	    if page > lastpage:
	        # Past the end we still learn 'found'; link back to the real last
	        # page, or to nothing at all when the query matched no documents.
	        return {'previous': lastpage or None, 'next': None,
	                'found': found, 'results': []}
	    # Every page after the first has a predecessor (page 2 included).
	    prevpage = page - 1 if page > 1 else None
	    nextpage = page + 1 if page < lastpage else None
	    ids = ' | '.join(h['id'] for h in ret['hits']['hit'])
	    # Progress trace consumed when the module is run as a script.
	    print('<{}] {} ({}) [{}> {}'.format(
	        prevpage or 'X', page, start, nextpage or 'X', ids))
	    return {'previous': prevpage,
	            'next': nextpage,
	            'found': found,
	            'results': ids,
	            }

	# Walk the pages starting with 1 and following the 'next' number until a
	# page reports that there is no further page.
	page = 1
	while True:
	    res = get_results_page('nasa', page)
	    if not res['next']:
	        break
	    page = res['next']

	# A page far past the end: no results, but 'previous' names a page that
	# can be fetched instead.
	res = get_results_page('nasa', page=1000)
	print('Get page too far: {}'.format(res))
	print('Get prev page: {}'.format(get_results_page('nasa', page=res['previous'])))

	# A page below 1: no search is made, and 'next' points at page 1.
	res = get_results_page('nasa', page=0)
	print('Get page too low: {}'.format(res))
	print('Get next page: {}'.format(get_results_page('nasa', page=res['next'])))