
@jvacek
Created August 9, 2024 17:27
Python BigQuery client: paginate SQL query results via a generator
from google.cloud import bigquery


def fetch_rows(batch_size: int):
    """Yield query results in batches via a generator, so the full BQ result table never has to be held in memory at once."""
    client = bigquery.Client(project='my.project')
    job_config = bigquery.job.QueryJobConfig()
    job_config.use_query_cache = True
    query_string = "SELECT * FROM table"
    # Send the query to BQ
    query = client.query(query_string, job_config=job_config)
    # Tell BQ to compute and wait for the job to finish
    query.result()
    # Get the table where the results are _temporarily_ stored
    result_table = client.get_table(query.destination)
    # Iterate over the rows in the table, chunk them and yield them
    index_counter = 0
    while rows := client.list_rows(result_table, max_results=batch_size, start_index=index_counter):
        ret = list(rows)
        if not ret:
            break
        index_counter += batch_size
        yield ret
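
A minimal usage sketch, assuming the placeholder project and query above have been swapped for real ones: each loop iteration pulls one batch of at most batch_size rows, so memory stays bounded by the batch size rather than the full result set.

# Example: stream the results in batches of 1,000 rows.
for batch in fetch_rows(batch_size=1000):
    for row in batch:
        # Each item is a google.cloud.bigquery.Row; convert to a dict to access fields by name.
        print(dict(row))

Because the batches are read back from the query's temporary destination table (which BigQuery keeps for roughly 24 hours), paging with start_index does not re-run the query.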