Streams data to an existing Google BigQuery table.
import logging

from google.cloud import bigquery

SERVICE_ACCOUNT = '/path/to/credentials/file/<role>-<project>.json'
BQ_DATASET = 'state_data'

client = bigquery.Client.from_service_account_json(SERVICE_ACCOUNT)
dataset = BQ_DATASET

logger = logging.getLogger()


def stream_data(client: bigquery.Client, dataset_name: str, table_name: str, data: list):
    """
    Given the necessary credentials and references, streams data to a BigQuery table.

    :param client: a google.cloud.bigquery.Client from the Google SDK
    :param dataset_name: name of the target dataset
    :param table_name: name of the target table
    :param data: list of dictionaries containing the data records
    :return: None; any streaming errors are logged
    """
    # Fetch the table from the API so that its schema is available.
    table = client.get_table(f'{dataset_name}.{table_name}')

    # BigQuery imposes a limit on the size of each streaming request, so it can
    # be useful during debugging to check how much you are sending. A complete
    # design should split large datasets into chunks before calling this
    # function (see the sketch below).
    logger.debug(f'stream_data() for {len(data)} rows')

    # insert_rows() was named create_rows() in pre-1.0 releases of
    # google-cloud-bigquery.
    errors = client.insert_rows(table, data)
    if errors:
        logger.error(f'BigQuery Streaming Errors: {errors}')
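
As the comment in stream_data() notes, large datasets should be split into chunks before streaming. Below is a minimal sketch of that pattern; the chunked() helper and the sample records are hypothetical additions for illustration, and the batch size of 500 rows is an assumption chosen to stay comfortably under BigQuery's per-request streaming limits, not a value from the gist.

from typing import Iterator


def chunked(rows: list, size: int = 500) -> Iterator[list]:
    # Hypothetical helper: yield successive fixed-size slices of the row list.
    # 500 rows per batch is an assumed size, intended to keep each streaming
    # request well under BigQuery's per-request limits.
    for start in range(0, len(rows), size):
        yield rows[start:start + size]


# Illustrative usage with made-up records; the field names assume a matching
# 'population' table already exists in the target dataset.
large_dataset = [
    {'state': 'CA', 'population': 39_500_000},
    {'state': 'TX', 'population': 28_300_000},
]

for chunk in chunked(large_dataset):
    stream_data(client, dataset, 'population', chunk)

Generating chunks lazily with a generator keeps memory usage flat for large source data, and because stream_data() logs errors per batch rather than raising, one failed chunk does not abort the remaining uploads.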