Skip to content

Instantly share code, notes, and snippets.

@larsyencken
Last active February 7, 2022 10:57
Show Gist options
  • Save larsyencken/5925cb37cf41f0210eb4f745c012e218 to your computer and use it in GitHub Desktop.
Save larsyencken/5925cb37cf41f0210eb4f745c012e218 to your computer and use it in GitHub Desktop.
BigQuery cell magic for Jupyter notebook
# -*- coding: utf-8 -*-
#
# 01-bigquery.py
#
"""
Cell magic for executing BigQuery queries using %%bq and returning Pandas data frames.
Add BIGQUERY_PROJECT_ID to your environment specify a default project id. Otherwise, you can
specify one by setting PROJECT_ID in an earlier cell of your notebook.
Add this file to ~/.ipython/profile_default/startup/
Ensure the following packages are installed:
- pandas
- google-cloud-bigquery
- pandas-gbq
"""
from IPython.core.magic import Magics, magics_class, cell_magic, needs_local_scope
from IPython.core.magic_arguments import magic_arguments, argument, parse_argstring
from IPython.core.interactiveshell import InteractiveShell
from os import environ
try:
from google.cloud import bigquery
from pandas.io import gbq
@magics_class
class BigQueryMagics(Magics):
def __init__(self, shell: InteractiveShell):
super().__init__(shell)
self._bq = None
@property
def client(self):
"Lazy load a BigQuery client."
if not self._bq:
self._bq = bigquery.Client()
return self._bq
@magic_arguments()
@argument("--output", "-o", help="Store result in a variable, defaults to df")
@argument("--index", "-i", help="Set the given field as an index")
@argument("--project-id", "-p", help="Set the project to use")
@argument("--display", "-d", help="Display the output", action="store_true")
@argument("--transpose", "-t", help="Transpose the output", action="store_true")
@cell_magic
def bq(self, line, cell):
args = parse_argstring(self.bq, line)
# df = self.client.query(cell).result().to_dataframe()
default_project_id = globals().get("PROJECT_ID") or environ.get(
"BIGQUERY_PROJECT_ID"
)
project_id = args.project_id or default_project_id
df = gbq.read_gbq(cell, dialect="standard", project_id=project_id)
if args.index:
df.set_index(args.index, inline=True)
if args.transpose:
df = df.T
if args.output is not None:
self.shell.push({args.output: df})
else:
self.shell.push({"df": df})
if args.display:
display(df)
ip = get_ipython()
ip.register_magics(BigQueryMagics)
except ImportError:
pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment