Last active
February 7, 2022 10:57
-
-
Save larsyencken/5925cb37cf41f0210eb4f745c012e218 to your computer and use it in GitHub Desktop.
BigQuery cell magic for Jupyter notebook
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
# | |
# 01-bigquery.py | |
# | |
""" | |
Cell magic for executing BigQuery queries using %%bq and returning Pandas data frames. | |
Add BIGQUERY_PROJECT_ID to your environment specify a default project id. Otherwise, you can | |
specify one by setting PROJECT_ID in an earlier cell of your notebook. | |
Add this file to ~/.ipython/profile_default/startup/ | |
Ensure the following packages are installed: | |
- pandas | |
- google-cloud-bigquery | |
- pandas-gbq | |
""" | |
from IPython.core.magic import Magics, magics_class, cell_magic, needs_local_scope | |
from IPython.core.magic_arguments import magic_arguments, argument, parse_argstring | |
from IPython.core.interactiveshell import InteractiveShell | |
from os import environ | |
try: | |
from google.cloud import bigquery | |
from pandas.io import gbq | |
@magics_class | |
class BigQueryMagics(Magics): | |
def __init__(self, shell: InteractiveShell): | |
super().__init__(shell) | |
self._bq = None | |
@property | |
def client(self): | |
"Lazy load a BigQuery client." | |
if not self._bq: | |
self._bq = bigquery.Client() | |
return self._bq | |
@magic_arguments() | |
@argument("--output", "-o", help="Store result in a variable, defaults to df") | |
@argument("--index", "-i", help="Set the given field as an index") | |
@argument("--project-id", "-p", help="Set the project to use") | |
@argument("--display", "-d", help="Display the output", action="store_true") | |
@argument("--transpose", "-t", help="Transpose the output", action="store_true") | |
@cell_magic | |
def bq(self, line, cell): | |
args = parse_argstring(self.bq, line) | |
# df = self.client.query(cell).result().to_dataframe() | |
default_project_id = globals().get("PROJECT_ID") or environ.get( | |
"BIGQUERY_PROJECT_ID" | |
) | |
project_id = args.project_id or default_project_id | |
df = gbq.read_gbq(cell, dialect="standard", project_id=project_id) | |
if args.index: | |
df.set_index(args.index, inline=True) | |
if args.transpose: | |
df = df.T | |
if args.output is not None: | |
self.shell.push({args.output: df}) | |
else: | |
self.shell.push({"df": df}) | |
if args.display: | |
display(df) | |
ip = get_ipython() | |
ip.register_magics(BigQueryMagics) | |
except ImportError: | |
pass |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment