Setup Jupyter kernel for Databricks dbconnect
# ~/.ipython/profile_dbconnect/startup/00-setup.py
# IPython startup script: creates the Databricks Connect Spark session and
# registers a %sql / %%sql magic that returns query results as a pandas DataFrame.
from IPython import get_ipython
from IPython.core.magic import line_magic, line_cell_magic, Magics, magics_class
from pyspark.sql import SparkSession
from pyspark.dbutils import DBUtils

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext
dbutils = DBUtils(sc)


@magics_class
class DatabricksConnectMagics(Magics):

    @line_cell_magic
    def sql(self, line, cell=None):
        if cell and line:
            raise ValueError("Line must be empty for cell magic", line)
        try:
            from autovizwidget.widget.utils import display_dataframe
        except ImportError:
            print("Please run `pip install autovizwidget` to enable the visualization widget.")
            display_dataframe = lambda x: x
        return display_dataframe(self.get_spark().sql(cell or line).toPandas())

    def get_spark(self):
        # Reuse the Spark session from the user namespace if one exists,
        # otherwise create it lazily.
        user_ns = get_ipython().user_ns
        if "spark" in user_ns:
            return user_ns["spark"]
        else:
            from pyspark.sql import SparkSession
            user_ns["spark"] = SparkSession.builder.getOrCreate()
            return user_ns["spark"]


ip = get_ipython()
ip.register_magics(DatabricksConnectMagics)
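
With the startup script in place, a notebook running on the dbconnect kernel can issue Spark SQL straight from a cell; the table name below is only a placeholder, not something defined in the gist:

%sql SELECT current_date()

%%sql
SELECT * FROM my_database.my_table LIMIT 10

Both forms return a pandas DataFrame, rendered through autovizwidget when it is installed.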

This setup assumes that a conda environment named dbconnect already exists (an example environment file is shown below). Register a Jupyter kernel for that environment with:

python -m ipykernel install --user --name dbconnect --display-name "dbconnect"

Then place the startup script shown above and the kernel spec shown below at the following locations (the kernel.json path is the macOS default Jupyter data directory):

~/Library/Jupyter/kernels/dbconnect/kernel.json
~/.ipython/profile_dbconnect/startup/00-setup.py
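
If the dbconnect IPython profile does not exist yet, it can be created first so the startup directory is in place; saving the script above locally as 00-setup.py before copying it is an assumption, not something the gist spells out:

ipython profile create dbconnect
cp 00-setup.py ~/.ipython/profile_dbconnect/startup/00-setup.py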

name: dbconnect
dependencies:
  - python=3.7
  - jupyter
  - pip:
      - databricks-connect==6.2.*
      - autovizwidget
      - pylint
      - six
      - databricks-cli
      - pyarrow==0.13.0
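
One way to create the environment from this file (the filename dbconnect.yml is assumed here; the gist does not name it):

conda env create -f dbconnect.yml
conda activate dbconnect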
{
  "argv": [
    "/usr/local/anaconda3/envs/dbconnect/bin/python",
    "-m",
    "ipykernel",
    "-f",
    "{connection_file}",
    "--profile",
    "dbconnect"
  ],
  "display_name": "dbconnect",
  "language": "python"
}
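
As a quick sanity check, the kernel registration and the cluster connection can be verified from the dbconnect environment; databricks-connect test assumes the connection has already been set up with databricks-connect configure (workspace URL, token, cluster ID):

jupyter kernelspec list
databricks-connect configure
databricks-connect test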