Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save korkridake/c04a0618395017aea356d2e4fe39c411 to your computer and use it in GitHub Desktop.
Save korkridake/c04a0618395017aea356d2e4fe39c411 to your computer and use it in GitHub Desktop.
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# Resources
# - https://docs.databricks.com/user-guide/libraries.html
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# Evaluate the `spark` session object as a bare expression so the notebook
# echoes its repr (see the recorded output below). `spark` is not defined in
# this snippet -- presumably it is pre-created by the Databricks notebook
# runtime; confirm before running elsewhere.
spark
# Out[1]: <pyspark.sql.session.SparkSession at 0x7f214c611ba8>
# Import PySpark Libraries
from pyspark.sql.functions import skewness, kurtosis
from pyspark.sql.functions import var_pop, var_samp, stddev, stddev_pop, sumDistinct, ntile
from pyspark.sql.types import IntegerType
from pyspark.sql.types import StringType
# Step 1: Go to Workspace Directory
# Step 2: Create Library
# Step 3: Upload a Python Egg or specify a PyPI package
# Step 4: Attach to the Cluster
# Step 5: Check whether the package installation was successful
# Importing plotly verifies that the library is available to this notebook;
# an ImportError here means the import (and hence the installation) failed.
import plotly
# Bare expression: the notebook echoes the installed plotly version.
plotly.__version__
# Out[14]: '3.4.1'
# ---------------------------------------------------------------------------------------
# Sample Code to Run Plotly in Databricks
# ---------------------------------------------------------------------------------------
from plotly.offline import plot
from plotly.graph_objs import *
import numpy as np
# Demo data: 2000 samples per axis drawn from the standard normal distribution.
x = np.random.randn(2000)
y = np.random.randn(2000)
# Instead of simply calling plot(...), store your plot as a variable and pass it to displayHTML().
# Make sure to specify output_type='div' as a keyword argument.
# (Note that if you call displayHTML() multiple times in the same cell, only the last will take effect.)
p = plot(
    [
        # Nested trace properties are given as plain dicts. The legacy
        # graph_objs.Contours / graph_objs.Marker helpers used previously are
        # deprecated since plotly 3.0 and emit deprecation warnings; dicts
        # produce the same figure without them.
        Histogram2dContour(x=x, y=y, contours=dict(coloring='heatmap')),
        Scatter(
            x=x,
            y=y,
            mode='markers',
            marker=dict(color='white', size=3, opacity=0.3),
        ),
    ],
    # 'div' makes plot() return the figure as an HTML <div> string rather
    # than writing an HTML file.
    output_type='div',
)
# Render the returned HTML fragment in the notebook cell output.
displayHTML(p)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment