Created
November 23, 2018 07:56
-
-
Save korkridake/c04a0618395017aea356d2e4fe39c411 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# Resources
# - https://docs.databricks.com/user-guide/libraries.html
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------
# Echo the notebook's Spark session. `spark` is not defined in this file; it is
# presumably the SparkSession that the Databricks notebook provides (see the
# recorded output below) -- confirm when running outside a notebook.
spark
# Out[1]: <pyspark.sql.session.SparkSession at 0x7f214c611ba8>

# Import PySpark Libraries
from pyspark.sql.functions import skewness, kurtosis
from pyspark.sql.functions import var_pop, var_samp, stddev, stddev_pop, sumDistinct, ntile
from pyspark.sql.types import IntegerType
from pyspark.sql.types import StringType

# How to install a third-party library (here: plotly) on the cluster:
# Step 1: Go to Workspace Directory
# Step 2: Create Library
# Step 3: Upload Python Egg or PyPi
# Step 4: Attach to the Cluster
# Step 5: Check whether the package installation was successful
import plotly

# Evaluating the version string as the cell's last expression displays it.
plotly.__version__
# Out[14]: '3.4.1'
# ---------------------------------------------------------------------------------------
# Sample Code to Run Plotly in Databricks
# ---------------------------------------------------------------------------------------
from plotly.offline import plot
# Import only the trace classes that are used instead of a wildcard import,
# so the notebook namespace is not flooded with every graph object.
from plotly.graph_objs import Histogram2dContour, Scatter
import numpy as np

# Two independent samples of 2000 standard-normal values to plot against each other.
x = np.random.randn(2000)
y = np.random.randn(2000)

# Instead of simply calling plot(...), store your plot as a variable and pass it to displayHTML().
# Make sure to specify output_type='div' as a keyword argument.
# (Note that if you call displayHTML() multiple times in the same cell, only the last will take effect.)
#
# The nested `contours` and `marker` properties are given as plain dicts: the
# top-level graph_objs.Contours / graph_objs.Marker classes are deprecated in
# plotly 3.x and dicts are accepted for the same properties.
p = plot(
    [
        Histogram2dContour(x=x, y=y, contours=dict(coloring='heatmap')),
        Scatter(x=x, y=y, mode='markers', marker=dict(color='white', size=3, opacity=0.3)),
    ],
    output_type='div',
)
# displayHTML is not defined in this file; presumably the Databricks notebook
# built-in that renders an HTML string in the cell output.
displayHTML(p)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment