PySpark hidden REST API example - adapted from https://gist.github.com/arturmkrtchyan/5d8559b2911ac951d34a
The submission request, sent to the Spark master's REST endpoint with curl:
#!/bin/bash
# Submit spark_pi.py to a Spark standalone cluster through the hidden REST API
# exposed by the master on port 6066.
curl -X POST http://[spark-cluster-ip]:6066/v1/submissions/create --header "Content-Type:application/json;charset=UTF-8" --data '{
  "action":"CreateSubmissionRequest",
  "appArgs":[
    "/home/eliasah/Desktop/spark_pi.py"
  ],
  "appResource":"file:/home/eliasah/Desktop/spark_pi.py",
  "clientSparkVersion":"2.2.1",
  "environmentVariables":{
    "SPARK_ENV_LOADED":"1"
  },
  "mainClass":"org.apache.spark.deploy.SparkSubmit",
  "sparkProperties":{
    "spark.driver.supervise":"false",
    "spark.app.name":"Simple App",
    "spark.eventLog.enabled":"true",
    "spark.submit.deployMode":"cluster",
    "spark.master":"spark://[spark-master]:6066"
  }
}'
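When scripting against this endpoint, the submissionId in the response (shown below) can be captured directly. A minimal sketch, assuming jq is installed and that the JSON payload above has been saved to a create_request.json file (hypothetical name):

# Extract the driver id from the CreateSubmissionResponse with jq.
# create_request.json is assumed to hold the JSON payload shown above.
SUBMISSION_ID=$(curl -s -X POST http://[spark-cluster-ip]:6066/v1/submissions/create \
  --header "Content-Type:application/json;charset=UTF-8" \
  --data @create_request.json | jq -r '.submissionId')
echo "$SUBMISSION_ID"   # e.g. driver-20180522165321-0001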
A successful request returns a CreateSubmissionResponse carrying the driver's submissionId:
{
  "action" : "CreateSubmissionResponse",
  "message" : "Driver successfully submitted as driver-20180522165321-0001",
  "serverSparkVersion" : "2.2.1",
  "submissionId" : "driver-20180522165321-0001",
  "success" : true
}
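The same hidden REST API also exposes status and kill endpoints for the returned driver id; a sketch under the same [spark-cluster-ip] placeholder assumption:

# Poll the state of the submitted driver (SUBMITTED, RUNNING, FINISHED, ...).
curl http://[spark-cluster-ip]:6066/v1/submissions/status/driver-20180522165321-0001

# Cancel the driver if needed.
curl -X POST http://[spark-cluster-ip]:6066/v1/submissions/kill/driver-20180522165321-0001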
The submitted application itself, spark_pi.py, is the classic Monte Carlo Pi estimator:
from __future__ import print_function

import sys
from random import random
from operator import add

from pyspark.sql import SparkSession


if __name__ == "__main__":
    """
    Usage: pi [partitions]
    """
    spark = SparkSession\
        .builder\
        .appName("PythonPi")\
        .getOrCreate()

    # Number of partitions to spread the sampling over (defaults to 2).
    partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
    n = 100000 * partitions

    def f(_):
        # Sample a point uniformly in the [-1, 1] x [-1, 1] square and
        # count it if it lands inside the unit circle.
        x = random() * 2 - 1
        y = random() * 2 - 1
        return 1 if x ** 2 + y ** 2 <= 1 else 0

    # Monte Carlo estimate: the circle-to-square area ratio is pi/4.
    count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
    print("Pi is roughly %f" % (4.0 * count / n))

    spark.stop()
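To sanity-check spark_pi.py before going through the REST API, it can be run locally with spark-submit; a minimal example, assuming spark-submit is on the PATH:

# Run the Pi estimator locally on two threads with 10 partitions.
spark-submit --master "local[2]" /home/eliasah/Desktop/spark_pi.py 10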