This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo: referencing a variable whose assignment is commented out, then
# catching the resulting NameError.
# x = "wtf"
try:
    print(x)
except NameError:
    print("Variable x is not defined")
except Exception:
    # Narrowed from a bare `except:` — a bare except also traps
    # SystemExit and KeyboardInterrupt, which should propagate.
    print("Something else went wrong")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql import SQLContext
from pyspark import SparkContext, SparkConf
import os

# The MySQL JDBC connector must be on the JVM classpath at startup, so
# PYSPARK_SUBMIT_ARGS has to be set BEFORE any SparkContext is created.
SUBMIT_ARGS = "--packages mysql:mysql-connector-java:5.1.39 pyspark-shell"
os.environ["PYSPARK_SUBMIT_ARGS"] = SUBMIT_ARGS

# local[*]: run Spark locally using all available cores.
conf = SparkConf().setAppName("Flat obs Data flow") \
    .setMaster("local[*]")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Start the streaming query that prints the running counts to the console.
# NOTE(review): `wordCounts` must be a streaming DataFrame defined earlier
# in the full script — it is not visible in this excerpt; confirm it is an
# aggregation (outputMode "complete" requires one).
query = wordCounts \
    .writeStream \
    .outputMode("complete") \
    .format("console") \
    .start()

# Block the driver until the streaming query stops or fails.
query.awaitTermination()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build a small two-column DataFrame and derive a categorical flag via
// a SQL CASE WHEN expression over a temp view.
val df = spark.createDataFrame(Seq((2, 9), (1, 5), (1, 1), (1, 2), (2, 8))).toDF("y", "x")
df.createOrReplaceTempView("test")
spark.sql("select CASE WHEN y = 2 THEN 'A' ELSE 'B' END AS flag, x from test").show
// REPL echo (not code — left as a comment so the file compiles):
// df: org.apache.spark.sql.DataFrame = [y: integer, x: integer]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Create a single-column DataFrame of colour names.
val df = Seq("Red", "Green", "Blue").map(Tuple1.apply).toDF("color")
df.createOrReplaceTempView("data")

// BUG FIX: Spark SQL string equality is case-sensitive, so the original
// predicate `color = 'green'` never matched the stored value "Green" and
// Green_ind was always 0. Normalise the case before comparing.
val df4 = sql(""" select *, case when lower(color) = 'green' then 1 else 0 end as Green_ind from data """)
df4.show()
// REPL echo (not code — left as a comment so the file compiles):
// df4: org.apache.spark.sql.DataFrame = [color: string, Green_ind: integer]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.types import *

# BUG FIX: the original used typographic smart quotes around the field
# names (a Python SyntaxError); replaced with ASCII double quotes.
field = [
    StructField("FIELDNAME_1", StringType(), True),
    StructField("FIELDNAME_2", StringType(), True),
    StructField("FIELDNAME_3", StringType(), True),
]
schema = StructType(field)

# Empty DataFrame: no rows, just the column metadata defined above.
# NOTE(review): assumes `sqlContext` and `sc` are created earlier in the
# full script — not visible in this excerpt.
df = sqlContext.createDataFrame(sc.emptyRDD(), schema)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# NOTE(review): truncated excerpt — the closing quotes of the dedent(u""" ...)
# template are outside the visible lines, the scrape has appended " | |"
# artifacts to every line, and the template's internal indentation was lost.
# The template text is runtime data (it is emitted as generated PySpark code
# registering inputs {in1}/{in2} as temp tables ds1/ds2), so it is left
# byte-identical here; restore it from the original source, not this copy.
def generate_code(self): | |
code = dedent(u""" | |
from pyspark.sql import SQLContext | |
# Input data | |
sql_context = SQLContext(spark_session.sparkContext) | |
if {in1} is not None: | |
sql_context.registerDataFrameAsTable({in1}, 'ds1') | |
if {in2} is not None: | |
sql_context.registerDataFrameAsTable({in2}, 'ds2') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
vote down vote up | |
def test_fit_maximize_metric(self):
    """Build the fixture for fitting when the evaluation metric is maximised.

    NOTE(review): truncated excerpt — the estimator setup and assertions of
    this test are not visible here; only the dataset construction is shown.
    Assumes `self.sc` is a live SparkContext provided by the test harness.
    """
    sqlContext = SQLContext(self.sc)
    # 40 rows total: 4 distinct (feature, label) pairs repeated 10 times.
    dataset = sqlContext.createDataFrame([
        (10, 10.0),
        (50, 50.0),
        (100, 100.0),
        (500, 500.0)] * 10,
        ["feature", "label"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def main():
    """Entry point: create a local SparkSession for the RandomForest job.

    NOTE(review): truncated excerpt — only the session/context setup is
    visible; the model-training body lies outside these lines.
    """
    spark = SparkSession \
        .builder \
        .appName("RandomForest") \
        .config("spark.executor.heartbeatInterval", "60s") \
        .getOrCreate()
    sc = spark.sparkContext
    # SQLContext is the legacy entry point; kept here because downstream
    # (unseen) code presumably uses its API.
    sqlContext = SQLContext(sc)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def test_fit_minimize_metric(self):
    """Build the fixture for fitting when the evaluation metric is minimised.

    NOTE(review): truncated excerpt — the assertions of this test are not
    visible; the snippet ends right after constructing the estimator.
    Assumes `self.sc` is a live SparkContext provided by the test harness.
    """
    sqlContext = SQLContext(self.sc)
    # 40 rows total: 4 distinct (feature, label) pairs repeated 10 times.
    dataset = sqlContext.createDataFrame([
        (10, 10.0),
        (50, 50.0),
        (100, 100.0),
        (500, 500.0)] * 10,
        ["feature", "label"])
    # Project-local estimator that induces a known error for the tuner.
    iee = InducedErrorEstimator()