Created
December 23, 2018 18:50
-
-
Save navid-kalaei/0c2d2129c9c3a190c48097e93c295f10 to your computer and use it in GitHub Desktop.
Spark Practices and Codes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
_____ __ ____ | |
/ ___/____ ____ ______/ /__ / __ )__ __ | |
\__ \/ __ \/ __ `/ ___/ //_/ / __ / / / / | |
___/ / /_/ / /_/ / / / ,< / /_/ / /_/ / | |
/____/ .___/\__,_/_/ /_/|_| /_____/\__, / | |
/_/ /____/ | |
______ __ __ __ ___ | |
/ ____/___ _/ /____ ____ ___ ___ / /_ / /___ _____ ___ ____ _/ (_) | |
/ /_ / __ `/ __/ _ \/ __ `__ \/ _ \/ __ \ __ / / __ `/ __ `__ \/ __ `/ / / | |
/ __/ / /_/ / /_/ __/ / / / / / __/ / / / / /_/ / /_/ / / / / / / /_/ / / / | |
/_/ \__,_/\__/\___/_/ /_/ /_/\___/_/ /_/ \____/\__,_/_/ /_/ /_/\__,_/_/_/ | |
_ __ _ __ __ __ __ _ | |
/ | / /___ __ __(_)___/ / / //_/___ _/ /___ ____ (_) | |
/ |/ / __ `/ | / / / __ / / ,< / __ `/ / __ `/ _ \/ / | |
/ /| / /_/ /| |/ / / /_/ / / /| / /_/ / / /_/ / __/ / | |
/_/ |_/\__,_/ |___/_/\__,_/ /_/ |_\__,_/_/\__,_/\___/_/ | |
################################### | |
Practice 1: Network Word Count (Stateless) | |
run-example org.apache.spark.examples.streaming.NetworkWordCount localhost 9999 | |
nc.exe -lp 9999 | |
################################### | |
Practice 2: Compiling Java program | |
cd spark\spark\codes | |
# Generate project | |
mvn archetype:generate -DgroupId=org.apache.spark.examples -DartifactId=JavaNetworkWordCount -DarchetypeArtifactId=maven-archetype-quickstart -DarchetypeVersion=1.4 -DinteractiveMode=false | |
cd JavaNetworkWordCount | |
# Copy the pom.xml file to the project directory | |
# Copy the JavaNetworkWordCount source file into the project's source directory | |
# Package project | |
mvn package | |
# Deploy | |
spark-submit --class org.apache.spark.examples.JavaNetworkWordCount --master local[4] target\JavaNetworkWordCount-1.0-SNAPSHOT.jar localhost 9999 | |
nc.exe -lp 9999 | |
#################################### | |
Practice 3: Network Word Count (Stateful) | |
run-example org.apache.spark.examples.streaming.StatefulNetworkWordCount localhost 9999 | |
nc.exe -lp 9999 | |
#################################### | |
Practice 5: PageView | |
# PageViewGenerator | |
run-example org.apache.spark.examples.streaming.clickstream.PageViewGenerator 44444 10 | |
# PageView | |
run-example org.apache.spark.examples.streaming.clickstream.PageViewStream errorRatePerZipCode localhost 44444 | |
#################################### | |
Practice 6: Spark SQL | |
spark-shell --jars MysqlConnector\mysql-connector.jar | |
import com.mysql.cj.jdbc.Driver | |
val sqlContext = new org.apache.spark.sql.SQLContext(sc) | |
val url = "jdbc:mysql://localhost:3306/data" | |
val df = sqlContext.read.format("jdbc").option("driver", "com.mysql.cj.jdbc.Driver").option("user", "fatemeh").option("password", "1234").option("url", url).option("dbtable", "people").load() | |
// Print the schema of this DataFrame. | |
df.printSchema() | |
val countsByAge = df.groupBy("age").count() | |
countsByAge.show() | |
countsByAge.repartition(1).write.format("json").save("c:/spark/count") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment