Spark load and join CSV files
package com.test;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

import java.util.List;

public class SparkHello {

    public static void main(String[] args) {
        // Build a local SparkSession; it subsumes the older SparkConf/JavaSparkContext setup
        SparkSession sparkSession = SparkSession.builder()
                .master("local")
                .appName("CSV Join App")
                .getOrCreate();

        // Load both CSV files, treating the first row of each file as the header
        Dataset<Row> left = sparkSession.read().option("header", true).csv("/tmp/side.left");
        Dataset<Row> right = sparkSession.read().option("header", true).csv("/tmp/side.right");

        // Inner join on the shared "id" column
        Dataset<Row> joined = left.join(right, "id");

        // Collect the joined rows to the driver and print each row's values
        List<Row> rows = joined.collectAsList();
        for (Row row : rows) {
            System.out.println(row.mkString());
        }

        sparkSession.stop();
    }
}
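As a quick illustration of what the join produces, here are hypothetical contents for /tmp/side.left and /tmp/side.right. Only the "id" header is required by the code above; the "name" and "score" columns are made up for this sketch.

/tmp/side.left
id,name
1,alice
2,bob

/tmp/side.right
id,score
1,90
2,85

With this input, the inner join on "id" yields two rows, and row.mkString() (which concatenates the values with no separator) would print:

1alice90
2bob85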