Created
March 16, 2019 19:11
-
-
Save corneliouzbett/d83e48130381312f1700052de652cea9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example: build a DataFrame from a text file by inferring its schema from
# Row objects, register it as a temporary view, and query it with Spark SQL.
#
# NOTE(review): `spark` is not defined in this snippet -- presumably an
# existing SparkSession (e.g. the one the pyspark shell provides); confirm
# before running this as a standalone script.
from pyspark.sql import Row

sc = spark.sparkContext

# Load a text file and convert each line to a Row.
lines = sc.textFile("examples/src/main/resources/people.txt")
parts = lines.map(lambda line: line.split(","))
# Field 0 becomes the name; field 1 is parsed as an integer age.
people = parts.map(lambda p: Row(name=p[0], age=int(p[1])))

# Infer the schema, and register the DataFrame as a table.
schemaPeople = spark.createDataFrame(people)
schemaPeople.createOrReplaceTempView("people")

# SQL can be run over DataFrames that have been registered as a table.
teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")

# The results of SQL queries are DataFrame objects.
# rdd returns the content as an :class:`pyspark.RDD` of :class:`Row`.
teenNames = teenagers.rdd.map(lambda p: "Name: " + p.name).collect()
for name in teenNames:
    print(name)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
spark on fire