Created
October 1, 2020 04:11
-
-
Save welly87/29472b1f5fc2200f8ceee3678e361e9a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
? |
Author
welly87
commented
Oct 1, 2020
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.4.7-bin-hadoop2.7"
driver_path = "/content/mysql-connector-java-8.0.14.jar"
os.environ['PYSPARK_SUBMIT_ARGS'] = f'pyspark-shell --driver-class-path {driver_path} --jars {driver_path}'
import findspark
findspark.init("spark-2.4.7-bin-hadoop2.7")
from pyspark.sql import SparkSession
spark = SparkSession \
.builder \
.appName("Bea Cukai - PySpark/Kudu") \
.config('spark.driver.bindAddress','localhost') \
.config('spark.driver.allowMultipleContexts','true') \
.config('spark.sql.execution.arrow.pyspark.enabled', 'true') \
.getOrCreate()
jdbcDF = spark.read \
.format("jdbc") \
.option("url", "jdbc:mysql://relational.fit.cvut.cz/ccs?serverTimezone=UTC") \
.option("driver", "com.mysql.jdbc.Driver") \
.option("dbtable", "transactions_1k") \
.option("user", "guest") \
.option("password", "relational") \
.load()
jdbcDF.createOrReplaceTempView("transactions")
trans = spark.sql("select * from transactions limit where Amount > 20")
trans.count()
tdf = trans.select("*").toPandas()
tdf.head()
SELECT * FROM welly.transactions_1k;
CREATE EXTERNAL TABLE 001_andi.transactions_1k
STORED AS KUDU
TBLPROPERTIES (
'kudu.table_name' = 'transactions_1k'
);
CREATE EXTERNAL TABLE welly.transactions_1k
STORED AS KUDU
TBLPROPERTIES (
'kudu.table_name' = 'transactions_1k'
);
CREATE EXTERNAL TABLE welly.customers
STORED AS KUDU
TBLPROPERTIES (
'kudu.table_name' = 'customers'
);
CREATE EXTERNAL TABLE welly.gasstations
STORED AS KUDU
TBLPROPERTIES (
'kudu.table_name' = 'gasstations'
);
CREATE EXTERNAL TABLE welly.products
STORED AS KUDU
TBLPROPERTIES (
'kudu.table_name' = 'products'
);
!pip install impyla
from impala.dbapi import connect
from impala.util import as_pandas
conn = connect(host='178.128.112.105', port=21050)
cursor = conn.cursor()
cursor.execute('SELECT * FROM welly.transactions_1k LIMIT 100')
df = as_pandas(cursor)
df.head()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment