Skip to content

Instantly share code, notes, and snippets.

@welly87
Created October 2, 2020 02:33
Show Gist options
  • Save welly87/835eb53fe7f48b0bae5f6e3db0441500 to your computer and use it in GitHub Desktop.
Save welly87/835eb53fe7f48b0bae5f6e3db0441500 to your computer and use it in GitHub Desktop.
@welly87
Copy link
Author

welly87 commented Oct 2, 2020

# Mark the environment-preparation script as executable, then run it from the
# notebook's current working directory.
# NOTE(review): presumably this installs the Java/Spark toolchain that the
# later cells point JAVA_HOME/SPARK_HOME at — confirm against prepare_env.sh.
!chmod +x squdu/prepare_env.sh
!squdu/prepare_env.sh

@welly87
Copy link
Author

welly87 commented Oct 2, 2020

import os

# Export the tool locations and table settings as environment variables so
# that later cells and any child processes started from this notebook
# inherit them.
_spark_home = "/content/spark-2.4.7-bin-hadoop2.7"
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = _spark_home

# Put Spark's bin directory on PATH. The append is skipped when the entry is
# already there so that re-running this cell does not keep growing PATH, and
# a missing PATH is treated as empty instead of raising KeyError.
_spark_bin = _spark_home + "/bin"
_current_path = os.environ.get("PATH", "")
if _spark_bin not in _current_path.split(":"):
    if _current_path:
        os.environ["PATH"] = _current_path + ":" + _spark_bin
    else:
        os.environ["PATH"] = _spark_bin

# Table settings: the source table, the Kudu table it maps to, and its
# primary-key column.
_source_table = "Product"
_kudu_table_name = "Product"
_primary_key = "ProductID"
os.environ["SOURCE_TABLE"] = _source_table
os.environ["KUDU_TABLE_NAME"] = _kudu_table_name
os.environ["PRIMARY_KEY"] = _primary_key

os.environ["IMPALA_TABLE_NAME"] = "welly.product"

# Space-separated "<source table> <kudu table> <primary key>" argument string.
# NOTE(review): presumably consumed by squdu/ingest.sh — confirm in the script.
os.environ["SPARK_CMD_ARGS"] = " ".join(
    [_source_table, _kudu_table_name, _primary_key]
)

@welly87
Copy link
Author

welly87 commented Oct 2, 2020

# Change the notebook's working directory to squdu/ so the relative path
# ./ingest.sh below resolves inside it.
%cd squdu/
# Mark the ingestion script as executable, then run it.
# NOTE(review): presumably reads the SPARK_CMD_ARGS / table environment
# variables exported by the earlier setup cell — confirm in ingest.sh.
!chmod +x ./ingest.sh
!./ingest.sh

@welly87
Copy link
Author

welly87 commented Oct 2, 2020

:q

@welly87
Copy link
Author

welly87 commented Oct 2, 2020


from impala.dbapi import connect
from impala.util import as_pandas

import sys
import os

# Expose the Kudu table through Impala as an external table, then print the
# table's row count.

# Read the table names first so a missing environment variable fails with a
# KeyError before any connection is opened.
kudu_table_name = os.environ['KUDU_TABLE_NAME']
impala_table_name = os.environ['IMPALA_TABLE_NAME']

# Table identifiers cannot be bound as query parameters, so the statement is
# assembled by concatenation; the values come from this notebook's own
# environment settings.
create_statment = 'CREATE EXTERNAL TABLE IF NOT EXISTS ' + impala_table_name + ' STORED AS KUDU TBLPROPERTIES("kudu.table_name" = "' + kudu_table_name + '");'

conn = connect(host='178.128.112.105', port=21050)
try:
    cursor = conn.cursor()
    try:
        cursor.execute(create_statment)
        print("table created")

        cursor.execute('SELECT count(*) FROM ' + impala_table_name)

        # Materialise the result set as a DataFrame before the cursor closes.
        df = as_pandas(cursor)
    finally:
        # Release the cursor even if one of the statements fails.
        cursor.close()
finally:
    # Always close the connection so it is not leaked on errors.
    conn.close()

print(df.head())

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment