@mbiemann
Created March 28, 2022 19:33
EMR PySpark Redshift Data API Boto3
import boto3
import time

# Client for the Redshift Data API (not the "redshift" management client).
redshift = boto3.client("redshift-data")


def redshift_sql(query):
    # Submit the statement; ClusterIdentifier, Database and DbUser are placeholders.
    resp = redshift.execute_statement(
        ClusterIdentifier="xxx",
        Database="xxx",
        DbUser="xxx",
        Sql=query
    )
    query_id = resp["Id"]

    # Poll until the statement finishes; fail fast if it is aborted or errors out.
    while True:
        status = redshift.describe_statement(Id=query_id)["Status"]
        if status == "FINISHED":
            break
        if status in ("FAILED", "ABORTED"):
            raise RuntimeError(f"Statement {query_id} ended with status {status}")
        time.sleep(5)

    # Fetch the result set and turn each record into a dict keyed by column name.
    resp = redshift.get_statement_result(Id=query_id)
    columns = [column["name"] for column in resp["ColumnMetadata"]]
    rows = []
    for record in resp["Records"]:
        row = {}
        for i, col in enumerate(columns):
            # Each field is a dict like {"stringValue": "..."}; take its single value.
            record_col = record[i]
            row[col] = record_col[list(record_col.keys())[0]]
        rows.append(row)
    return rows


# Example call: list database users visible to the connected DbUser.
redshift_sql("select * from pg_user")
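
Since the gist title mentions EMR and PySpark, the returned rows can be fed into a Spark DataFrame. The lines below are a minimal sketch, assuming a SparkSession is available on the EMR cluster (the application name is arbitrary) and the result set is small enough to build on the driver.

from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.appName("redshift-data-api-example").getOrCreate()

# Convert each result dict into a Row so Spark can infer the schema.
rows = redshift_sql("select * from pg_user")
df = spark.createDataFrame([Row(**r) for r in rows])
df.show()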