Skip to content

Instantly share code, notes, and snippets.

@fclesio
Created October 29, 2020 21:38
Show Gist options
  • Save fclesio/35a855ef5bd1e6e5d988c3f51d4d6644 to your computer and use it in GitHub Desktop.
Save fclesio/35a855ef5bd1e6e5d988c3f51d4d6644 to your computer and use it in GitHub Desktop.
import awswrangler as wr
import pandas as pd
# Names and locations shared by the calls below.
S3_DATASET_PATH = "s3://bucket/dataset/"
CATALOG_DATABASE = "my_db"
TABLE_NAME = "my_table"
TARGET_SCHEMA = "test"

# Small two-row sample frame used throughout the example.
sample_columns = {"id": [1, 2], "value": ["foo", "boo"]}
df = pd.DataFrame(sample_columns)

# Store the data in the Data Lake.
wr.s3.to_parquet(
    df=df,
    path=S3_DATASET_PATH,
    dataset=True,
    database=CATALOG_DATABASE,
    table=TABLE_NAME,
)

# Fetch the data directly from S3.
df = wr.s3.read_parquet(S3_DATASET_PATH, dataset=True)

# Fetch the data from Amazon Athena.
df = wr.athena.read_sql_query(
    "SELECT * FROM my_table",
    database=CATALOG_DATABASE,
)

# Use the Redshift connection (via SQLAlchemy) coming from Glue and fetch
# the data from Redshift Spectrum.
# NOTE(review): `wr.catalog.get_engine` and `wr.db.*` look like the
# awswrangler 1.x API -- confirm they exist in the installed version.
engine = wr.catalog.get_engine("my-redshift-connection")
df = wr.db.read_sql_query(
    "SELECT * FROM external_schema.my_table",
    con=engine,
)

# Get the MySQL connection (via SQLAlchemy) from the Glue catalog and load
# the data into MySQL.
engine = wr.catalog.get_engine("my-mysql-connection")
wr.db.to_sql(df, engine, schema=TARGET_SCHEMA, name=TABLE_NAME)

# Get the PostgreSQL connection (via SQLAlchemy) from the Glue catalog and
# load the data into PostgreSQL.
engine = wr.catalog.get_engine("my-postgresql-connection")
wr.db.to_sql(df, engine, schema=TARGET_SCHEMA, name=TABLE_NAME)

# Source: https://github.com/awslabs/aws-data-wrangler
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment