Last active
April 9, 2023 12:02
-
-
Save ianmcook/37b77a184e43eb566358416214012df8 to your computer and use it in GitHub Desktop.
Simple Ibis Trino demo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # before running: | |
| # 1. install Ibis and its Trino backend: https://ibis-project.org/backends/Trino/ | |
| # 2. pull and run the Trino docker container: https://trino.io/docs/current/installation/containers.html | |
| import ibis | |
| from ibis import _ | |
| # connect to Trino | |
| conn = ibis.trino.connect(database='memory', schema='default') | |
| # load the iris flower dataset from a local pandas DataFrame into a Trino table | |
| import seaborn as sns | |
| iris = sns.load_dataset('iris') | |
| conn.create_table('iris', iris) | |
| # assign the Trino iris table to variable t (an Ibis table object) | |
| t = conn.table('iris') | |
| # use the Ibis dataframe API to run a query | |
| (t.filter(_.species.isin(['versicolor', 'virginica'])) | |
| .group_by('species') | |
| .aggregate(avg_petal_length=_.petal_length.mean().round(1)) | |
| .order_by('species') | |
| .execute()) | |
| # use SQL to run a query | |
| conn.sql( | |
| '''SELECT species, AVG(petal_length) AS avg_petal_length | |
| FROM iris | |
| WHERE species IN ('versicolor', 'virginica') | |
| GROUP BY species | |
| ORDER BY species | |
| ''') | |
| # show the SQL query that the Ibis dataframe API generates | |
| print(ibis.to_sql( | |
| t.filter(_.species.isin(['versicolor', 'virginica'])) | |
| .group_by('species') | |
| .aggregate(avg_petal_length=_.petal_length.mean().round(1)) | |
| .order_by('species') | |
| )) |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Set
ibis.options.interactive = Trueto display the first 10 rows of the result in a pretty table without callingexecute().execute()returns a pandas DataFrame.to_pandas()is an alias forexecute().You can also call the
to_pyarrow()method to return a PyArrow Table.