Skip to content

Instantly share code, notes, and snippets.

@dataders
Last active December 19, 2024 21:55
Show Gist options
  • Save dataders/ef982a2e6eae5d125705717fcce80111 to your computer and use it in GitHub Desktop.
Save dataders/ef982a2e6eae5d125705717fcce80111 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
{
"extensionUris": [
{
"extensionUriAnchor": 1,
"uri": "https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml"
}
],
"extensions": [
{
"extensionFunction": {
"extensionUriReference": 1,
"functionAnchor": 1,
"name": "multiply:i32_i32"
}
}
],
"relations": [
{
"root": {
"input": {
"project": {
"input": {
"read": {
"baseSchema": {
"names": [
"unit_price",
"quantity"
],
"struct": {
"types": [
{
"i32": {
"nullability": "NULLABILITY_NULLABLE"
}
},
{
"i32": {
"nullability": "NULLABILITY_NULLABLE"
}
}
],
"nullability": "NULLABILITY_REQUIRED"
}
},
"projection": {
"select": {
"structItems": [
{},
{
"field": 1
}
]
},
"maintainSingularStruct": true
},
"namedTable": {
"names": [
"payments"
]
}
}
},
"expressions": [
{
"scalarFunction": {
"functionReference": 1,
"outputType": {
"i32": {
"nullability": "NULLABILITY_NULLABLE"
}
},
"arguments": [
{
"value": {
"selection": {
"directReference": {
"structField": {}
},
"rootReference": {}
}
}
},
{
"value": {
"selection": {
"directReference": {
"structField": {
"field": 1
}
},
"rootReference": {}
}
}
}
]
}
}
]
}
},
"names": [
"total_cost"
]
}
}
],
"version": {
"minorNumber": 53,
"producer": "DuckDB"
}
}
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# reprex: duckdb -> pyarrow\n",
"\n",
"trying to use a DuckDB-genrated subait plan on a PyArrow table, but it fails with the following error:\n",
"\n",
"```py\n",
"ArrowNotImplementedError: substrait::ReadRel::projection\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import duckdb\n",
"import json\n",
"import pyarrow as pa\n",
"from pyarrow.lib import tobytes\n",
"import pyarrow.substrait as substrait"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"con = duckdb.connect()\n",
"# con = duckdb.connect('sandbox_substrait.db')\n",
"con.install_extension(\"substrait\")\n",
"con.load_extension(\"substrait\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## v1 DuckDB query -> Substrait plan -> Arrow table"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"queries = {\n",
" \"create\": \"CREATE OR REPLACE TABLE payments (unit_price int, quantity int);\",\n",
" \"insert\": \"INSERT INTO payments VALUES (10, 1000), (5,2000), (3456,100);\",\n",
" \"select\": \"select * from payments;\",\n",
" \"transform\": \"select unit_price * quantity as total_cost from payments;\"\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pyarrow.Table\n",
"unit_price: int32\n",
"quantity: int32\n",
"----\n",
"unit_price: [[10,5,3456]]\n",
"quantity: [[1000,2000,100]]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"con.execute(f'{queries[\"create\"]} {queries[\"insert\"]}')\n",
"ar_payments = con.execute(queries['select']).fetch_arrow_table()\n",
"ar_payments"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"pyarrow.Table\n",
"explain_key: string\n",
"explain_value: string\n",
"----\n",
"explain_key: [[\"physical_plan\"]]\n",
"explain_value: [[\"┌───────────────────────────┐\n",
"│ PROJECTION │\n",
"│ ──────────────────── │\n",
"│ total_cost │\n",
"│ │\n",
"│ ~3 Rows │\n",
"└─────────────┬─────────────┘\n",
"┌─────────────┴─────────────┐\n",
"│ SEQ_SCAN │\n",
"│ ──────────────────── │\n",
"│ payments │\n",
"│ │\n",
"│ Projections: │\n",
"│ unit_price │\n",
"│ quantity │\n",
"│ │\n",
"│ ~3 Rows │\n",
"└───────────────────────────┘\n",
"\"]]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"con.execute(f\"EXPLAIN {queries['transform']}\").fetch_arrow_table()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'extensionUris': [{'extensionUriAnchor': 1,\n",
" 'uri': 'https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml'}],\n",
" 'extensions': [{'extensionFunction': {'extensionUriReference': 1,\n",
" 'functionAnchor': 1,\n",
" 'name': 'multiply:i32_i32'}}],\n",
" 'relations': [{'root': {'input': {'project': {'input': {'read': {'baseSchema': {'names': ['unit_price',\n",
" 'quantity'],\n",
" 'struct': {'types': [{'i32': {'nullability': 'NULLABILITY_NULLABLE'}},\n",
" {'i32': {'nullability': 'NULLABILITY_NULLABLE'}}],\n",
" 'nullability': 'NULLABILITY_REQUIRED'}},\n",
" 'projection': {'select': {'structItems': [{}, {'field': 1}]},\n",
" 'maintainSingularStruct': True},\n",
" 'namedTable': {'names': ['payments']}}},\n",
" 'expressions': [{'scalarFunction': {'functionReference': 1,\n",
" 'outputType': {'i32': {'nullability': 'NULLABILITY_NULLABLE'}},\n",
" 'arguments': [{'value': {'selection': {'directReference': {'structField': {}},\n",
" 'rootReference': {}}}},\n",
" {'value': {'selection': {'directReference': {'structField': {'field': 1}},\n",
" 'rootReference': {}}}}]}}]}},\n",
" 'names': ['total_cost']}}],\n",
" 'version': {'minorNumber': 53, 'producer': 'DuckDB'}}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"plan_str = con.get_substrait_json(query=queries['transform']).fetchone()[0]\n",
"plan_json_full = json.loads(plan_str)\n",
"plan_json_full"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'project': {'input': {'read': {'baseSchema': {'names': ['unit_price',\n",
" 'quantity'],\n",
" 'struct': {'types': [{'i32': {'nullability': 'NULLABILITY_NULLABLE'}},\n",
" {'i32': {'nullability': 'NULLABILITY_NULLABLE'}}],\n",
" 'nullability': 'NULLABILITY_REQUIRED'}},\n",
" 'projection': {'select': {'structItems': [{}, {'field': 1}]},\n",
" 'maintainSingularStruct': True},\n",
" 'namedTable': {'names': ['payments']}}},\n",
" 'expressions': [{'scalarFunction': {'functionReference': 1,\n",
" 'outputType': {'i32': {'nullability': 'NULLABILITY_NULLABLE'}},\n",
" 'arguments': [{'value': {'selection': {'directReference': {'structField': {}},\n",
" 'rootReference': {}}}},\n",
" {'value': {'selection': {'directReference': {'structField': {'field': 1}},\n",
" 'rootReference': {}}}}]}}]}}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# plan_json_full['relations'][0]['root'].keys()\n",
"plan_json_full['relations'][0]['root']['input']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def table_provider(names, schema):\n",
" if not names:\n",
" raise Exception(\"No names provided\")\n",
" elif names[0] == \"payments\":\n",
" return ar_payments\n",
" else:\n",
" raise Exception(\"Unrecognized table name\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"ename": "ArrowNotImplementedError",
"evalue": "substrait::ReadRel::projection",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mArrowNotImplementedError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[10], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m buf \u001b[38;5;241m=\u001b[39m pa\u001b[38;5;241m.\u001b[39m_substrait\u001b[38;5;241m.\u001b[39m_parse_json_plan(tobytes(plan_str))\n\u001b[0;32m----> 2\u001b[0m reader \u001b[38;5;241m=\u001b[39m \u001b[43mpa\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msubstrait\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_query\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbuf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtable_provider\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtable_provider\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m reader\u001b[38;5;241m.\u001b[39mread_all()\n",
"File \u001b[0;32m~/Developer/query-plan-diff/.venv/lib/python3.11/site-packages/pyarrow/_substrait.pyx:156\u001b[0m, in \u001b[0;36mpyarrow._substrait.run_query\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m~/Developer/query-plan-diff/.venv/lib/python3.11/site-packages/pyarrow/error.pxi:155\u001b[0m, in \u001b[0;36mpyarrow.lib.pyarrow_internal_check_status\u001b[0;34m()\u001b[0m\n",
"File \u001b[0;32m~/Developer/query-plan-diff/.venv/lib/python3.11/site-packages/pyarrow/error.pxi:92\u001b[0m, in \u001b[0;36mpyarrow.lib.check_status\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mArrowNotImplementedError\u001b[0m: substrait::ReadRel::projection"
]
}
],
"source": [
"buf = pa._substrait._parse_json_plan(tobytes(plan_str))\n",
"reader = pa.substrait.run_query(buf, table_provider=table_provider)\n",
"reader.read_all()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## V2 Ibis -> Substrait plan -> DuckDB & Arrow"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import ibis\n",
"\n",
"from ibis_substrait.compiler.core import SubstraitCompiler"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"unit_price: int32\n",
"quantity: int32"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pay_schema = con.table('payments').arrow().schema #\n",
"pay_schema"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Define an abstract table (this example uses substrait without it being connected to a database)\n",
"abstract_payments = (\n",
" ibis.table(pay_schema, name='payments')\n",
" .unbind() # Unbinding allows ibis to generate substrait without a catalog\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Define your query using ibis (not ibis-substrait)\n",
"\n",
"# I put this expression here for readability, but it can be in-lined in the query\n",
"total_cost_expr = abstract_payments.unit_price * abstract_payments.quantity"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# This is the actual query, `select` is like \"SELECT\" and we give it an expression\n",
"sample_query = (\n",
" abstract_payments.select(\n",
" (total_cost_expr).name('total_cost')\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/markdown": [
"```sql\n",
"SELECT\n",
" \"t0\".\"unit_price\" * \"t0\".\"quantity\" AS \"total_cost\"\n",
"FROM \"payments\" AS \"t0\"\n",
"```"
],
"text/plain": [
"\u001b[34mSELECT\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
"\u001b[37m \u001b[39;49;00m\u001b[33m\"t0\"\u001b[39;49;00m.\u001b[33m\"unit_price\"\u001b[39;49;00m\u001b[37m \u001b[39;49;00m*\u001b[37m \u001b[39;49;00m\u001b[33m\"t0\"\u001b[39;49;00m.\u001b[33m\"quantity\"\u001b[39;49;00m\u001b[37m \u001b[39;49;00m\u001b[34mAS\u001b[39;49;00m\u001b[37m \u001b[39;49;00m\u001b[33m\"total_cost\"\u001b[39;49;00m\u001b[37m\u001b[39;49;00m\n",
"\u001b[34mFROM\u001b[39;49;00m\u001b[37m \u001b[39;49;00m\u001b[33m\"payments\"\u001b[39;49;00m\u001b[37m \u001b[39;49;00m\u001b[34mAS\u001b[39;49;00m\u001b[37m \u001b[39;49;00m\u001b[33m\"t0\"\u001b[39;49;00m\u001b[37m\u001b[39;49;00m"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# If you want to see the SQL\n",
"ibis.to_sql(sample_query)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"extension_uris {\n",
" extension_uri_anchor: 1\n",
" uri: \"https://github.com/substrait-io/substrait/blob/main/extensions/functions_arithmetic.yaml\"\n",
"}\n",
"extensions {\n",
" extension_function {\n",
" extension_uri_reference: 1\n",
" function_anchor: 1\n",
" name: \"multiply:i32_i32\"\n",
" }\n",
"}\n",
"relations {\n",
" root {\n",
" input {\n",
" project {\n",
" common {\n",
" emit {\n",
" output_mapping: 2\n",
" }\n",
" }\n",
" input {\n",
" read {\n",
" common {\n",
" direct {\n",
" }\n",
" }\n",
" base_schema {\n",
" names: \"unit_price\"\n",
" names: \"quantity\"\n",
" struct {\n",
" types {\n",
" i32 {\n",
" nullability: NULLABILITY_NULLABLE\n",
" }\n",
" }\n",
" types {\n",
" i32 {\n",
" nullability: NULLABILITY_NULLABLE\n",
" }\n",
" }\n",
" nullability: NULLABILITY_REQUIRED\n",
" }\n",
" }\n",
" named_table {\n",
" names: \"payments\"\n",
" }\n",
" }\n",
" }\n",
" expressions {\n",
" scalar_function {\n",
" function_reference: 1\n",
" output_type {\n",
" i64 {\n",
" nullability: NULLABILITY_NULLABLE\n",
" }\n",
" }\n",
" arguments {\n",
" value {\n",
" selection {\n",
" direct_reference {\n",
" struct_field {\n",
" }\n",
" }\n",
" root_reference {\n",
" }\n",
" }\n",
" }\n",
" }\n",
" arguments {\n",
" value {\n",
" selection {\n",
" direct_reference {\n",
" struct_field {\n",
" field: 1\n",
" }\n",
" }\n",
" root_reference {\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" names: \"total_cost\"\n",
" }\n",
"}\n",
"version {\n",
" minor_number: 56\n",
" producer: \"ibis-substrait\"\n",
"}"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ibis-substrait\n",
"# This is a \"Plan\" message.\n",
"# The protobuf description: https://github.com/substrait-io/substrait/blob/main/proto/substrait/plan.proto#L26\n",
"# The API for any Message type: https://googleapis.dev/python/protobuf/latest/google/protobuf/message.html\n",
"proto_msg = SubstraitCompiler().compile(sample_query)\n",
"proto_msg"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# this gets us a string (see the protobuf Message API link above)\n",
"# I believe you can pass this along for execution\n",
"serialized_msg = proto_msg.SerializeToString()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>total_cost</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>345600</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" total_cost\n",
"0 10000\n",
"1 10000\n",
"2 345600"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# duckdb\n",
"con.from_substrait(proto=serialized_msg).df()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>total_cost</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>345600</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" total_cost\n",
"0 10000\n",
"1 10000\n",
"2 345600"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pyarrow\n",
"pa.substrait.run_query(serialized_msg, table_provider=table_provider).read_all().to_pandas()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## V3 Ibis -> Substrait plan -> Datafusion"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"from datafusion import SessionContext\n",
"from datafusion import substrait as ss"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"ctx = SessionContext()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border='1'>\n",
"</table>\n"
],
"text/plain": [
"DataFrame()\n",
"++\n",
"++"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ctx.sql('drop table if exists payments')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border='1'>\n",
"</table>\n"
],
"text/plain": [
"DataFrame()\n",
"++\n",
"++"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ctx.sql(queries[\"create\"])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<table border='1'>\n",
"<tr><th>count</td></tr>\n",
"<tr><td>3</td></tr>\n",
"</table>\n"
],
"text/plain": [
"DataFrame()\n",
"+-------+\n",
"| count |\n",
"+-------+\n",
"| 3 |\n",
"+-------+"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ctx.sql(queries[\"insert\"])"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>unit_price</th>\n",
" <th>quantity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10</td>\n",
" <td>1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5</td>\n",
" <td>2000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3456</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10</td>\n",
" <td>1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>2000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>3456</td>\n",
" <td>100</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" unit_price quantity\n",
"0 10 1000\n",
"1 5 2000\n",
"2 3456 100\n",
"3 10 1000\n",
"4 5 2000\n",
"5 3456 100"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ctx.sql(queries[\"select\"]).to_pandas()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"b'\\x12\\x12\\x1a\\x10\\x08\\xff\\xff\\xff\\xff\\x0f\\x1a\\x08multiply\\x1as\\x12q\\nc:a\\n\\x05\\x12\\x03\\n\\x01\\x02\\x12>\\n<\\x12$\\n\\nunit_price\\n\\x08quantity\\x12\\x0c\\n\\x04*\\x02\\x10\\x01\\n\\x04*\\x02\\x10\\x01\"\\x08\\n\\x06\\n\\x00\\n\\x02\\x08\\x01:\\n\\n\\x08payments\\x1a\\x18\\x1a\\x16\"\\x08\\x1a\\x06\\x12\\x04\\n\\x02\\x12\\x00\"\\n\\x1a\\x08\\x12\\x06\\n\\x04\\x12\\x02\\x08\\x01\\x12\\ntotal_cost2\\x0e\\x10:*\\ndatafusion'"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"substrait_plan = ss.Serde.serialize_to_plan(queries[\"transform\"], ctx)\n",
"substrait_plan.encode()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"df_logical_plan = ss.Consumer.from_substrait_plan(ctx, substrait_plan)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>total_cost</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>345600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>345600</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" total_cost\n",
"0 10000\n",
"1 10000\n",
"2 345600\n",
"3 10000\n",
"4 10000\n",
"5 345600"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ctx.create_dataframe_from_logical_plan(df_logical_plan).to_pandas()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment