Last active
June 1, 2023 03:25
-
-
Save pybokeh/59a764b38d7e70a40b4f3606b2236571 to your computer and use it in GitHub Desktop.
When trying to print or view an ibis table expression created with the ibis DuckDB client's read_csv() method, it just hangs forever.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "90ac79de-14eb-4529-af92-6c173777ec05", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import ibis\n", | |
"import ibis.selectors as s\n", | |
"import pandas as pd\n", | |
"from ibis import _\n", | |
"ibis.options.interactive = True\n", | |
"\n", | |
"# create a DuckDB client\n", | |
"client = ibis.duckdb.connect()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "381eda91-ab17-4f8d-ab65-6c81d57e88d3", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"veh_crash_stats = client.read_csv('CrashStatistics.csv')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "5c8e5d56-2c12-42e3-a1af-fdc67c36903a", | |
"metadata": {}, | |
"source": [ | |
"Running this or trying to display the ibis table expression just hangs:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "15000496-29d0-41a8-bff0-ec77b39bf084", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"veh_crash_stats" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f0383834-2b85-4dd4-8060-da5202d4622c", | |
"metadata": {}, | |
"source": [ | |
"#### But if I read the CSV with DuckDB's own `read_csv()` method, it fails immediately with an `InvalidInputException`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "4fe10d95-1f26-4852-b358-409f4f0f8875", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import duckdb" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "7145b633-4e49-49c5-8e44-b40568b6315a", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "InvalidInputException", | |
"evalue": "Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). ", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mInvalidInputException\u001b[0m Traceback (most recent call last)", | |
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\core\\formatters.py:708\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 701\u001b[0m stream \u001b[38;5;241m=\u001b[39m StringIO()\n\u001b[0;32m 702\u001b[0m printer \u001b[38;5;241m=\u001b[39m pretty\u001b[38;5;241m.\u001b[39mRepresentationPrinter(stream, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose,\n\u001b[0;32m 703\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_width, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnewline,\n\u001b[0;32m 704\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_seq_length,\n\u001b[0;32m 705\u001b[0m singleton_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msingleton_printers,\n\u001b[0;32m 706\u001b[0m type_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtype_printers,\n\u001b[0;32m 707\u001b[0m deferred_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeferred_printers)\n\u001b[1;32m--> 708\u001b[0m \u001b[43mprinter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpretty\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 709\u001b[0m printer\u001b[38;5;241m.\u001b[39mflush()\n\u001b[0;32m 710\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m stream\u001b[38;5;241m.\u001b[39mgetvalue()\n", | |
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m meth(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mobject\u001b[39m \\\n\u001b[0;32m 409\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__repr__\u001b[39m\u001b[38;5;124m'\u001b[39m)):\n\u001b[1;32m--> 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_repr_pprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcycle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_pprint(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n", | |
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[1;34m(obj, p, cycle)\u001b[0m\n\u001b[0;32m 776\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[0;32m 777\u001b[0m \u001b[38;5;66;03m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[1;32m--> 778\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 779\u001b[0m lines \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39msplitlines()\n\u001b[0;32m 780\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m p\u001b[38;5;241m.\u001b[39mgroup():\n", | |
"\u001b[1;31mInvalidInputException\u001b[0m: Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). " | |
] | |
} | |
], | |
"source": [ | |
"duckdb.read_csv('CrashStatistics.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "bd374ef9-d20d-4eb0-8b95-ba786a54a233", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"ename": "InvalidInputException", | |
"evalue": "Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). ", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[1;31mInvalidInputException\u001b[0m Traceback (most recent call last)", | |
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\core\\formatters.py:708\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 701\u001b[0m stream \u001b[38;5;241m=\u001b[39m StringIO()\n\u001b[0;32m 702\u001b[0m printer \u001b[38;5;241m=\u001b[39m pretty\u001b[38;5;241m.\u001b[39mRepresentationPrinter(stream, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose,\n\u001b[0;32m 703\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_width, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnewline,\n\u001b[0;32m 704\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_seq_length,\n\u001b[0;32m 705\u001b[0m singleton_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msingleton_printers,\n\u001b[0;32m 706\u001b[0m type_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtype_printers,\n\u001b[0;32m 707\u001b[0m deferred_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeferred_printers)\n\u001b[1;32m--> 708\u001b[0m \u001b[43mprinter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpretty\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 709\u001b[0m printer\u001b[38;5;241m.\u001b[39mflush()\n\u001b[0;32m 710\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m stream\u001b[38;5;241m.\u001b[39mgetvalue()\n", | |
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m meth(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mobject\u001b[39m \\\n\u001b[0;32m 409\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__repr__\u001b[39m\u001b[38;5;124m'\u001b[39m)):\n\u001b[1;32m--> 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_repr_pprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcycle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_pprint(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[0;32m 413\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n", | |
"File \u001b[1;32m~\\apps\\Python39\\envs\\sql_dev\\lib\\site-packages\\IPython\\lib\\pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[1;34m(obj, p, cycle)\u001b[0m\n\u001b[0;32m 776\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[0;32m 777\u001b[0m \u001b[38;5;66;03m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[1;32m--> 778\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 779\u001b[0m lines \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39msplitlines()\n\u001b[0;32m 780\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m p\u001b[38;5;241m.\u001b[39mgroup():\n", | |
"\u001b[1;31mInvalidInputException\u001b[0m: Invalid Input Error: Error in file \"CrashStatistics.csv\" on line 3219: quote should be followed by end of value, end of row or another quote. ( file=CrashStatistics.csv\n delimiter=',' (auto detected)\n quote='\"' (auto detected)\n escape='\"' (auto detected)\n header=1 (auto detected)\n sample_size=20480\n ignore_errors=0\n all_varchar=0). " | |
] | |
} | |
], | |
"source": [ | |
"duckdb.read_csv('CrashStatistics.csv', escapechar=None, quotechar=None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "605f7beb-8672-4498-8387-7dc492f27c42", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\tOH-2023-0074,20233004904,Property Damage Only,,Deerfield,False,True,False,False,False,False,False,\t01314,1,98,Clermont County,Township,Goshen (Township of),1/12/2023 6:35:00 PM,31010,39.181742,-84.179611,,,,Deerfield,Road,10.00,Feet,West,,,,5807,House Number,,False,,False,On Roadway,Not Collision Between Two Vehicles in Transport,Clear,Dark - Roadway Not Lighted,False,False,False,False,,,Unit 1 was traveling Southbound on Deerfield Rd. a Deer ran in front of Unit 1 at 5807 Deerfield Rd. Unit 1 struck the deer causing functionable damage to the vehicle.,Police,False,1/12/2023 6:44:00 PM,1/12/2023 6:46:00 PM,1/12/2023 6:56:00 PM,1/12/2023 7:04:00 PM,0,0,Smith; Tyler ,36,Collier; Cody ,82,False,,,Straight Level,Dry,Blacktop; Bituminous; Asphalt,0,0,18,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False\n" | |
] | |
} | |
], | |
"source": [ | |
"!head -n 3218 CrashStatistics.csv | tail -n 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "1db70878-78c2-4624-b182-538e19fb71ec", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\t2302076,20233004905,Property Damage Only,,,False,True,False,True,False,False,False,\t05215,2,1,Medina County,Township,Brunswick Hills (Township of),1/13/2023 3:56:00 PM,9708,41.260720,-81.860690,,,,SUBSTATION RD,Road,15.00,Feet,South,,,,GRAFTON RD,Intersection,Road,True,4,False,On Roadway,Rear-end,Snow,Daylight,False,False,False,False,,,Unit 2 travelling south on Substation Rd. was stopped at the Stop sign at Grafton Rd. when she was struck from behind by Unit 1. Driver of unit 1; also travelling south on Substation; states she was unable to slow down in time and swerved to avoid Unit 2; but lost control of the vehicle as it slid into Unit 2.,Police,False,1/13/2023 3:56:23 PM,1/13/2023 3:58:44 PM,1/13/2023 3:58:48 PM,1/13/2023 4:29:37 PM,60,0,ADRIAN NEAGU,1325,PRZEMYSLAW PIEKUT,1309,False,,,Straight Level,Snow,Blacktop; Bituminous; Asphalt,0,0,91,False,False,False,False,False,False,False,False,False,False,True,True,False,False,False,False,False\n" | |
] | |
} | |
], | |
"source": [ | |
"!head -n 3219 CrashStatistics.csv | tail -n 1" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"id": "f4b038da-3447-42bf-8fb9-d3b37732ff08", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"\t230031775,20233004906,Property Damage Only,,\"POLICE VEHICLE\" 134,False,False,False,False,False,False,False,\tCOP00,2,1,Franklin County,City,Columbus,1/13/2023 5:37:00 PM,18000,39.915730,-82.965950,,,,Lockbourne ,Road,20.00,Feet,North,State Route,104,,,Intersection,,True,4,False,On Roadway,Sideswipe; same direction,Snow,Dark - Lighted Roadway,False,False,False,False,,,Unit 1 and Unit 2 were traveling South on Lockbourne Rd. The operator of Unit 1 stated that he was changing lanes and did not see Unit 2 until he struck Unit 1. The Operator of Unit 2 stated that he was traveling straight ahead when Unit 1 went to switch lanes and struck Unit 2. The operator of Unit 1 was issued a citation for 2131.08A1 changing lanes without safety.,Police,False,1/13/2023 5:37:00 PM,1/13/2023 5:37:00 PM,1/13/2023 5:37:00 PM,1/13/2023 6:15:00 PM,0,43,DALZELL; EVAN,2723,BECKER; BRIAN,5232,False,,,Straight Level,Wet,Blacktop; Bituminous; Asphalt,0,0,38,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False\n" | |
] | |
} | |
], | |
"source": [ | |
"!head -n 3220 CrashStatistics.csv | tail -n 1" | |
] | |
}, | |
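{ | |
"cell_type": "markdown", | |
"id": "d9afcdc8-d014-4f26-8c71-abcdf10989ec", | |
"metadata": {}, | |
"source": [ | |
"#### Not seeing anything obviously bad with row 3219\n", | |
"\n", | |
"However, the next line of the file (shown by `head -n 3220` above) contains the field `\"POLICE VEHICLE\" 134`: a closing quote followed by more text, which is exactly what the error message describes. DuckDB's reported line number therefore appears to be offset by one from `head`'s count (possibly because the header row is not counted; not confirmed).\n", | |
"\n", | |
"Note also that passing `quotechar=None` had no effect: the error output still reports `quote='\"' (auto detected)`." | |
] | |
} | |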
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Py3.9 (sql_dev)", | |
"language": "python", | |
"name": "sql_dev" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.9.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment