Skip to content

Instantly share code, notes, and snippets.

@ljmartin
Created January 8, 2022 21:48
Show Gist options
  • Save ljmartin/7ec90ea29f5c8b29dd7f4a6bb2b0fb3b to your computer and use it in GitHub Desktop.
Save ljmartin/7ec90ea29f5c8b29dd7f4a6bb2b0fb3b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "970f676d-6970-40b1-a265-d5b985c34399",
"metadata": {},
"outputs": [],
"source": [
"import random\n",
"import string\n",
"import duckdb\n",
"\n",
"letters = string.ascii_lowercase\n",
"digits = string.octdigits\n",
"\n",
"n = 10\n",
"\n",
"with open('my_file.csv', 'w') as f:\n",
" f.write('word,number\\n')\n",
" for i in range(n):\n",
" word = ''.join(random.choices(letters, k=10))\n",
" number = ''.join(random.choices(digits, k=10))\n",
" f.write('\"'+word+'\"'+','+number+'\\n')\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "f7de03da-408a-4d29-a2f9-135c9a85cdf3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"word,number\n",
"\"subdufztcx\",0240011573\n",
"\"lkpmnvmros\",3167211207\n",
"\"fxrzwapmzk\",3414157241\n",
"\"zktrevjqqt\",7012404300\n",
"\"rdbcjzttcz\",0175470322\n",
"\"kllpqosmfo\",4105323651\n",
"\"myamogkoys\",0624346037\n",
"\"mklmbewjxd\",3606322245\n",
"\"qkznmlzzgo\",0254104735\n",
"\"flnwxelfwz\",6427577014\n"
]
}
],
"source": [
"!cat my_file.csv"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "99dfbf16-8f7c-4a63-a663-7a58f9f03818",
"metadata": {},
"outputs": [
{
"ename": "RuntimeError",
"evalue": "Invalid Input Error: Could not convert string '5321253223' to INT32 between line 1 and 1025 in column 1. Parser options: DELIMITER=',' (default), QUOTE='\"' (default), ESCAPE='\"' (default), HEADER=1, SAMPLE_SIZE=10240, ALL_VARCHAR=0 ",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/var/folders/jh/02165y2n7kq2y5ychxtzcjm40000gn/T/ipykernel_60331/3883165557.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m ); \"\"\")\n\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mcursor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexecute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"COPY example from 'file.csv' (HEADER);\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mRuntimeError\u001b[0m: Invalid Input Error: Could not convert string '5321253223' to INT32 between line 1 and 1025 in column 1. Parser options: DELIMITER=',' (default), QUOTE='\"' (default), ESCAPE='\"' (default), HEADER=1, SAMPLE_SIZE=10240, ALL_VARCHAR=0 "
]
}
],
"source": [
"conn = duckdb.connect('example.db')\n",
"cursor = conn.cursor()\n",
"\n",
"cursor.execute(\"\"\" CREATE TABLE IF NOT EXISTS example (\n",
" word STRING,\n",
" number INTEGER\n",
" ); \"\"\")\n",
"\n",
"cursor.execute(\"COPY example from 'file.csv' (HEADER);\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19ed0909-d21b-407e-8e26-4cd0b075f957",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment