Last active
September 8, 2023 08:18
-
-
Save pmav99/3fe2c2eb5a3e4c81deff034dd3e30b93 to your computer and use it in GitHub Desktop.
append to parquet files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "c8a4fdc4-18b1-445c-979d-ea4c65e9c18e", | |
"metadata": {}, | |
"source": [ | |
"## Appending to parquet files\n", | |
"\n", | |
"Requires `engine == \"fastparquet\"`" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "3aed73de-de12-407c-9837-9ac3996bf722", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:10.141307Z", | |
"iopub.status.busy": "2023-09-08T08:18:10.141091Z", | |
"iopub.status.idle": "2023-09-08T08:18:10.404449Z", | |
"shell.execute_reply": "2023-09-08T08:18:10.403983Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:10.141284Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import fastparquet" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7b44c87d-66be-4c21-b9e7-dadf5cefd1fc", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T13:20:28.580984Z", | |
"iopub.status.busy": "2023-09-07T13:20:28.580723Z", | |
"iopub.status.idle": "2023-09-07T13:20:28.840869Z", | |
"shell.execute_reply": "2023-09-07T13:20:28.840281Z", | |
"shell.execute_reply.started": "2023-09-07T13:20:28.580965Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"Create data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "17c1f106-9299-4b55-b316-eac12d07c915", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:10.405753Z", | |
"iopub.status.busy": "2023-09-08T08:18:10.405523Z", | |
"iopub.status.idle": "2023-09-08T08:18:10.410473Z", | |
"shell.execute_reply": "2023-09-08T08:18:10.410019Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:10.405736Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"filename = \"/tmp/df.parquet\"\n", | |
"\n", | |
"data = [1, 2, 3, 4, 5, 4, 4, 3, 2, 4]\n", | |
"index1 = pd.date_range(\"2001-01-01T00:00:00\", \"2001-01-01T00:09:00\", freq=\"min\")\n", | |
"index2 = pd.date_range(\"2001-01-02T00:00:00\", \"2001-01-02T00:09:00\", freq=\"min\")\n", | |
"\n", | |
"initial_df = pd.DataFrame(\n", | |
" data={\"rad\": data},\n", | |
" index=index1,\n", | |
" dtype=float,\n", | |
")\n", | |
"initial_df.index.name = \"time\"\n", | |
"initial_df.attrs.update(dict(a=1, b=2, c=3))\n", | |
"\n", | |
"to_be_appended_df = pd.DataFrame(\n", | |
" data={\"rad\": data},\n", | |
" index=index2,\n", | |
" dtype=int,\n", | |
")\n", | |
"to_be_appended_df.index.name = \"time\"" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "f4fd8d33-d064-4b90-90b8-e6b4bb3ba5cd", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T13:34:33.464552Z", | |
"iopub.status.busy": "2023-09-07T13:34:33.464279Z", | |
"iopub.status.idle": "2023-09-07T13:34:33.699552Z", | |
"shell.execute_reply": "2023-09-07T13:34:33.698907Z", | |
"shell.execute_reply.started": "2023-09-07T13:34:33.464530Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"Write initial file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "f31da3ab-356e-4b84-bbf8-a633fa0eb2bd", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:10.411171Z", | |
"iopub.status.busy": "2023-09-08T08:18:10.410998Z", | |
"iopub.status.idle": "2023-09-08T08:18:10.647088Z", | |
"shell.execute_reply": "2023-09-08T08:18:10.646477Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:10.411155Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"-rw-r--r-- 1 panos panos 949 Sep 8 11:18 /tmp/df.parquet\n" | |
] | |
} | |
], | |
"source": [ | |
"!rm -rf {filename}\n", | |
"initial_df.to_parquet(filename, engine=\"fastparquet\")\n", | |
"!ls -lah {filename}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "6d9866e0-d06b-4247-b93e-d33052067315", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T13:36:19.214367Z", | |
"iopub.status.busy": "2023-09-07T13:36:19.214138Z", | |
"iopub.status.idle": "2023-09-07T13:36:19.454709Z", | |
"shell.execute_reply": "2023-09-07T13:36:19.453971Z", | |
"shell.execute_reply.started": "2023-09-07T13:36:19.214347Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"Read data from disk" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "77ed6e89-916b-45a4-bf53-ccdb9f9d0dcf", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:10.648075Z", | |
"iopub.status.busy": "2023-09-08T08:18:10.647910Z", | |
"iopub.status.idle": "2023-09-08T08:18:10.744014Z", | |
"shell.execute_reply": "2023-09-08T08:18:10.743527Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:10.648059Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"read_df = pd.read_parquet(filename)\n", | |
"assert initial_df.equals(read_df)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "681207dd-c146-44c1-b252-9ba4650b1980", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T13:44:43.906015Z", | |
"iopub.status.busy": "2023-09-07T13:44:43.905870Z", | |
"iopub.status.idle": "2023-09-07T13:44:44.139911Z", | |
"shell.execute_reply": "2023-09-07T13:44:44.138945Z", | |
"shell.execute_reply.started": "2023-09-07T13:44:43.905997Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"### Warning! - Attributes don't seem to be preserved" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "30f725db-e149-4c68-94f0-86c31bc3f8e8", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:10.744632Z", | |
"iopub.status.busy": "2023-09-08T08:18:10.744502Z", | |
"iopub.status.idle": "2023-09-08T08:18:10.749195Z", | |
"shell.execute_reply": "2023-09-08T08:18:10.748816Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:10.744610Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{}" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"read_df.attrs" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "b2b16734-320a-4be6-bca9-acfb63646894", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T13:38:33.671948Z", | |
"iopub.status.busy": "2023-09-07T13:38:33.671833Z", | |
"iopub.status.idle": "2023-09-07T13:38:33.908914Z", | |
"shell.execute_reply": "2023-09-07T13:38:33.908171Z", | |
"shell.execute_reply.started": "2023-09-07T13:38:33.671936Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"### Append data to disk" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "9fa09491-c95d-4b5e-8f85-a3a2e745b767", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:10.749816Z", | |
"iopub.status.busy": "2023-09-08T08:18:10.749690Z", | |
"iopub.status.idle": "2023-09-08T08:18:10.991365Z", | |
"shell.execute_reply": "2023-09-08T08:18:10.990017Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:10.749804Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"-rw-r--r-- 1 panos panos 949 Sep 8 11:18 /tmp/df.parquet\n", | |
"-rw-r--r-- 1 panos panos 1.3K Sep 8 11:18 /tmp/df.parquet\n" | |
] | |
} | |
], | |
"source": [ | |
"!ls -lah {filename}\n", | |
"to_be_appended_df.to_parquet(filename, engine=\"fastparquet\", append=True)\n", | |
"!ls -lah {filename}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "f988b009-9504-4a8e-803d-5a52a36e5a28", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:10.995429Z", | |
"iopub.status.busy": "2023-09-08T08:18:10.995019Z", | |
"iopub.status.idle": "2023-09-08T08:18:11.007706Z", | |
"shell.execute_reply": "2023-09-08T08:18:11.006968Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:10.995395Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"read_appended_df = pd.read_parquet(filename)\n", | |
"assert pd.concat([initial_df, to_be_appended_df]).equals(read_appended_df)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8cf4af61-1e10-4f61-b068-807c278b501c", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T13:39:16.153860Z", | |
"iopub.status.busy": "2023-09-07T13:39:16.153627Z", | |
"iopub.status.idle": "2023-09-07T13:39:16.161716Z", | |
"shell.execute_reply": "2023-09-07T13:39:16.161183Z", | |
"shell.execute_reply.started": "2023-09-07T13:39:16.153840Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"## Warning\n", | |
"\n", | |
"It is possible to append multiple times the same data, i.e. you can end up with duplicates in the index.\n", | |
"\n", | |
"It is up to us to ensure that we have a monotonic index." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "9e68162e-28a5-49e4-b75a-ed89ff3191a7", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:11.009039Z", | |
"iopub.status.busy": "2023-09-08T08:18:11.008763Z", | |
"iopub.status.idle": "2023-09-08T08:18:11.738002Z", | |
"shell.execute_reply": "2023-09-08T08:18:11.737137Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:11.009014Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"-rw-r--r-- 1 panos panos 1.3K Sep 8 11:18 /tmp/df.parquet\n", | |
"-rw-r--r-- 1 panos panos 1.6K Sep 8 11:18 /tmp/df.parquet\n", | |
"-rw-r--r-- 1 panos panos 1.9K Sep 8 11:18 /tmp/df.parquet\n", | |
"-rw-r--r-- 1 panos panos 2.2K Sep 8 11:18 /tmp/df.parquet\n", | |
"-rw-r--r-- 1 panos panos 2.5K Sep 8 11:18 /tmp/df.parquet\n", | |
"-rw-r--r-- 1 panos panos 2.8K Sep 8 11:18 /tmp/df.parquet\n" | |
] | |
} | |
], | |
"source": [ | |
"for i in range(5):\n", | |
" !ls -lah {filename}\n", | |
" to_be_appended_df.to_parquet(filename, engine=\"fastparquet\", append=True)\n", | |
"!ls -lah {filename}" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "a5d39d7d-8454-42b4-88f6-0a49fcdcd6f8", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T14:16:07.432437Z", | |
"iopub.status.busy": "2023-09-07T14:16:07.432113Z", | |
"iopub.status.idle": "2023-09-07T14:16:07.438124Z", | |
"shell.execute_reply": "2023-09-07T14:16:07.437070Z", | |
"shell.execute_reply.started": "2023-09-07T14:16:07.432412Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"### How parquet stores data to disk\n", | |
"\n", | |
"`parquet` stores data column-wise. The data of each column are stored in chunks named \"row-groups\".\n", | |
"\n", | |
"The default value for each \"row-group\" in `fastparquet` is 50_000_000, but this value can be changed,\n", | |
"and, this value can be different for each \"row-group\".\n", | |
"This can be useful if for example you want each \"row-group\" to contain the data of a whole year.\n", | |
"\n", | |
"### Efficient reading of metadata\n", | |
"\n", | |
"Parquet files contain statistics for each \"row-group\". You can access these statistics like this:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "ac54109e-f3fb-4f38-953a-3f77a39cd138", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:11.739389Z", | |
"iopub.status.busy": "2023-09-08T08:18:11.739165Z", | |
"iopub.status.idle": "2023-09-08T08:18:11.745377Z", | |
"shell.execute_reply": "2023-09-08T08:18:11.744861Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:11.739367Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"dict_keys(['min', 'max', 'null_count', 'distinct_count'])" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pf = fastparquet.ParquetFile(filename)\n", | |
"pf.statistics.keys()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "1a742201-6d78-4fe7-8c8b-12146e6527c1", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T14:16:07.432437Z", | |
"iopub.status.busy": "2023-09-07T14:16:07.432113Z", | |
"iopub.status.idle": "2023-09-07T14:16:07.438124Z", | |
"shell.execute_reply": "2023-09-07T14:16:07.437070Z", | |
"shell.execute_reply.started": "2023-09-07T14:16:07.432412Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"For example, the maximum value of the first two \"row-groups\" of column `rad` are:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "fd7f0b30-d8f4-4e8c-b50b-f103d699281d", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:11.746539Z", | |
"iopub.status.busy": "2023-09-08T08:18:11.746102Z", | |
"iopub.status.idle": "2023-09-08T08:18:11.758707Z", | |
"shell.execute_reply": "2023-09-08T08:18:11.758066Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:11.746519Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5.0" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"5.0" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pf.statistics[\"max\"][\"rad\"][0]\n", | |
"pf.statistics[\"max\"][\"rad\"][1]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ad546f83-35cc-461a-b572-2b0831ea6350", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T14:16:07.432437Z", | |
"iopub.status.busy": "2023-09-07T14:16:07.432113Z", | |
"iopub.status.idle": "2023-09-07T14:16:07.438124Z", | |
"shell.execute_reply": "2023-09-07T14:16:07.437070Z", | |
"shell.execute_reply.started": "2023-09-07T14:16:07.432412Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"The maximum rad value is:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "ee9b7ace-9b81-481e-92e8-ad2c9a262a91", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:11.759638Z", | |
"iopub.status.busy": "2023-09-08T08:18:11.759440Z", | |
"iopub.status.idle": "2023-09-08T08:18:11.763719Z", | |
"shell.execute_reply": "2023-09-08T08:18:11.763172Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:11.759618Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"5.0" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"max(pf.statistics[\"max\"][\"rad\"])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "7d0828d2-27fc-44a1-b865-e542e55116dd", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T14:16:07.432437Z", | |
"iopub.status.busy": "2023-09-07T14:16:07.432113Z", | |
"iopub.status.idle": "2023-09-07T14:16:07.438124Z", | |
"shell.execute_reply": "2023-09-07T14:16:07.437070Z", | |
"shell.execute_reply.started": "2023-09-07T14:16:07.432412Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"This can be very useful in our case because it allows us to get the most recent timestamp in a very efficient way (without reading/decompressing the full file from the disk):" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "5663631c-b2d0-48dc-9fe7-57e8706bf48a", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:11.764608Z", | |
"iopub.status.busy": "2023-09-08T08:18:11.764410Z", | |
"iopub.status.idle": "2023-09-08T08:18:11.769240Z", | |
"shell.execute_reply": "2023-09-08T08:18:11.768688Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:11.764579Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"numpy.datetime64('2001-01-02T00:09:00.000000000')" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"most_recent_ts = pf.statistics[\"max\"][\"time\"][-1]\n", | |
"most_recent_ts" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "ab4cdfc9-58a4-4dbf-8c41-f3c2ce462db3", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-07T14:16:07.432437Z", | |
"iopub.status.busy": "2023-09-07T14:16:07.432113Z", | |
"iopub.status.idle": "2023-09-07T14:16:07.438124Z", | |
"shell.execute_reply": "2023-09-07T14:16:07.437070Z", | |
"shell.execute_reply.started": "2023-09-07T14:16:07.432412Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"## Efficient reading of \"row-groups\"\n", | |
"\n", | |
"If you know that you only need the data of a specific \"row-group\" then you can avoid reading/decompressing the whole file from disk:\n", | |
"\n", | |
"For that we need to ditch the `pandas` interface and directly use the `fastparquet` one, though:" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "088651c4-cc38-45fc-a994-66699d0ebb6a", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T08:18:11.770025Z", | |
"iopub.status.busy": "2023-09-08T08:18:11.769846Z", | |
"iopub.status.idle": "2023-09-08T08:18:11.780828Z", | |
"shell.execute_reply": "2023-09-08T08:18:11.780252Z", | |
"shell.execute_reply.started": "2023-09-08T08:18:11.770007Z" | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(70, 10)" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pf = fastparquet.ParquetFile(filename)\n", | |
"\n", | |
"full_df = pf.to_pandas()\n", | |
"last_row_group_df = pf[-1].to_pandas()\n", | |
"\n", | |
"len(full_df), len(last_row_group_df)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "8c551352-d52a-4369-952c-7bc2b485adc7", | |
"metadata": { | |
"execution": { | |
"iopub.execute_input": "2023-09-08T07:17:33.142315Z", | |
"iopub.status.busy": "2023-09-08T07:17:33.142063Z", | |
"iopub.status.idle": "2023-09-08T07:17:33.150592Z", | |
"shell.execute_reply": "2023-09-08T07:17:33.150098Z", | |
"shell.execute_reply.started": "2023-09-08T07:17:33.142297Z" | |
}, | |
"tags": [] | |
}, | |
"source": [ | |
"Parquet supports additional ways that can speed up reading of files like filters etc, but I don't think that they are relevant in our case.\n" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "scraper", | |
"language": "python", | |
"name": "scraper" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment