Created
November 2, 2023 17:51
-
-
Save renegarcia/9812aa7ddc6dbb9e0342a1aa2c92e106 to your computer and use it in GitHub Desktop.
Extract a zipped collection on csv files into a fresh sqlite3 database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
extract_zip_to_sqlite.py: Extract a zipped collection on csv files into a fresh sqlite3 database. | |
== Requirements == | |
* A fairly recent version of Pandas. | |
* An schema of the tables definitions. | |
== Ussage == | |
Update the value of `DATABASE`, `ZIPFILE` and `SCHEMA_LOCATION` as necessary. | |
""" | |
from pathlib import Path | |
from zipfile import ZipFile | |
from io import StringIO | |
import pandas as pd | |
import sqlite3 | |
DATABASE = "database.sqlite3" | |
ZIPFILE = "zipfile_location.zip" | |
SCHEMA_LOCATION = "schema.sql" | |
if not Path(DATABASE).exists(): | |
with open(SCHEMA_LOCATION) as f: | |
schema = f.read() | |
with sqlite3.connect(DATABASE) as con: | |
cur = con.cursor() | |
cur.executescript(schema) | |
con.close() | |
with ZipFile(ZIPFILE) as zipfile: | |
buffer = StringIO() | |
for csvfile in zipfile.namelist(): | |
with zipfile.open(csvfile) as f: | |
tablename = Path(csvfile).stem | |
df = pd.read_csv(f) | |
with sqlite3.connect(DATABASE) as con: | |
cur = con.cursor() | |
cur.execute(f"DELETE FROM {tablename}") | |
con.commit() | |
df.to_sql(tablename, con, index=False, if_exists="append") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment