Created
January 16, 2018 15:54
-
-
Save johnludwigm/376674a58913c6b0ec45e9563f2e9efc to your computer and use it in GitHub Desktop.
Creates IMDb Database Using sqlite3 in Python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sqlite3 | |
| import StreamTSV | |
| import sqliteops | |
| import create_statements as cts | |
| import os | |
# (table name, dataset URL) pairs, in the order the tables are filled.
# The title.akas dataset is intentionally left disabled, as in the original
# listing, where it had no table name assigned:
#   ("", "https://datasets.imdbws.com/title.akas.tsv.gz")
table_urls = [
    ("Title", "https://datasets.imdbws.com/title.basics.tsv.gz"),
    ("Name", "https://datasets.imdbws.com/name.basics.tsv.gz"),
    ("Crew", "https://datasets.imdbws.com/title.crew.tsv.gz"),
    ("Episode", "https://datasets.imdbws.com/title.episode.tsv.gz"),
    ("Principals", "https://datasets.imdbws.com/title.principals.tsv.gz"),
    ("Ratings", "https://datasets.imdbws.com/title.ratings.tsv.gz"),
]
def main(dbname):
    """Build the IMDb SQLite database at the path ``dbname``.

    If ``dbname`` already exists, a message is printed and nothing else is
    done. Otherwise the tables are created, each entry of ``table_urls`` is
    loaded in order, and finally the extra columns and the indices are added.

    The connection and cursor are always closed, even when a step raises.
    A database file left behind by a failed run is not removed here, so it
    must be deleted by hand before the function will run again for the same
    path.

    Args:
        dbname: Filesystem path of the SQLite database file to create.

    Returns:
        None.
    """
    # Refuse to touch an existing file.
    if os.path.exists(dbname):
        print("The file %s already exists." % dbname)
        return

    conn = sqlite3.connect(dbname)
    try:
        cursor = conn.cursor()
        try:
            # Create the tables.
            for statement in cts.create_table_statements:
                cursor.executescript(statement)

            # Now fill the tables in order.
            for table_name, url in table_urls:
                print("Now filling table %s." % table_name)
                if table_name != "Principals":
                    sqliteops.insert(conn, table_name,
                                     StreamTSV.StreamTSV(table_name, url))
                else:
                    # This is the step where we separate the principal_cast
                    # in Principals: every streamed row is passed through
                    # StreamTSV.principalsrow and the result is inserted.
                    for generator in map(StreamTSV.principalsrow,
                                         StreamTSV.StreamTSV(table_name, url)):
                        sqliteops.insert(conn, table_name, generator)
                # Commit once per table.
                conn.commit()

            # Add the other columns.
            for statement in cts.add_column_statements:
                cursor.execute(statement)

            # Create indices from script.
            for statement in cts.create_index_statements:
                cursor.execute(statement)

            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the database handle on both success and failure paths.
        conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
This code seems to be very compact and useful, but I can't find the StreamTSV library anywhere. Is it a local file that is not available to the public, or is it a library/package? If so, where can I find it?
KR
Michael