Created
July 27, 2024 18:13
-
-
Save rjurney/66778d025a2290c0df5dbe39e91842cd to your computer and use it in GitHub Desktop.
Loading ICIJ data into KuzuDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import kuzu | |
import kuzu.connection | |
import kuzu.database | |
def create_tables(conn: kuzu.connection.Connection) -> None: | |
try: | |
# Create a Person node table | |
conn.execute( | |
""" | |
CREATE NODE TABLE Person ( | |
node_id STRING, | |
name STRING, | |
countries STRING, | |
country_codes STRING, | |
sourceID STRING, | |
valid_until STRING, | |
note STRING, | |
PRIMARY KEY (node_id) | |
); | |
""" | |
) | |
# Create an Organization node table | |
conn.execute( | |
""" | |
CREATE NODE TABLE Organization ( | |
node_id STRING, | |
name STRING, | |
original_name STRING, | |
former_name STRING, | |
jurisdiction STRING, | |
jurisdiction_description STRING, | |
company_type STRING, | |
address STRING, | |
internal_id STRING, | |
incorporation_date STRING, | |
inactivation_date STRING, | |
struck_off_date STRING, | |
dorm_date STRING, | |
status STRING, | |
service_provider STRING, | |
ibcRUC STRING, | |
country_codes STRING, | |
countries STRING, | |
sourceID STRING, | |
valid_until STRING, | |
note STRING, | |
PRIMARY KEY (node_id) | |
); | |
""" | |
) | |
# Create an Officer_of relationship table | |
conn.execute( | |
""" | |
CREATE REL TABLE officer_of ( | |
FROM Person TO Organization, | |
rel_type STRING, | |
link STRING, | |
status STRING, | |
start_date STRING, | |
end_date STRING, | |
sourceID STRING, | |
MANY_MANY | |
); | |
""" | |
) | |
# Create an Officer_of relationship table | |
conn.execute( | |
""" | |
CREATE REL TABLE same_as ( | |
FROM Person TO Person, | |
MANY_MANY | |
); | |
""" | |
) | |
print("Tables created successfully.") | |
except Exception as e: | |
print(f"An error occurred while creating tables: {e}") | |
def load_tables(conn: kuzu.connection.Connection) -> None: | |
conn.execute('COPY Person FROM (LOAD FROM "data/kuzu/people.parquet" RETURN *)') | |
conn.execute('COPY Organization FROM (LOAD FROM "data/kuzu/organizations.parquet" RETURN *)') | |
conn.execute('COPY officer_of FROM (LOAD FROM "data/kuzu/officerships.parquet" RETURN *)') | |
def main() -> None: | |
try: | |
# Initialize database | |
db: kuzu.database.Database = kuzu.database.Database("data/kuzu/db") | |
conn: kuzu.connection.Connection = kuzu.connection.Connection(db) | |
create_tables(conn) | |
load_tables(conn) | |
except Exception as e: | |
print(f"An error occurred: {e}") | |
finally: | |
if "conn" in locals(): | |
conn.close() | |
if "db" in locals(): | |
db.close() | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment