Skip to content

Instantly share code, notes, and snippets.

@rjurney
Created July 27, 2024 18:13
Show Gist options
  • Save rjurney/66778d025a2290c0df5dbe39e91842cd to your computer and use it in GitHub Desktop.
Save rjurney/66778d025a2290c0df5dbe39e91842cd to your computer and use it in GitHub Desktop.
Loading ICIJ data into KuzuDB
import kuzu
import kuzu.connection
import kuzu.database
def create_tables(conn: kuzu.connection.Connection) -> None:
try:
# Create a Person node table
conn.execute(
"""
CREATE NODE TABLE Person (
node_id STRING,
name STRING,
countries STRING,
country_codes STRING,
sourceID STRING,
valid_until STRING,
note STRING,
PRIMARY KEY (node_id)
);
"""
)
# Create an Organization node table
conn.execute(
"""
CREATE NODE TABLE Organization (
node_id STRING,
name STRING,
original_name STRING,
former_name STRING,
jurisdiction STRING,
jurisdiction_description STRING,
company_type STRING,
address STRING,
internal_id STRING,
incorporation_date STRING,
inactivation_date STRING,
struck_off_date STRING,
dorm_date STRING,
status STRING,
service_provider STRING,
ibcRUC STRING,
country_codes STRING,
countries STRING,
sourceID STRING,
valid_until STRING,
note STRING,
PRIMARY KEY (node_id)
);
"""
)
# Create an Officer_of relationship table
conn.execute(
"""
CREATE REL TABLE officer_of (
FROM Person TO Organization,
rel_type STRING,
link STRING,
status STRING,
start_date STRING,
end_date STRING,
sourceID STRING,
MANY_MANY
);
"""
)
# Create an Officer_of relationship table
conn.execute(
"""
CREATE REL TABLE same_as (
FROM Person TO Person,
MANY_MANY
);
"""
)
print("Tables created successfully.")
except Exception as e:
print(f"An error occurred while creating tables: {e}")
def load_tables(conn: kuzu.connection.Connection) -> None:
conn.execute('COPY Person FROM (LOAD FROM "data/kuzu/people.parquet" RETURN *)')
conn.execute('COPY Organization FROM (LOAD FROM "data/kuzu/organizations.parquet" RETURN *)')
conn.execute('COPY officer_of FROM (LOAD FROM "data/kuzu/officerships.parquet" RETURN *)')
def main() -> None:
try:
# Initialize database
db: kuzu.database.Database = kuzu.database.Database("data/kuzu/db")
conn: kuzu.connection.Connection = kuzu.connection.Connection(db)
create_tables(conn)
load_tables(conn)
except Exception as e:
print(f"An error occurred: {e}")
finally:
if "conn" in locals():
conn.close()
if "db" in locals():
db.close()
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment