Created
December 18, 2024 03:36
-
-
Save codeperfectplus/ecd6aaef972e2eea3faa0a25db13cae4 to your computer and use it in GitHub Desktop.
Load the Goodreads books dataset into Neo4j
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

import pandas as pd
from neo4j import GraphDatabase
# Connection settings for the Neo4j database. Each value can be overridden
# through an environment variable so credentials do not have to be edited
# into the script; the fallbacks are the original local-development values.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "password")

# Module-level driver used by load_data_to_neo4j() and closed at the end of
# the script.
driver = GraphDatabase.driver(uri, auth=(username, password))
def create_book_nodes(tx, book_title, publisher, author_name, language, rating):
    """Upsert one book, its author, and the WROTE relationship between them.

    Intended to be run as a transaction function, so ``tx`` is supplied by
    the session.

    Args:
        tx: Transaction object exposing ``run(query, **parameters)``.
        book_title: Value stored as the ``title`` property of the Book node.
        publisher: Value stored as the ``publisher`` property of the Book node.
        author_name: Value stored as the ``name`` property of the Author node.
        language: Value stored as the ``language`` property of the Book node.
        rating: Value stored as the ``rating`` property of the Book node.
    """
    # MERGE matches on every property in the pattern. Keeping the mutable
    # rating out of the match key means re-loading a book whose rating has
    # changed updates the existing node instead of creating a duplicate.
    query = """
    MERGE (b:Book {title: $book_title, publisher: $publisher, language: $language})
    MERGE (a:Author {name: $author_name})
    MERGE (a)-[:WROTE]->(b)
    SET b.rating = $rating
    """
    tx.run(
        query,
        book_title=book_title,
        publisher=publisher,
        language=language,
        rating=rating,
        author_name=author_name,
    )
def load_data_to_neo4j(csv_file_path):
    """Load a Goodreads books CSV into Neo4j, one write transaction per row.

    Reads the CSV with pandas, replaces missing values with the string
    "Not Available", and calls ``create_book_nodes`` for every row through
    the module-level ``driver``.

    Args:
        csv_file_path: Path to a CSV file that has ``Name``, ``Authors``,
            ``Publisher``, ``Language`` and ``Rating`` columns.

    Raises:
        KeyError: If a required column is missing (raised while the first
            row is processed).
    """
    df = pd.read_csv(csv_file_path)

    # The placeholder is applied to every column, so a missing Rating is
    # stored as the string "Not Available" rather than a number.
    df.fillna("Not Available", inplace=True)

    with driver.session() as session:
        # neo4j driver 5.x deprecates Session.write_transaction in favour of
        # execute_write; fall back to the old name on 4.x drivers.
        run_write = getattr(session, "execute_write", None) or session.write_transaction

        for _, row in df.iterrows():
            run_write(
                create_book_nodes,
                row['Name'],
                row['Publisher'],
                row['Authors'],
                row['Language'],
                row['Rating'],
            )

    print(f"Data from {csv_file_path} has been loaded into Neo4j.")
# Path to the Goodreads dataset CSV file
csv_file_path = "/home/deepak/Downloads/good_reads_book_data.csv"  # Replace with your actual path to the dataset

try:
    # Load data into Neo4j
    load_data_to_neo4j(csv_file_path)
finally:
    # Close the connection even when loading fails, so the driver's
    # connections are not leaked on error.
    driver.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment