Created
August 15, 2024 17:44
-
-
Save devfreitag/1f5b572940ac6f50f54da7a4024a97fe to your computer and use it in GitHub Desktop.
Python script to read a CSV file in chunks and insert its rows into MongoDB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.parse import quote_plus

import pandas as pd
from pymongo import MongoClient
# Bulk-load selected columns of a CSV file into a MongoDB collection.
#
# The CSV is streamed in fixed-size chunks so the whole file never has to fit
# in memory; each chunk is written with a single insert_many() call.

# Connect to MongoDB with username and password
username = 'USERNAME'
password = 'PASSWORD'
host = 'localhost'
port = 27017
database_name = 'DATABASE'

# Connection URI with authentication.
# The credentials are percent-escaped so that reserved characters such as
# ':', '/', '@' or '%' inside them cannot corrupt the URI.
uri = (
    f'mongodb://{quote_plus(username)}:{quote_plus(password)}'
    f'@{host}:{port}/{database_name}'
)

# Path to the CSV file - 103.182 records
csv_file = 'CSV_FILE.csv'

# Chunk size for reading the data
chunk_size = 10000  # 10k

# Specify the columns you want to import
desired_columns = ['id', 'name', 'email', 'created_at']

# Initialize a counter for processed records
processed_records = 0

# Connect to MongoDB; the context manager closes the client (and its
# connection pool) even if reading or inserting fails part-way through.
with MongoClient(uri) as client:
    db = client[database_name]
    collection = db['COLLECTION']

    # Read the CSV in chunks, selecting only the desired columns, and insert
    # into MongoDB
    for chunk in pd.read_csv(csv_file, usecols=desired_columns, chunksize=chunk_size):
        # Convert the chunk to a list of dictionaries (one per CSV row)
        # NOTE(review): empty CSV fields are presumably stored as NaN floats
        # rather than null/missing - confirm this is the desired
        # representation.
        data = chunk.to_dict(orient='records')

        # A header-only CSV yields a single empty chunk, and insert_many()
        # rejects an empty document list, so skip it.
        if not data:
            continue

        # Insert the data into MongoDB
        collection.insert_many(data)

        # Update the processed records count
        processed_records += len(data)

        # Log the progress
        print(f'Processed: {processed_records} records.')

    # Verify the number of documents in the collection.
    # This counts every document in the collection, so it only equals the
    # number inserted by this run if the collection was empty beforehand.
    document_count = collection.count_documents({})
    print(f'Total documents inserted: {document_count}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment