Skip to content

Instantly share code, notes, and snippets.

@devfreitag
Created August 15, 2024 17:44
Show Gist options
  • Save devfreitag/1f5b572940ac6f50f54da7a4024a97fe to your computer and use it in GitHub Desktop.
Save devfreitag/1f5b572940ac6f50f54da7a4024a97fe to your computer and use it in GitHub Desktop.
Python script to read a CSV file and insert its rows into MongoDB
from urllib.parse import quote_plus

import pandas as pd
from pymongo import MongoClient
# Import selected columns of a CSV file into a MongoDB collection.
#
# The CSV is read in fixed-size chunks and each chunk is written with a single
# insert_many call, so only one chunk of rows is converted and sent at a time.

# MongoDB connection settings (placeholders - replace with real values).
username = 'USERNAME'
password = 'PASSWORD'
host = 'localhost'
port = 27017
database_name = 'DATABASE'

# Connection URI with authentication. The credentials are percent-escaped
# because characters such as '@', ':', '/' or '%' in a username or password
# would otherwise break URI parsing.
uri = (
    f'mongodb://{quote_plus(username)}:{quote_plus(password)}'
    f'@{host}:{port}/{database_name}'
)

# Path to the CSV file - 103.182 records
csv_file = 'CSV_FILE.csv'

# Number of CSV rows read and inserted per batch.
chunk_size = 10000  # 10k

# Only these CSV columns are imported; all other columns are ignored.
desired_columns = ['id', 'name', 'email', 'created_at']

# Running total of rows handed to MongoDB so far.
processed_records = 0

client = MongoClient(uri)
try:
    db = client[database_name]
    collection = db['COLLECTION']

    # Read the CSV in chunks, keeping only the desired columns.
    for chunk in pd.read_csv(csv_file, usecols=desired_columns,
                             chunksize=chunk_size):
        # One dict per row, keyed by column name.
        # NOTE(review): empty CSV cells arrive here as NaN floats and are
        # inserted as such - confirm that is acceptable for this collection.
        data = chunk.to_dict(orient='records')

        # insert_many rejects an empty document list, so skip empty chunks.
        if not data:
            continue

        collection.insert_many(data)
        processed_records += len(data)
        print(f'Processed: {processed_records} records.')

    # The empty filter matches every document, so this is the size of the
    # whole collection - it equals the number inserted by this run only if
    # the collection was empty beforehand.
    document_count = collection.count_documents({})
    print(f'Total documents inserted: {document_count}')
finally:
    # Always release the connection, even if reading or inserting fails.
    client.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment