Skip to content

Instantly share code, notes, and snippets.

@SharafatKarim
Created January 22, 2023 13:05
Show Gist options
  • Save SharafatKarim/34612e75f7ef191714234b1bf253dd8d to your computer and use it in GitHub Desktop.
Save SharafatKarim/34612e75f7ef191714234b1bf253dd8d to your computer and use it in GitHub Desktop.
A simple program that exports the documents of a MongoDB collection to a CSV file using 'pymongo' and 'pandas'
# @Author: Sharafat Karim
# @Date: 6:59 PM Sunday, 22 January 2023
# @Email: [email protected]
# @Last modified time: 6:59 PM Sunday, 22 January 2023
# @Credit: https://kb.objectrocket.com/mongo-db/export-mongodb-documents-as-csv-html-and-json-files-in-python-using-pandas-347
# Pip packages import
try:
    # Third-party packages this script relies on.
    import pymongo
    import pandas
except ModuleNotFoundError:
    # Only a hint is printed; execution continues after this handler.
    for hint_line in (
        "Dependency missing",
        "You can install with pip package manager. Command is,",
        "-> pip install pymongo pandas",
    ):
        print(hint_line)
# MongoDB export
class MongoDB_export:
    """Browse a MongoDB deployment and export one collection as CSV.

    The methods are meant to be called in order: ``connection_test`` stores
    the client, ``enter_database`` stores the database handle and
    ``enter_collections`` stores the collection handle that the print and
    export methods read from.
    """

    def __init__(self, URI: str) -> None:
        """Store the connection string.

        Args:
            URI: Connection string later passed to ``pymongo.MongoClient``.
        """
        self.URI = URI
        # Placeholder value only; enter_database() replaces it with a real
        # database handle.
        self.mydb = "database"

    def connection_test(self) -> None:
        """Create the client and check that the server answers a request."""
        self.client = pymongo.MongoClient(self.URI)
        # Listing the databases needs a server round trip, so connection
        # problems surface here instead of in a later call.
        self.client.list_database_names()

    def list_databases(self) -> None:
        """Print the database names visible to the client."""
        print(self.client.list_database_names())

    def enter_database(self, database: str) -> None:
        """Select the database used by the collection-level methods.

        Args:
            database: Name of the database to open on the client.
        """
        self.mydb = self.client[database]

    def list_collections(self) -> None:
        """Print the collection names of the selected database."""
        print(self.mydb.list_collection_names())

    def enter_collections(self, database: str) -> None:
        """Select the collection that the print/export methods read from.

        Args:
            database: Name of the collection to open. The parameter keeps
                its original name so existing keyword callers still work.
        """
        # The requested name is honoured; it used to be ignored in favour of
        # a hard-coded "user data" collection.
        self.mycol = self.mydb[database]

    def print_first_data(self) -> None:
        """Print one document of the selected collection."""
        print(self.mycol.find_one())

    def print_all_data(self) -> None:
        """Pretty-print every document of the selected collection."""
        # pprint is part of the standard library, so no fallback is needed.
        import pprint

        for document in self.mycol.find():
            pprint.pprint(document)

    def data_count_of_collection(self) -> None:
        """Print how many documents the selected collection holds."""
        # Counting on the server avoids downloading every document.
        print(self.mycol.count_documents({}))

    def _documents_as_dataframe(self) -> "pandas.DataFrame":
        """Return the selected collection as a DataFrame, one row per document.

        Each ``_id`` is converted to ``str``; it is kept as a column and is
        also used as the row label.
        """
        documents = list(self.mycol.find())
        if not documents:
            return pandas.DataFrame(columns=[])
        for document in documents:
            document["_id"] = str(document["_id"])
        row_labels = [document["_id"] for document in documents]
        # One constructor call replaces the per-row DataFrame.append(), which
        # was removed in pandas 2.0 and copied the frame on every call.
        return pandas.DataFrame(documents, index=row_labels)

    def convert_to_csv_file(self, path: str = "database.csv") -> None:
        """Write the selected collection to a comma-delimited CSV file.

        Args:
            path: Destination file; defaults to ``"database.csv"`` in the
                current working directory.
        """
        # ``sep`` is passed by keyword; newer pandas releases deprecate
        # positional arguments after the path.
        self._documents_as_dataframe().to_csv(path, sep=",")

    def preview_in_csv_format(self) -> None:
        """Print the selected collection as comma-delimited CSV text."""
        csv_export = self._documents_as_dataframe().to_csv(sep=",")
        print("\nCSV data:", csv_export)
### Below is an example of how to use this class.
### If you need help, feel free to reach me: t.me/SharafatKarim
if __name__ == "__main__":
    # Guarded so that importing this module does not open a connection.
    # NOTE(review): the URI is a placeholder and its host part looks redacted;
    # replace it with your own connection string before running.
    mongo = MongoDB_export(
        URI="mongodb+srv://examplename:[email protected]/?retryWrites=true&w=majority")  # must
    mongo.connection_test()  # must
    # mongo.list_databases()  # optional
    # mongo.enter_database("database")  # must
    # mongo.list_collections()  # optional
    # mongo.enter_collections("user data")  # must
    # mongo.print_first_data()  # optional
    # mongo.print_all_data()  # optional
    # mongo.data_count_of_collection()  # optional
    # mongo.convert_to_csv_file()  # optional - purpose
    # mongo.preview_in_csv_format()  # optional
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment