Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save vijayanandrp/09d2c35edb15f9df209169128fba194b to your computer and use it in GitHub Desktop.
Save vijayanandrp/09d2c35edb15f9df209169128fba194b to your computer and use it in GitHub Desktop.
PyMongo + pandas: converting iterators (e.g. query cursors) into DataFrames - https://informationcorners.com/pymongo-wrapper
import pandas as pd

def posts_2_df(iterator, chunk_size=1000):
    """Build a single pandas.DataFrame from an iterator of records.

    Records are buffered in a plain list and converted to a small
    DataFrame every ``chunk_size`` items; the small frames are then
    concatenated. This trades a little conversion overhead for not holding
    every raw record in one Python list at once.

    Args:
        iterator: Any iterable yielding records that ``pandas.DataFrame``
            accepts as rows (for example, dicts).
        chunk_size: Number of records to buffer before converting them to
            an intermediate DataFrame. Must be at least 1.

    Returns:
        A DataFrame containing every record in iteration order, with a
        fresh 0..n-1 index. An empty DataFrame if the iterator yields
        nothing.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    records = []
    frames = []
    for record in iterator:
        records.append(record)
        # Flush inside the loop so the buffer never exceeds chunk_size.
        if len(records) >= chunk_size:
            frames.append(pd.DataFrame(records))
            records = []

    # Convert whatever is left over from the last, partially filled chunk.
    if records:
        frames.append(pd.DataFrame(records))

    # pd.concat raises on an empty list, so handle "no records" explicitly.
    if not frames:
        return pd.DataFrame()

    # ignore_index avoids repeated 0..chunk_size-1 labels from each chunk.
    return pd.concat(frames, ignore_index=True)

# Query for documents whose 'grades.date' lies between start_date and
# end_date (both bounds included via $gte / $lte).
# NOTE(review): `collection`, `start_date` and `end_date` are defined outside
# this snippet -- presumably a PyMongo collection and two datetimes; confirm.
result = collection.find(
    {
        'grades.date': {
            '$gte': start_date,
            '$lte': end_date,
        },
    }
)

# Convert the query results into one DataFrame, requesting 10,000-record chunks.
data_frame = posts_2_df(result, chunk_size=10000)

# Show the first rows as a quick sanity check.
print(data_frame.head())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment