Skip to content

Instantly share code, notes, and snippets.

@lebedov
Created October 4, 2013 19:31
Show Gist options
  • Select an option

  • Save lebedov/6831387 to your computer and use it in GitHub Desktop.

Select an option

Save lebedov/6831387 to your computer and use it in GitHub Desktop.
Create generator for reading consecutive chunks of data from a database table using pandas.
#!/usr/bin/env python
"""
Create generator for reading consecutive chunks of data from a database table using pandas.
"""
import pandas.io.sql
import sqlite3
def read_db(conn, table, names=None, chunksize=None):
"""
Return generator for reading consecutive chunks of data from a table as
DataFrames.
Parameters
----------
conn : DB connection object
table : str
Table name.
names : list of str
Column names to read.
chunksize : int
Number of rows to return in each call to the generator.
"""
# Select all columns by default:
if names is None:
names_str = '*'
else:
names_str = ','.join(names)
# Return all rows in table in first chunk by default:
if chunksize is None:
query = 'select %s from %s' % (names_str, table)
done = False
while True:
if not done:
result = pandas.io.sql.read_frame(query, conn)
done = True
yield result
else:
raise StopIteration
else:
offset = 0
while True:
query = 'select %s from %s limit %i offset %i' % (names_str, table, chunksize, offset)
result = pandas.io.sql.read_frame(query, conn)
result.index = range(offset, offset+len(result))
if len(result):
yield result
else:
raise StopIteration
offset += chunksize
if __name__ == '__main__':
# Create a test database:
conn = sqlite3.connect(':memory:')
conn.execute("create table test (label str, val int)")
data = [('a', 1),
('a', 2),
('b', 9),
('b', 10),
('b', 11),
('c', 12),
('c', 13),
('c', 14)]
conn.executemany('insert into test values (?, ?)', data)
conn.commit()
# Extract all data in one chunk:
rd = read_db(conn, 'test')
for x in rd:
print x
# Extract data in several chunks:
rd = read_db(conn, 'test', '*', 3)
for x in rd:
print x
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment