Skip to content

Instantly share code, notes, and snippets.

@gerigk
Created March 21, 2012 16:42
Show Gist options
  • Save gerigk/2149399 to your computer and use it in GitHub Desktop.
Save gerigk/2149399 to your computer and use it in GitHub Desktop.
Inserting a DataFrame into Postgres
import psycopg2
import os
import sys
sys.path.append(os.path.abspath('../includes'))
import dbLoader
from datetime import datetime
class ReadFaker:
"""
This could be extended to include the index column optionally. Right now the index
is not inserted
"""
def __init__(self, data):
#self.iter = data.iterrows()
self.iter = data.itertuples()
def readline(self, size=None):
try:
line = [element for element in self.iter.next()[1:]]
except StopIteration:
return ''
else:
return ("\t".join(['%s'] * len(line)) + '\n') % tuple(line)
read = readline
def insert(df, table, con=None, columns = None):
time1 = datetime.now()
close_con = False
if not con:
try:
con = dbLoader.getCon() ###dbLoader returns a connection with my settings
close_con = True
except psycopg2.Error, e:
print e.pgerror
print e.pgcode
return "failed"
inserted_rows = df.shape[0]
data = ReadFaker(df)
try:
curs = con.cursor()
print 'inserting %s entries into %s ...' % (inserted_rows, table)
if columns is not None:
curs.copy_from(data, table, null='nan', columns=[col for col in columns])
else:
curs.copy_from(data, table, null='nan')
con.commit()
curs.close()
if close_con:
con.close()
except psycopg2.Error, e:
print e.pgerror
print e.pgcode
con.rollback()
if close_con:
con.close()
return "failed"
time2 = datetime.now()
print time2 - time1
return inserted_rows
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment