Skip to content

Instantly share code, notes, and snippets.

@danabauer
Created May 18, 2013 06:30
Show Gist options
  • Save danabauer/5603478 to your computer and use it in GitHub Desktop.
Save danabauer/5603478 to your computer and use it in GitHub Desktop.
"""
!/usr/bin/env python
csv2pgsql.py
by Erik Osheim and Dana Bauer
June 2011
"""
import csv
import os
import psycopg2
import sys
# Print an error message to stdout and exit the process with status 1.
# (Modernized: the original used the Python 2 print statement, which is a
# syntax error on Python 3.)
def explode(error):
    print('ERROR: %s' % error)
    sys.exit(1)
# Loads a single CSV file into a newly-created PostgreSQL table, autodetecting
# the SQL type of each column from the first data row.
class Loader(object):
    def __init__(self, dbname, tablename, columntypes):
        """Connect to the database and remember the load configuration.

        dbname      -- PostgreSQL database name to connect to.
        tablename   -- name of the table to create and fill.
        columntypes -- dict of column name -> SQL type, overriding
                       autodetection (useful when a field only sometimes
                       looks like a number).
        """
        self.dbname = dbname
        self.conn = psycopg2.connect("dbname=" + dbname)
        self.tablename = tablename
        self.columntypes = columntypes

    def load(self, csvpath):
        """Import the CSV file at csvpath into a newly-created DB table.

        The first CSV line supplies the column names; types are detected
        from the second line (the first data row), subject to the
        self.columntypes overrides. Commits once at the end.
        """
        # make sure we can read the CSV file
        if not os.path.exists(csvpath):
            explode("csv file %r could not be found" % csvpath)
        # get a database cursor to do actual queries
        cur = self.conn.cursor()
        # BUG FIX: the original opened a hard-coded '39_redist_data.csv'
        # here instead of the csvpath argument, and never closed the file.
        with open(csvpath, 'r') as f:
            reader = csv.reader(f)
            # we expect the first line to be the column names
            names = next(reader)
            # read the second line (the first data row) to detect types
            row = next(reader)
            self.types = self.getrowtypes(names, row)
            # create table and load the first data row
            self.createtable(cur)
            self.loadrow(cur, row)
            # load all the other data rows, printing progress every 100
            i = 2
            for row in reader:
                self.loadrow(cur, row)
                i += 1
                if i % 100 == 0:
                    print(i)
        # close the cursor and save our work in the DB
        cur.close()
        self.conn.commit()

    def getrowtypes(self, names, row):
        """Combine column names with detected SQL types.

        Given a list of column names and one CSV data row, returns e.g.
        [("column1", "int"), ("column2", "text"), ...].
        """
        return [(name, self.getvaltype(name, val))
                for name, val in zip(names, row)]

    def getvaltype(self, name, val):
        """Return the SQL type for one column, given a sample value.

        An explicit entry in self.columntypes wins; otherwise try int,
        then float, and fall back to text.
        """
        if name in self.columntypes:
            return self.columntypes[name]
        # only ValueError means "not this type" -- a bare except here
        # (as in the original) would also hide real bugs
        try:
            int(val)
            return 'int'
        except ValueError:
            pass
        try:
            float(val)
            return 'float'
        except ValueError:
            pass
        return 'text'

    def createtable(self, cur):
        """Create the destination table and prepare the row INSERT SQL."""
        decls = [col.lower() + ' ' + typ for col, typ in self.types]
        sql = 'create table %s (%s);' % (self.tablename, ', '.join(decls))
        # uncomment to see the create table SQL
        #print(sql)
        cur.execute(sql)
        # build our parameterized insert (reused for every row)
        cols = [col for col, _ in self.types]
        placeholders = ', '.join(['%s'] * len(self.types))
        self.insert = 'insert into %s (%s) values (%s)' % (
            self.tablename, ', '.join(cols), placeholders)
        # uncomment to see the insert SQL
        #print(self.insert)

    def loadrow(self, cur, row):
        """Validate one CSV row against self.types and insert it.

        Empty numeric fields become NULL. BUG FIX: the original used
        isdigit(), which wrongly rejected floats ('1.5') and negative
        ints ('-5') that getvaltype had accepted; validate with the
        actual conversion for the detected type instead.
        """
        for i, (name, typ) in enumerate(self.types):
            if typ == 'text':
                continue
            if row[i] == '':
                row[i] = None
                continue
            try:
                if typ == 'int':
                    int(row[i])
                else:
                    float(row[i])
            except ValueError:
                explode("column %r not a number" % name)
        cur.execute(self.insert, row)
if __name__ == "__main__":
# change this to choose a different CSV file to use
csvpath = '39_redist_data.csv'
# change this to choose a different database to use
dbname = 'mdn'
# change this to choose a different table name to use
tablename = 'redist_data'
# change this to override the autodetected types.
# you might want this if a field only sometimes looks like a number.
columntypes = {
'GEOID10': 'text',
'SLDU': 'text',
'SLDL': 'text',
}
# instantiate the loader and load the data
l = Loader(dbname, tablename, columntypes)
l.load(csvpath)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment