Skip to content

Instantly share code, notes, and snippets.

@DrPaulBrewer
Last active August 29, 2015 14:09
Show Gist options
  • Save DrPaulBrewer/ddd0322c16e8a79de6e3 to your computer and use it in GitHub Desktop.
Save DrPaulBrewer/ddd0322c16e8a79de6e3 to your computer and use it in GitHub Desktop.
csv_to_mongodb.py
# Copyright 2014 Dr Paul Brewer
# Economic and Financial Technology Consulting LLC -- www.eaftc.com
#
# python script to import CSV to mongoDB via csv.DictReader, pymongo
#
# You may copy or reuse this script under the terms of the standard MIT License:
#
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
import csv
from math import isnan
from pymongo import MongoClient
import sys
# Command-line handling.  Three positional arguments are required: the CSV
# file name, the MongoDB URL and the collection name.  An optional fourth
# argument ("go" or "replace") pre-answers the interactive prompts used later
# in the script.
if len(sys.argv) < 4:
    print("usage: python ./csv_to_mongodb.py <filename> <mongoURL> <collection> [go|replace] ")
    sys.exit(-1)
filename = sys.argv[1]
mongoURL = sys.argv[2]
cname = sys.argv[3]
# Explicit length check rather than a bare `except:`, which would also
# swallow unrelated exceptions such as KeyboardInterrupt.
noconfirm = sys.argv[4] if len(sys.argv) > 4 else None
print("importing " + filename)
def fixnum(v):
    """Convert a CSV field to a float when it parses as a number.

    Args:
        v: Raw field value.  Normally a string produced by csv.DictReader,
            but any object is accepted.

    Returns:
        float(v) when the conversion succeeds; otherwise v itself, unchanged.

    Note:
        float() also accepts spellings such as "1e3", "nan" and "inf", so
        fields containing those are returned as floats as well.
    """
    try:
        return float(v)
    except (TypeError, ValueError):
        # ValueError: non-numeric text such as "abc".
        # TypeError: non-string values, e.g. the None that csv.DictReader
        # fills in for fields missing from a short row, or the list it uses
        # to collect surplus fields.  Previously these crashed the import.
        return v
# Load the whole CSV into memory as a list of dicts, one dict per row.
# Fields whose value is the empty string are omitted from the document and
# the remaining values are passed through fixnum().  The `with` block makes
# sure the file handle is closed once the rows have been read.
with open(filename, "r") as csvfile:
    A = [{k: fixnum(v) for (k, v) in row.items() if v != ''}
         for row in csv.DictReader(csvfile)]
num = len(A)
print("CSV found " + str(num) + " rows")

print("mongodb is " + str(mongoURL))
mon = MongoClient(mongoURL)
# NOTE(review): get_default_database() relies on the URL naming a database;
# confirm against the installed pymongo version's documentation.
db = mon.get_default_database()
print("collection is " + str(cname))
print("collection has " + str(db[cname].count()) + " existing items")

# raw_input only exists on Python 2; fall back to input on Python 3 so the
# prompts keep working on either interpreter.
try:
    ask = raw_input
except NameError:
    ask = input

# Optional wipe of the existing collection.  "go" skips the question
# entirely, "replace" answers it with yes, anything else asks the user.
if noconfirm != 'go':
    if noconfirm == 'replace' or ask("to delete existing type DELETE:") == 'DELETE':
        db[cname].remove()
        print("removed")
        print("collection now has " + str(db[cname].count()) + " existing items")

# Insert the parsed rows.  "go" and "replace" both imply yes here.
if noconfirm in ('go', 'replace') or ask("to insert CSV data type y or yes:") in ('y', 'Y', 'yes', 'Yes'):
    try:
        db[cname].insert(A)
    except Exception as e:
        # Deliberately broad: this is the script's top-level boundary and
        # the driver can raise from more than one exception hierarchy.  The
        # original caught an undefined name `Error` and called
        # e.strerror(), so any failure surfaced as a NameError instead of
        # the real message.
        print("oops! something went wrong")
        print(str(e))
    else:
        print("inserted")
        print("collection now has " + str(db[cname].count()) + " existing items")
else:
    print("insert aborted")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment