Created
June 18, 2015 13:46
-
-
Save tonellotto/5024040587ed73db8f31 to your computer and use it in GitHub Desktop.
XML 2 SQL for Stack Overflow
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# http://paste.org/8946 "no" 2009
# Updated by Tero Karvinen http://TeroKarvinen.com
import sys
import xml.sax
import xml.sax.handler
class SOHandler(xml.sax.handler.ContentHandler):
    """SAX handler that converts ``<row .../>`` elements into SQL inserts.

    Every element whose name is not ``row`` is treated as a table: its name
    becomes the current table name and ``<name>.sql`` / ``<name>.err`` are
    (re)created in the current working directory.  Every ``row`` element
    becomes one ``insert into`` statement whose columns are the row's XML
    attribute names and whose values are the escaped attribute values.  The
    statement is appended to the ``.sql`` file and echoed to standard output.

    The code runs unchanged on Python 2 and Python 3: files are opened in
    binary mode and all text is encoded to UTF-8 exactly once, on output.

    Attributes:
        errParse: number of rows skipped because their statement could not be
            encoded as UTF-8.
        table: name of the most recently opened table element, or ``None``.
        outFile: binary file receiving the SQL statements, or ``None`` when
            no table is open.
        errfile: binary file receiving the rows that were skipped, or
            ``None`` when no table is open.
    """

    def __init__(self):
        # ContentHandler is an old-style class on Python 2, so the base
        # initializer is called explicitly rather than through super().
        xml.sax.handler.ContentHandler.__init__(self)
        self.errParse = 0
        self.table = None
        self.outFile = None
        self.errfile = None

    def startElement(self, name, attributes):
        """Open a new table for a non-``row`` element, or emit one insert.

        Args:
            name: element name reported by the SAX parser.
            attributes: mapping-like object offering ``keys()`` and item
                access, as passed by the SAX parser.

        Raises:
            ValueError: if a ``row`` element arrives before any table element.
        """
        if name != "row":
            self._open_table(name)
            return
        if self.outFile is None:
            raise ValueError("<row> element found before any table element")

        # Snapshot the names once so columns and values share one ordering.
        columns = list(attributes.keys())
        statement = u"insert into {0}({1}) values ({2});".format(
            self.table,
            u",".join(columns),
            u",".join(self._quote(attributes[column]) for column in columns),
        )

        try:
            encoded = statement.encode("utf-8")
        except UnicodeEncodeError:
            # Keep going: record the offending row (lossily) and count it.
            self.errParse += 1
            self.errfile.write(statement.encode("utf-8", "replace") + b"\n")
            return

        line = encoded + b"\n"
        self.outFile.write(line)
        self.outFile.flush()
        # Python 3 needs the binary layer for bytes; Python 2's stdout
        # already accepts byte strings.
        getattr(sys.stdout, "buffer", sys.stdout).write(line)

    def characters(self, data):
        """Ignore character data; only element attributes are used."""
        pass

    def endElement(self, name):
        """Ignore closing tags; output files stay open until endDocument."""
        pass

    def endDocument(self):
        """Close the output files once the parser reports end of document."""
        self.close()

    def close(self):
        """Close the current ``.sql`` and ``.err`` files, if any are open."""
        for handle in (self.outFile, self.errfile):
            if handle is not None:
                handle.close()
        self.outFile = None
        self.errfile = None

    def _open_table(self, name):
        """Make *name* the current table and (re)create its output files."""
        # Release the previous table's handles before replacing them.
        self.close()
        self.table = name
        self.outFile = open(name + ".sql", "wb")
        self.errfile = open(name + ".err", "wb")

    @staticmethod
    def _quote(value):
        """Return *value* as a double-quoted, backslash-escaped SQL string."""
        escaped = (
            value.replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'")
        )
        return u'"' + escaped + u'"'
def main(argv=None):
    """Parse the XML file named on the command line with ``SOHandler``.

    Args:
        argv: argument list including the program name; defaults to
            ``sys.argv`` when omitted.

    Returns:
        The process exit status: 1 when no XML file argument was given,
        otherwise 0 after the file has been parsed and the handler's
        ``errParse`` count has been printed.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        # Usage errors belong on stderr so they never mix with the SQL
        # statements echoed on stdout.
        sys.stderr.write("Give me an xml file argument!\n")
        return 1

    parser = xml.sax.make_parser()
    handler = SOHandler()
    parser.setContentHandler(handler)
    parser.parse(args[1])
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(handler.errParse)
    return 0


# Guarded so that importing this module neither parses anything nor exits.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Taken and modified from http://terokarvinen.com/2012/reading-stackoverflow-xml-dump-to-mysql-database