Created
June 15, 2017 22:24
-
-
Save vijayanandrp/c9a41ef9bc1513362a2ce0e86224d618 to your computer and use it in GitHub Desktop.
Step by Step method to parse XML files and store into sqlite db and vice versa
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| __author__ = 'admin' | |
| import sqlite3 | |
| db = sqlite3.connect('books_store.db') | |
| cursor = db.cursor() | |
| try: | |
| cursor.execute('''DROP TABLE books''') | |
| print 'Deleted the old table' | |
| except Exception as error: | |
| print 'Error: ', str(error) | |
| db.commit() | |
| cursor.execute('''CREATE TABLE IF NOT EXISTS books ( | |
| book_id TEXT PRIMARY KEY, | |
| author TEXT, | |
| title TEXT UNIQUE, | |
| genre TEXT, | |
| price REAL, | |
| publish_date DATE, | |
| description TEXT )''') | |
| db.commit() | |
| import xml.etree.ElementTree as eTree | |
| tree = eTree.parse('book.xml') | |
| library = tree.getroot() | |
| print "Root of the XML is ", library.tag | |
| book_data = [] | |
| for book in library: | |
| book_id, author, title, genre, price, publish_date, description = ('',) * 7 | |
| book_id = book.attrib.get('id') | |
| for book_details in book: | |
| if book_details.tag == 'book_id': | |
| book_id = str(book_details.text).strip() | |
| elif book_details.tag == 'author': | |
| author = str(book_details.text).strip() | |
| elif book_details.tag == 'title': | |
| title = str(book_details.text).strip() | |
| elif book_details.tag == 'genre': | |
| genre = str(book_details.text).strip() | |
| elif book_details.tag == 'price': | |
| price = str(book_details.text).strip() | |
| elif book_details.tag == 'publish_date': | |
| publish_date = str(book_details.text).strip() | |
| elif book_details.tag == 'description': | |
| description = str(book_details.text).strip() | |
| t = (book_id, author, title, genre, price, publish_date, description) | |
| book_data.append(t) | |
| # Method 1 of inserting the values into DB | |
| cursor.execute('''INSERT INTO books( | |
| book_id, | |
| author, | |
| title, | |
| genre, | |
| price, | |
| publish_date, | |
| description) | |
| VALUES (?, ?, ?, ?, ?, ?, ?) ''', (book_id, author, title, genre, price, publish_date, description)) | |
| # Method 2 of inserting the values into DB | |
| # cursor.execute('''INSERT INTO books(book_id, author, title, genre, price, publish_date, description) | |
| # VALUES (:book_id, :author, :title, :genre, :price, :publish_date, :description)''', | |
| # {'book_id': book_id, 'author': author, 'title': title, 'genre': genre, 'price': price, | |
| # 'publish_date': publish_date, 'description': description}) | |
| # | |
| db.commit() | |
| # for book in book_data: | |
| # print book_data | |
| # Method 3 of inserting the values into DB | |
| # cursor.executemany('''INSERT INTO books( | |
| # book_id, | |
| # author, | |
| # title, | |
| # genre, | |
| # price, | |
| # publish_date, | |
| # description) | |
| # VALUES (?, ?, ?, ?, ?, ?, ?) ''', book_data) | |
| # db.commit() | |
| catalog = eTree.Element('catalog') | |
| cursor.execute('''SELECT * FROM books''') | |
| all_rows = cursor.fetchall() # Default Iterator | |
| for row in all_rows: | |
| # print '{0}, {1}, {2}, {3}, {4}, {5}, {6}'.format(str(row[0]), str(row[1]), str(row[2]), str(row[3]), str(row[4]), | |
| # str(row[5]), str(row[6])) | |
| book_info = eTree.Element('book') | |
| book_info.set('id', str(row[0])) | |
| eTree.SubElement(book_info, 'author').text = str(row[1]) | |
| eTree.SubElement(book_info, 'title').text = str(row[2]) | |
| eTree.SubElement(book_info, 'genre').text = str(row[3]) | |
| eTree.SubElement(book_info, 'price').text = str(row[4]) | |
| eTree.SubElement(book_info, 'publish_date').text = str(row[5]) | |
| eTree.SubElement(book_info, 'description').text = str(row[6]) | |
| catalog.append(book_info) | |
| db.close() | |
| # This has the indentation problem | |
| # eTree.ElementTree(catalog).write('new_book.xml', xml_declaration=True, encoding='utf-8') | |
| from xml.dom import minidom | |
| xml_str = minidom.parseString(eTree.tostring(catalog).encode('utf-8')).toprettyxml(indent=" ") | |
| with open("new_book.xml", 'w') as xml_file: | |
| xml_file.write(xml_str.encode('utf-8')) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment