Skip to content

Instantly share code, notes, and snippets.

@nandha005
Created January 12, 2018 05:57
Show Gist options
  • Save nandha005/061516cfdcb8a5355aa926aeb842f5c3 to your computer and use it in GitHub Desktop.
Save nandha005/061516cfdcb8a5355aa926aeb842f5c3 to your computer and use it in GitHub Desktop.
Xml to csv task
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.3 (C:\Users\nandha.kumar.ramani\AppData\Local\Programs\Python\Python36-32\python.exe)" project-jdk-type="Python SDK" />
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/XmlCsv.iml" filepath="$PROJECT_DIR$/.idea/XmlCsv.iml" />
</modules>
</component>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
</component>
</module>
user_data.xml:54:0:ERROR:SCHEMASV:SCHEMAV_ELEMENT_CONTENT: Element 'Address': Missing child element(s). Expected is ( Pincode ).
user_data.xml:72:19:FATAL:PARSER:ERR_TAG_NAME_MISMATCH: Opening and ending tag mismatch: Pincode line 71 and Address
user_data.xml:74:15:FATAL:PARSER:ERR_TAG_NAME_MISMATCH: Opening and ending tag mismatch: Address line 68 and Insurer
user_data.xml:89:8:FATAL:PARSER:ERR_TAG_NAME_MISMATCH: Opening and ending tag mismatch: Insurer line 61 and Hcsc
user_data.xml:89:8:FATAL:PARSER:ERR_TAG_NOT_FINISHED: Premature end of data in tag Hcsc line 2
title author author
Sandman Volume 1: Preludes and Nocturnes Neil Gaiman Neil Gaiman
Good Omens Neil Gamain Neil Gamain
Good Omens Terry Pratchett Terry Pratchett
The Man And The Goat Bubber Elderidge Bubber Elderidge
Once Upon A Time in LA Dr Dre Dr Dre
There Will Never Be Justice IR Jury IR Jury
Beginning Python Peter Norton, et al Peter Norton, et al
FirstName LastName Premium Age Town Country Pincode PolicyNumber
Nandha Kumar 5000 22 Udumalpet India 626641 1234
Prasanna Prakasam 8000 20 Udumalpet India 626641 1231
Priya Dharshini 21000 26 Udumalpet India 626641 1232
guru lakshmi 7000 24 Udumalpet India 626641 1235
Moni sha 5500 27 Udumalpet India 626641 1236
Nandha Kumar 7050 28 Udumalpet India 626641 1237
Name Premium
Nandha kumar 5000
Prasanna 8000
GuruLakshmi 21000
Moni 7000
Nivi 5500
Priya 7050
Nandha Kumar 5000 22 Udumalpet India 626641 1234
Prasanna Prakasam 8000 20 Udumalpet India 626641 1231
Priya Dharshini 21000 26 Udumalpet India 626641 1232
guru lakshmi 7000 24 Udumalpet India 626641 1235
Moni sha 5500 27 Udumalpet India 626641 1236
Nandha Kumar 7050 28 Udumalpet India 626641 1237
from xml.dom.minidom import parse
import xml.dom.minidom
import csv
def writeToCSV(myData):
    """Write the name and premium(s) of every <Insurer> under *myData* to output1.csv.

    The file starts with a ``Name,Premium`` header row.  Each following row
    holds one insurer's name followed by the text of each of its <Premium>
    elements.

    Args:
        myData: A DOM node (for example the <Hcsc> element) whose descendant
            <Insurer> elements are exported.

    Raises:
        IndexError: If an <Insurer> has no <Name> element, or a <Name> or
            <Premium> element has no child nodes.
    """
    # newline='' hands line-ending control to the csv module; without it an
    # extra '\r' is written after every row on platforms that use '\r\n'.
    with open('output1.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['Name', 'Premium'])
        for Insurer in myData.getElementsByTagName("Insurer"):
            # Reads the first child node of the first <Name> element, so the
            # name is expected to be plain text directly inside <Name>.
            titleValue = Insurer.getElementsByTagName("Name")[0].childNodes[0].data
            # Collect the text of every <Premium> element of this insurer.
            premium = [pre.childNodes[0].data for pre in Insurer.getElementsByTagName("Premium")]
            writer.writerow([titleValue] + premium)
# Load user_data.xml and export the insurers found under its first <Hcsc> element.
doc = parse('user_data.xml')
hcsc_elements = doc.getElementsByTagName("Hcsc")
myData = hcsc_elements[0]
writeToCSV(myData)
from xml.dom.minidom import parse
import xml.dom.minidom
import csv
def writeToCSV(myDatas):
    """Export every <Insurer> found under the given nodes to output3.csv.

    The file starts with a header row, followed by one row per insurer with
    the columns FirstName, LastName, Premium, Age, Town, Country, Pincode and
    PolicyNumber.

    Args:
        myDatas: An iterable of DOM nodes (for example the list of <Hcsc>
            elements) whose descendant <Insurer> elements are exported.
    """
    # newline='' hands line-ending control to the csv module; without it an
    # extra '\r' is written after every row on platforms that use '\r\n'.
    with open('output3.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerow(['FirstName', 'LastName', 'Premium', 'Age', 'Town', 'Country', 'Pincode', 'PolicyNumber'])
        for myData in myDatas:
            for Insurer in myData.getElementsByTagName("Insurer"):
                Fn, Ln = nameFun(Insurer)          # <Name> tag
                pr = premiumFun(Insurer)           # <Premium> tag
                ag = ageFun(Insurer)               # <Age> tag
                tn, cn, pn = addressFun(Insurer)   # <Address> tag
                pnum = policyFun(Insurer)          # <PolicyNumber> tag
                writer.writerow([Fn, Ln, pr, ag, tn, cn, pn, pnum])
def nameFun(Insurer):
    """Return the first and last name stored under *Insurer*.

    Looks at every <Name> descendant and, inside it, at every <FirstName> and
    <LastName> descendant; when several are present the last one wins.

    Args:
        Insurer: DOM element to search.

    Returns:
        A ``(first_name, last_name)`` tuple of strings.  A part is ``''`` when
        its element is missing or has no content.
    """
    # Defaults keep the function from failing on a missing element.
    Fn, Ln = '', ''
    for name in Insurer.getElementsByTagName("Name"):
        for first_name in name.getElementsByTagName("FirstName"):
            # An empty element such as <FirstName/> has no child nodes.
            Fn = first_name.childNodes[0].data if first_name.childNodes else ''
        for last_name in name.getElementsByTagName("LastName"):
            Ln = last_name.childNodes[0].data if last_name.childNodes else ''
    return Fn, Ln
def premiumFun(Insurer):
    """Return the text of the last <Premium> descendant of *Insurer*.

    Args:
        Insurer: DOM element to search.

    Returns:
        The premium as a string, or ``''`` when no <Premium> element exists
        or the element has no content.
    """
    pr = ''  # default for a missing element
    for premium in Insurer.getElementsByTagName("Premium"):
        # An empty element such as <Premium/> has no child nodes.
        pr = premium.childNodes[0].data if premium.childNodes else ''
    return pr
def ageFun(Insurer):
    """Return the text of the last <Age> descendant of *Insurer*.

    Args:
        Insurer: DOM element to search.

    Returns:
        The age as a string, or ``''`` when no <Age> element exists or the
        element has no content.
    """
    ag = ''  # default for a missing element
    for age in Insurer.getElementsByTagName("Age"):
        # An empty element such as <Age/> has no child nodes.
        ag = age.childNodes[0].data if age.childNodes else ''
    return ag
def addressFun(Insurer):
    """Return the town, country and pincode stored under *Insurer*.

    Looks at every <Address> descendant and, inside it, at every <Town>,
    <Country> and <Pincode> descendant; when several are present the last
    one wins.

    Args:
        Insurer: DOM element to search.

    Returns:
        A ``(town, country, pincode)`` tuple of strings.  A part is ``''``
        when its element is missing or has no content.
    """
    # Defaults keep the function from failing on a missing element.
    tn, cn, pn = '', '', ''
    for address in Insurer.getElementsByTagName("Address"):
        for town in address.getElementsByTagName("Town"):
            # An empty element such as <Town/> has no child nodes.
            tn = town.childNodes[0].data if town.childNodes else ''
        for country in address.getElementsByTagName("Country"):
            cn = country.childNodes[0].data if country.childNodes else ''
        for pincode in address.getElementsByTagName("Pincode"):
            pn = pincode.childNodes[0].data if pincode.childNodes else ''
    return tn, cn, pn
def policyFun(Insurer):
    """Return the text of the last <PolicyNumber> descendant of *Insurer*.

    Args:
        Insurer: DOM element to search.

    Returns:
        The policy number as a string, or ``''`` when no <PolicyNumber>
        element exists or the element has no content.
    """
    pnum = ''  # default for a missing element
    for policyNumber in Insurer.getElementsByTagName("PolicyNumber"):
        # An empty element such as <PolicyNumber/> has no child nodes.
        pnum = policyNumber.childNodes[0].data if policyNumber.childNodes else ''
    return pnum
import validation

# Run the checks first; the export only happens when both returned flags are 1.
xml_result, xsd_result = validation.validate()
# `and`, not `&`: the bitwise operator binds tighter than `==` and would turn
# the condition into the chained comparison `xml_result == (1 & xsd_result) == 1`.
if xml_result == 1 and xsd_result == 1:
    doc = parse('user_data.xml')
    myDatas = doc.getElementsByTagName("Hcsc")
    writeToCSV(myDatas)
<?xml version="1.0" ?>
<Hcsc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="user_data.xsd">
<Insurer Policyid="123">
<Name>
<FirstName>Nandha</FirstName>
<LastName>Kumar</LastName>
</Name>
<Premium>5000</Premium>
<Age>22</Age>
<Address>
<Town>Udumalpet</Town>
<Country>India</Country>
<Pincode>626641</Pincode>
</Address>
<PolicyNumber>1234</PolicyNumber>
</Insurer>
<Insurer Policyid="124">
<Name>
<FirstName>Prasanna</FirstName>
<LastName>Prakasam</LastName>
</Name>
<Premium>8000</Premium>
<Age>20</Age>
<Address>
<Town>Udumalpet</Town>
<Country>India</Country>
<Pincode>626641</Pincode>
</Address>
<PolicyNumber>1231</PolicyNumber>
</Insurer>
<Insurer Policyid="125">
<Name>
<FirstName>Priya</FirstName>
<LastName>Dharshini</LastName>
</Name>
<Premium>21000</Premium>
<Age>26</Age>
<Address>
<Town>Udumalpet</Town>
<Country>India</Country>
<Pincode>626641</Pincode>
</Address>
<PolicyNumber>1232</PolicyNumber>
</Insurer>
<Insurer Policyid="126">
<Name>
<FirstName>guru</FirstName>
<LastName>lakshmi</LastName>
</Name>
<Premium>7000</Premium>
<Age>24</Age>
<Address>
<Town>Udumalpet</Town>
<Country>India</Country>
<Pincode>626641</Pincode>
</Address>
<PolicyNumber>1235</PolicyNumber>
</Insurer>
<Insurer Policyid="127">
<Name>
<FirstName>Moni</FirstName>
<LastName>sha</LastName>
</Name>
<Premium>5500</Premium>
<Age>27</Age>
<Address>
<Town>Udumalpet</Town>
<Country>India</Country>
<Pincode>626641</Pincode>
</Address>
<PolicyNumber>1236</PolicyNumber>
</Insurer>
<Insurer Policyid="128">
<Name>
<FirstName>Nandha</FirstName>
<LastName>Kumar</LastName>
</Name>
<Premium>7050</Premium>
<Age>28</Age>
<Address>
<Town>Udumalpet</Town>
<Country>India</Country>
<Pincode>626641</Pincode>
</Address>
<PolicyNumber>1237</PolicyNumber>
</Insurer>
</Hcsc>
<?xml version="1.0" ?>
<xs:schema xmlns:xs = "http://www.w3.org/2001/XMLSchema">
<xs:element name="Hcsc">
<xs:complexType>
<xs:sequence>
<xs:element name="Insurer" maxOccurs="unbounded">
<xs:complexType>
<xs:sequence>
<xs:element name="Name">
<xs:complexType>
<xs:sequence>
<xs:element name="FirstName" type="xs:string">
</xs:element>
<xs:element name="LastName" type="xs:string">
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="Premium" type="xs:string">
</xs:element>
<xs:element name="Age" type="xs:int">
</xs:element>
<xs:element name="Address">
<xs:complexType>
<xs:sequence>
<xs:element name="Town" type="xs:string">
</xs:element>
<xs:element name="Country" type="xs:string">
</xs:element>
<xs:element name="Pincode" type="xs:string">
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="PolicyNumber" type="xs:string">
</xs:element>
</xs:sequence>
<xs:attribute name="Policyid" type="xs:int" use="required">
</xs:attribute>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
from lxml import etree
def validate():
    """Check that user_data.xml is well formed and valid against user_data.xsd.

    Progress and error messages are printed.  Syntax errors are written to
    error_syntax.log and schema violations to error_schema.log.

    Returns:
        A ``(result1, result2)`` tuple: ``result1`` is 1 once the XML parsed
        and ``result2`` is 1 once it passed schema validation.  Every failure
        path exits instead of returning, so a returned tuple is ``(1, 1)``.

    Raises:
        SystemExit: If the XML file cannot be read, is not well formed, fails
            schema validation, or any other error occurs while checking it.
        Errors raised while loading user_data.xsd itself are not handled here
        and propagate to the caller.
    """
    result1, result2 = 0, 0
    xsd_doc = etree.parse("user_data.xsd")
    xmlschema = etree.XMLSchema(xsd_doc)

    # parse xml
    try:
        doc = etree.parse("user_data.xml")
    # check for file IO error
    except IOError:
        print('Invalid File')
        # Stop here: there is no parsed document to validate.
        raise SystemExit
    # check for XML syntax errors
    except etree.XMLSyntaxError as err:
        print('XML Syntax Error, see error_syntax.log')
        with open('error_syntax.log', 'w') as error_log_file:
            error_log_file.write(str(err.error_log))
        raise SystemExit
    # `except Exception` leaves KeyboardInterrupt and SystemExit alone.
    except Exception:
        print('Unknown error, exiting.')
        raise SystemExit
    else:
        print('XML well formed, syntax ok.')
        result1 = 1

    # validate against schema
    try:
        xmlschema.assertValid(doc)
    except etree.DocumentInvalid as err:
        print('Schema validation error, see error_schema.log')
        with open('error_schema.log', 'w') as error_log_file:
            error_log_file.write(str(err.error_log))
        raise SystemExit
    except Exception:
        print('Unknown error, exiting.')
        raise SystemExit
    else:
        print('XML valid, schema validation ok.')
        result2 = 1

    return result1, result2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment