Skip to content

Instantly share code, notes, and snippets.

@tilacog
Last active November 10, 2017 16:11
Show Gist options
  • Save tilacog/847929657f6a676a9a70fc1344518943 to your computer and use it in GitHub Desktop.
Save tilacog/847929657f6a676a9a70fc1344518943 to your computer and use it in GitHub Desktop.
Converter XSDs do eSocial para CSV
import os
import csv
import sys
from glob import glob
from xml.etree import ElementTree as ET
# Directory whose entries are globbed and parsed as XSD files by the script
# driver at the bottom of this file. Replace the placeholder before running.
xsd_dir_path = 'PUT BASE DIRECTORY HERE :)'
# True when the script is invoked with the `--no-duplicates` command-line flag;
# handle_xsd_file() then skips any table name it has already emitted.
no_duplicate_tables = '--no-duplicates' in sys.argv
# Names of the tables already emitted, shared across all processed files.
done_tables = set()
def strip_ns(root_node):
    """Remove the ``{namespace}`` prefix from every tag under *root_node*.

    The tree is modified in place (``root_node`` itself included) and nothing
    is returned. Attribute names are left untouched.
    """
    for el in root_node.iter():
        tag = el.tag
        # Comment and processing-instruction nodes carry a callable, not a
        # string, as their tag; a substring test on them would raise TypeError.
        if isinstance(tag, str) and '}' in tag:
            # Keep everything after the first '}' (the local name).
            el.tag = tag.partition('}')[2]
def is_table(node):
    """Return True when *node* is treated as a table definition.

    A node qualifies when its tag is ``complexType`` and it has a non-empty
    ``name`` attribute starting with ``t`` or ``T``.
    """
    if node.tag != 'complexType':
        return False
    type_name = node.attrib.get('name')
    # bool() keeps the return type a strict bool for a missing or empty name.
    return bool(type_name) and type_name.lower().startswith('t')
def documentation(node):
    """Return the first line of the first ``documentation`` text under *node*.

    The search covers *node* itself and all of its descendants, in document
    order. The text is stripped of surrounding whitespace before its first
    line is taken. Returns an empty string when there is no ``documentation``
    element or when that element has no text.
    """
    # A default for next() avoids a StopIteration, which would surface as a
    # RuntimeError when this helper is called from inside a generator.
    doc_node = next(node.iter('documentation'), None)
    if doc_node is None or not doc_node.text:
        return ''
    return doc_node.text.strip().split('\n', 1)[0]
def handle_element(element_node):
    """Describe one ``element`` node as a ``(name, optional, description)`` tuple.

    * ``name`` -- the ``name`` attribute; falls back to the ``ref`` attribute
      (or an empty string) for elements declared by reference.
    * ``optional`` -- True when ``minOccurs`` is present and less than 1.
    * ``description`` -- the result of ``documentation(element_node)``.

    Raises ``ValueError`` when ``minOccurs`` is present but not an integer.
    """
    attrs = element_node.attrib

    # Column name. Elements declared with ref="..." have no "name" attribute.
    name = attrs.get("name") or attrs.get("ref", "")

    # Column description.
    description = documentation(element_node)

    # Optional? A missing or empty minOccurs means the XSD default of 1,
    # i.e. the element is required.
    min_occurs = attrs.get("minOccurs")
    min_occurs = int(min_occurs) if min_occurs else 1
    optional = min_occurs < 1

    return (name, optional, description)
def handle_xsd_file(filepath):
    """Yield one row per column of every table found in the XSD at *filepath*.

    Each row is the tuple ``(schema_doc, table_index, table_name, table_doc,
    column_index, column_name, column_optional, column_doc)``; both indexes
    are 1-based. When the module-level ``no_duplicate_tables`` flag is set,
    tables whose name is already in ``done_tables`` are skipped.
    """
    schema_root = ET.parse(filepath).getroot()
    strip_ns(schema_root)
    schema_doc = documentation(schema_root)

    table_nodes = list(filter(is_table, schema_root.iter()))
    for table_index, table_node in enumerate(table_nodes, start=1):
        table_name = table_node.attrib["name"]

        # Skip tables that were already handled (possibly in an earlier file).
        if no_duplicate_tables and table_name in done_tables:
            continue

        table_doc = documentation(table_node)
        # Collect every column up front, before any row is yielded.
        columns = list(map(handle_element, table_node.iter('element')))

        for column_index, column in enumerate(columns, start=1):
            column_name, column_optional, column_doc = column
            yield (
                schema_doc,
                table_index,
                table_name,
                table_doc,
                column_index,
                column_name,
                column_optional,
                column_doc,
            )

        # NOTE(review): the pasted source lost its indentation; recording the
        # table once per table, after its rows, is the assumed placement.
        done_tables.add(table_name)
# Script driver: write one CSV row per table column to stdout, each row
# prefixed with the base name of the file it came from.
writer = csv.writer(sys.stdout)
# glob() yields paths in arbitrary order; sorting makes the output order (and
# which file "wins" under --no-duplicates) reproducible between runs.
for f in sorted(glob(os.path.join(xsd_dir_path, '*'))):
    basename = os.path.basename(f)
    for row in handle_xsd_file(f):
        writer.writerow([basename, *row])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment