Skip to content

Instantly share code, notes, and snippets.

@vpetro
Created March 1, 2011 19:12
Show Gist options
  • Save vpetro/849682 to your computer and use it in GitHub Desktop.
Save vpetro/849682 to your computer and use it in GitHub Desktop.
SAX-based parser to extract blocks of elements as Python dictionaries
from xml.sax import ContentHandler, parseString
from datetime import datetime
class Parser(ContentHandler):
    """SAX handler that collects repeated "block" elements as dictionaries.

    Every occurrence of the element named ``block_name`` becomes one block.
    Inside a block, each element that carries character data contributes a
    ``(tag, stripped text)`` entry, and selected attributes can be captured
    as well (see ``attrs``).  When the document ends, every block is folded
    into a dictionary; a tag that occurs more than once inside the same
    block maps to a list of its values in document order.

    The result is stored in ``self._blocks`` (a list of dictionaries) once
    ``endDocument`` has run.
    """

    def __init__(self, block_name, attrs=None):
        """Create a handler for blocks named ``block_name``.

        ``attrs`` is an optional mapping ``{element name: attribute name}``.
        For each listed element found inside a block, the value of the named
        attribute is stored under the key ``"<element>_<attribute>"``.
        """
        ContentHandler.__init__(self)
        self._block_name = block_name
        # Name of the most recently opened non-block element.
        self._name = None
        # Finished blocks: lists of pairs while parsing, dictionaries after
        # endDocument().
        self._blocks = []
        # Pairs collected for the block being parsed; None outside a block.
        self._current_block = None
        # Character data accumulated since the last closing tag.
        self._content = ""
        self._attrs = attrs

    def startElement(self, name, attrs):
        """Open a new block or remember the element that was just opened."""
        if name == self._block_name:
            self._current_block = []
        else:
            self._name = name

        # Attributes are only recorded while a block is open; there is
        # nowhere to store them otherwise.
        if self._current_block is None or not self._attrs:
            return

        # Look up by the element actually being opened.  Relying on
        # self._name here would use a stale name when the block element
        # itself starts.
        attr_name = self._attrs.get(name)
        if attr_name is None:
            return

        # Attributes may be optional in the document: skip missing ones
        # instead of failing the whole parse with a KeyError.
        value = attrs.get(attr_name)
        if value is not None:
            self._current_block.append(("%s_%s" % (name, attr_name), value))

    def characters(self, content):
        """Accumulate character data; SAX may deliver text in several chunks."""
        self._content += content

    def endElement(self, name):
        """Store the text of the closing element and finish a closing block."""
        inside_block = self._current_block is not None

        # Elements outside any block (for example document-level metadata)
        # are ignored rather than appended to a block that does not exist.
        if inside_block and self._name == name and self._content:
            self._current_block.append((name, self._content.strip()))
        self._content = ""

        if inside_block and name == self._block_name:
            self._blocks.append(self._current_block)
            self._current_block = None

    def endDocument(self):
        """Convert every collected block from a pair list into a dictionary."""
        # Blocks that are already dictionaries come from a previous document
        # parsed with this same handler; keep them untouched.
        self._blocks = [
            block if isinstance(block, dict) else self._to_dict(block)
            for block in self._blocks
        ]

    @staticmethod
    def _to_dict(pairs):
        """Fold ``(name, value)`` pairs into a dict, listing repeated names."""
        merged = {}
        for key, value in pairs:
            if key not in merged:
                merged[key] = value
            elif isinstance(merged[key], list):
                merged[key].append(value)
            else:
                merged[key] = [merged[key], value]
        return merged
def parse_date(date_string, now=None):
    """Convert a date string into a ``datetime`` at midnight.

    Accepted inputs:

    * ``None``, an empty string or a whitespace-only string: returns ``None``.
    * ``"ongoing"``: returns the date seven days after ``now``.
    * ``"YYYY-MM-DD"``, optionally followed by whitespace and a time part:
      returns that date; the time part is ignored.

    ``now`` is the reference moment for ``"ongoing"`` and defaults to
    ``datetime.now()``.

    Raises ``ValueError`` when the date part is not three integers
    separated by ``-`` or does not form a valid calendar date.
    """
    # Local import: the module itself only imports ``datetime``.
    from datetime import timedelta

    if date_string is None:
        return None

    # Splitting on whitespace separates the date from an optional time part
    # and yields an empty list for empty or blank input.
    parts = date_string.split()
    if not parts:
        return None

    if parts == ["ongoing"]:
        if now is None:
            now = datetime.now()
        # timedelta arithmetic rolls over month and year boundaries
        # correctly, which building datetime(day=now.day + 7) cannot do.
        midnight = datetime(now.year, now.month, now.day)
        return midnight + timedelta(days=7)

    year, month, day = parts[0].split("-")
    return datetime(int(year), int(month), int(day))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment