Created
October 27, 2009 08:46
-
-
Save dopuskh3/219419 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
from lxml import etree | |
# Cardinality markers stored under the 'card' key of a property definition.
CARD_MULT = 1    # property covers every matched node; values are lists
CARD_SINGLE = 0  # property covers the first matched node only (default)


class xmlStor(object):
    """Expose parts of a parsed XML/HTML fragment as plain attributes.

    Subclasses fill ``properties`` with ``name -> definition`` entries, where
    a definition is a dict with:

    * ``'xpath'``     -- required; expression passed to ``self.tree.xpath``.
    * ``'attribute'`` -- optional; read/write this attribute of the matched
      nodes instead of their text.
    * ``'card'``      -- optional; ``CARD_MULT`` or ``CARD_SINGLE``
      (``__init__`` fills in ``CARD_SINGLE`` when it is absent).

    Reading an unknown property, or one whose XPath matches nothing, yields
    ``None``; assigning to one is silently ignored.  ``tree`` holds the
    parsed document and ``blob`` is its serialized form.
    """

    properties = {}

    def __init__(self):
        self.tree = None
        # Definitions are class-level dicts, so the default cardinality is
        # written in place and shared by every instance of the subclass.
        for definition in self.properties.values():
            if 'card' not in definition:
                definition['card'] = CARD_SINGLE

    @property
    def blob(self):
        """Serialized ``tree`` (via ``etree.tostring``), or None without a tree."""
        if self.tree is not None:
            return etree.tostring(self.tree)
        return None

    def setBlob(self, blob):
        """Parse *blob* with ``etree.fromstring`` and make it the current tree.

        ``instance.blob = data`` is routed here by ``__setattr__``.
        """
        self.tree = etree.fromstring(blob)

    def _checkproperty(self, name):
        """ Check if a valid property with the given name exists """
        definition = self.properties.get(name)
        return definition is not None and 'xpath' in definition

    def _select(self, name):
        """Return the list of nodes matched by property *name*, or []."""
        tree = self.tree
        if tree is None:
            return []
        try:
            nodes = tree.xpath(self.properties[name]['xpath'])
        except (etree.XPathError, TypeError):
            # invalid expression: treated the same as "no match"
            return []
        # XPath may also evaluate to a number, boolean or string; only a
        # node list is usable here.
        return nodes if isinstance(nodes, list) else []

    @staticmethod
    def _read(node, definition):
        """Return the mapped attribute (default u"") or the text of *node*."""
        if 'attribute' in definition:
            return node.get(definition['attribute'], u"")
        return node.text

    @staticmethod
    def _write(node, definition, item):
        """Store *item* in the mapped attribute or in the text of *node*."""
        if 'attribute' in definition:
            node.set(definition['attribute'], item)
        else:
            node.text = item

    def __getattr__(self, name):
        """Resolve *name* through ``properties``; only called when normal lookup fails.

        Returns a list of values for ``CARD_MULT`` properties, a single value
        otherwise, and ``None`` when the property is unknown, has no match,
        or matched something that is not an element.
        """
        # Protocol probes (copy, pickle, repr helpers...) expect a missing
        # special method to raise, not to come back as None.
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        if name == "tree":
            # .get(): an instance that never ran __init__ has no tree yet
            return self.__dict__.get("tree")
        if name == "blob":
            # Only reached when the ``blob`` property itself raised
            # AttributeError; evaluating it again here would recurse.
            return None
        if not self._checkproperty(name):
            return None

        definition = self.properties[name]
        nodes = self._select(name)
        if not nodes:
            return None
        try:
            if definition.get('card', CARD_SINGLE) == CARD_MULT:
                return [self._read(node, definition) for node in nodes]
            return self._read(nodes[0], definition)
        except AttributeError:
            # the XPath selected strings/attributes rather than elements
            return None

    def __setattr__(self, name, value):
        """Write *value* into the nodes mapped by property *name*.

        * ``blob`` is parsed through ``setBlob``; ``tree`` is stored as is.
        * A list assigned to a ``CARD_MULT`` property is written node by
          node in document order.  Surplus values create new elements with
          the tag of the last matched node, appended to that node's parent;
          surplus nodes are left untouched.
        * Any other value is written to the first matched node.
        * Unknown names, properties without a match, a list assigned to a
          ``CARD_SINGLE`` property and values the tree rejects are ignored
          (best effort: nodes written before a rejected value keep their
          new content).
        """
        if name == "blob":
            self.setBlob(value)
            return
        if name == "tree":
            # bypass this method to avoid recursing into ourselves
            self.__dict__["tree"] = value
            return
        if not self._checkproperty(name):
            return

        definition = self.properties[name]
        nodes = self._select(name)
        if not nodes:
            return
        try:
            if not isinstance(value, list):
                self._write(nodes[0], definition, value)
                return
            if definition.get('card', CARD_SINGLE) != CARD_MULT:
                return
            # zip() stops at the shorter sequence, so every pair is valid
            for node, item in zip(nodes, value):
                self._write(node, definition, item)
            if len(value) > len(nodes):
                last = nodes[-1]
                parent = last.getparent()
                if parent is None:
                    # the last match is the root: it cannot get siblings
                    return
                for item in value[len(nodes):]:
                    self._write(etree.SubElement(parent, last.tag),
                                definition, item)
        except (AttributeError, TypeError, ValueError):
            # non-element match or a value the tree refuses to store
            return
class testBlob(xmlStor):
    """Sample ``xmlStor`` mapping used by the demo script in this file.

    ``update``, ``status`` and ``uriLink`` map to the text of the first
    matching element, ``uuid`` to the ``id`` attribute of the first
    ``div.product`` and ``links`` to the ``href`` of every ``<a>``.
    """

    properties = dict(
        update=dict(xpath="//span[@class='update']"),
        uuid=dict(xpath="//div[@class='product']", attribute="id"),
        status=dict(xpath="//span[@class='status']"),
        links=dict(xpath="//a", card=CARD_MULT, attribute="href"),
        uriLink=dict(xpath="//a[@class='uri']"),
    )
if __name__ == "__main__":
    # Demo: read the document named by the first command-line argument,
    # wrap every <div class='product'> in a testBlob, rewrite its uuid and
    # links, print a summary and dump the modified fragments to out.html.
    t = etree.parse(sys.argv[1])
    print("Parsed")

    blobs = []
    for b in t.xpath("//div[@class='product']"):
        print("---------------")
        blob = testBlob()
        blob.blob = etree.tostring(b)

        # uriLink is None when the product has no <a class='uri'> (or the
        # link has no text); concatenating None would raise TypeError.
        uri = blob.uriLink
        if uri is not None:
            blob.uuid = "urn:uuid:" + uri

        # Each read of ``links`` builds a new list, so the list must be
        # modified and assigned back; ``or []`` covers a product without
        # any link, for which the read returns None.
        links = blob.links or []
        links.append("http://foobar")
        blob.links = links
        blobs.append(blob)

    report = ("\n"
              "Update: %s\n"
              "uuid: %s\n"
              "uriLink: %s\n"
              "status: %s\n"
              "links: %s")
    for b in blobs:
        print("--")
        print(report % (b.update, b.uuid, b.uriLink, b.status, str(b.links)))

    # etree.tostring() produces a byte string, hence binary mode; ``with``
    # closes the file even if a write fails.
    with open("out.html", "wb") as f:
        for i in blobs:
            f.write(i.blob)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment