Skip to content

Instantly share code, notes, and snippets.

@bdunnette
Last active August 29, 2015 13:57
Show Gist options
  • Save bdunnette/9898264 to your computer and use it in GitHub Desktop.
Save bdunnette/9898264 to your computer and use it in GitHub Desktop.
import sys
import os
import os.path
import zipfile
from lxml import etree
from lxml import objectify
# Unpack the OpenDocument presentation named by the first command-line
# argument into ``tempdir`` and, for every slide page in content.xml, print
# the href of its image followed by the text span of its speaker notes.
tempdir = 'temp'

# Use the archive as a context manager so the file handle is released as
# soon as the members have been extracted.
with zipfile.ZipFile(sys.argv[1]) as docfile:
    docfile.extractall(tempdir)

# Read raw bytes rather than text: lxml rejects already-decoded strings that
# still carry an XML encoding declaration, and the parser can honour the
# declared encoding itself when it is handed bytes.
with open(os.path.join(tempdir, 'content.xml'), 'rb') as f:
    xml = f.read()
root = objectify.fromstring(xml)

# Namespace URIs used to build the Clark-notation ("{uri}local") names below.
draw_ns = "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
presentation_ns = "urn:oasis:names:tc:opendocument:xmlns:presentation:1.0"
text_ns = "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
xlink_ns = "http://www.w3.org/1999/xlink"

for el in root.body.presentation.iterchildren(tag="{%s}page" % draw_ns):
    # Plain attribute access resolves children in the parent's namespace,
    # so ``frame`` and ``image`` are looked up in the drawing namespace.
    image_filename = el.frame.image.attrib["{%s}href" % xlink_ns]
    print(image_filename)

    # Children that live in a different namespace than their parent need an
    # explicit namespace-qualified lookup via getattr().
    notes = getattr(el, "{%s}notes" % presentation_ns)
    # "text-box" is not a valid Python identifier, hence the item lookup.
    text_box = getattr(notes, "{%s}frame" % draw_ns)["text-box"]
    span = getattr(text_box, "{%s}p" % text_ns).span
    print(span)
import sys
import os
import os.path
import zipfile
from lxml import etree
# Unpack the PowerPoint (.pptx) file named by the first command-line argument
# into ``tempdir``, resolve every slide listed in ppt/presentation.xml to its
# part file through ppt/_rels/presentation.xml.rels, and dump the
# relationships of each slide.
tempdir = 'temp'

# Use the archive as a context manager so the file handle is released as
# soon as the members have been extracted.
with zipfile.ZipFile(sys.argv[1]) as docfile:
    docfile.extractall(tempdir)

presentation = etree.parse(os.path.join(tempdir, 'ppt', 'presentation.xml')).getroot()
slide_list = presentation.find("{http://schemas.openxmlformats.org/presentationml/2006/main}sldIdLst")

# find() returns None when the presentation has no slide id list; treat that
# as "no slides" instead of failing while iterating None.
slides = []
if slide_list is not None:
    for slide in slide_list:
        slides.append({
            'id': slide.attrib['id'],
            'rid': slide.get("{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id"),
        })

# Index the slides by relationship id once so each relationship is matched
# with a dictionary lookup rather than a scan of the whole slide list.
# setdefault keeps the first slide seen for a given id.
slides_by_rid = {}
for slide in slides:
    slides_by_rid.setdefault(slide['rid'], slide)

presentation_rels = etree.parse(os.path.join(tempdir, 'ppt', '_rels', 'presentation.xml.rels')).getroot()
# Iterate the element directly; getchildren() is deprecated in lxml.
for rel in presentation_rels:
    matched_slide = slides_by_rid.get(rel.get('Id'))
    if matched_slide is not None:
        matched_slide['file'] = rel.get('Target')

for slide in slides:
    slide_file = slide.get('file')
    if not slide_file:
        # No relationship pointed at this slide, so there is no part file to
        # inspect; report it and carry on with the remaining slides.
        sys.stderr.write("no relationship target for slide id %s\n" % slide['id'])
        continue
    print(slide_file)

    # The slide's relationships part is named after the last path segment of
    # its target (e.g. "slides/slide1.xml" -> "slide1.xml.rels").
    slide_name = slide_file.rsplit('/', 1)[-1]
    slide_rel_parse = etree.parse(os.path.join(tempdir, 'ppt', 'slides', '_rels', slide_name + '.rels'))
    # tostring() serialises to ASCII bytes by default; decode so the output
    # is plain text on both Python 2 and Python 3.
    print(etree.tostring(slide_rel_parse).decode('ascii'))
    rels = slide_rel_parse.findall('*')
    print(rels)
    for r in rels:
        print(etree.tostring(r).decode('ascii'))
        print(r.attrib['Target'])
    # NOTE: the slide part itself (os.path.join(tempdir, 'ppt', slide_file))
    # is not parsed here; only its relationships are listed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment