Skip to content

Instantly share code, notes, and snippets.

@aculich
Created March 31, 2015 19:39
Show Gist options
  • Select an option

  • Save aculich/a7500ec0dfc3b912f37e to your computer and use it in GitHub Desktop.

Select an option

Save aculich/a7500ec0dfc3b912f37e to your computer and use it in GitHub Desktop.
Extract highlighted content from Evernote-exported .enex files
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# quick and dirty extraction script that should be replaced by proper XSLT templates
# sudo apt-get install -y python3-dateutil python3-lxml
import re

import dateutil
import dateutil.parser
from lxml import etree
# Extract the highlighted passages of a single Evernote note export and print
# them, preceded by a small header: file name, creation date, title, source
# URL and tags.

# Path of the Evernote export (.enex) to read.
docxml = 'BeStill.enex'
print(docxml)
doc_root = etree.parse(docxml)

# The note body is itself an XML document stored as text inside <content>.
contents = doc_root.xpath('''.//content/text()[1]''')
if not contents:
    raise SystemExit('{}: no <content> text found'.format(docxml))
content = contents[0]

# Drop the embedded XML declaration before re-parsing: lxml rejects str input
# that carries an encoding declaration. `flags=` must be passed by keyword,
# because the fourth positional argument of re.sub is `count`.
content_no_encoding = re.sub(
    r"""<\?xml version="1\.0" encoding="UTF-8"\?>""",
    '',
    content,
    flags=re.IGNORECASE,
)
notes = etree.XML(content_no_encoding)

# Header: creation date (date part only), title, source URL, tags.
created = '\n'.join(doc_root.xpath('''//created/text()'''))
print(dateutil.parser.parse(created).isoformat().split('T')[0])
# Strip everything up to and including the last non-breaking space on each
# title line.
print(re.sub(r'''.*\xa0''', '', '\n'.join(doc_root.xpath('''//title/text()'''))))
print('\n'.join(doc_root.xpath('''//source-url/text()''')))
print(', '.join(doc_root.xpath('''//tag/text()''')))

# Text of every span whose inline style marks it as highlighted.
highlights = notes.xpath(
    '''//span[contains(@style, '-evernote-highlighted')]/text()''')
for highlight in highlights:
    print(highlight)
    # NOTE(review): the blank separator line is assumed to belong inside the
    # loop (one per highlight) -- confirm against the intended output.
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment