Created
July 27, 2012 07:26
-
-
Save xiaoganghan/3186646 to your computer and use it in GitHub Desktop.
Parsing Evernote export file (.enex) using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
<en-export export-date="20120727T073610Z" application="Evernote" version="Evernote Mac 3.0.5 (209942)">
<note><title>Vim Tips</title><content><![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
<en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
yank for copy, delete for cut, put for parse
<div><br/></div>
<div>Move in context, not position</div>
<div>/ search forward</div>
<div>? search backward</div>
<div>n repeat last search</div>
<div>N repeat last search but in the opposite direction</div>
<div>tx move to 'x'</div>
<div>fx find 'x'</div>
</en-note>
]]></content><created>20101229T161500Z</created><updated>20101231T161039Z</updated><note-attributes/></note>
</en-export>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from lxml import etree | |
from StringIO import StringIO | |
#http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/ | |
# Parser used by parseNoteXML for the XML document embedded in each note's
# <content> element.  Entity resolution is switched off so entity references
# inside a note body are not expanded while parsing.
p = etree.XMLParser(
    remove_blank_text=True,
    resolve_entities=False,
)
def parseNoteXML(xmlFile):
    """Parse an Evernote export (.enex) file into a list of note dicts.

    The export is streamed with ``etree.iterparse``.  Each element yielded
    by the parser is stored in the current note's dict under its tag, and
    the dict is appended to the result when a ``<note>`` element is
    yielded.

    Args:
        xmlFile: Filename or file-like object handed to ``etree.iterparse``.

    Returns:
        A list with one dict per ``<note>``.  Values are the element's
        ``text`` (possibly ``None``), except for ``'content'``, whose value
        is a list with the ``text`` of every node in the embedded note
        document (empty list when the note has no body).
    """
    context = etree.iterparse(xmlFile, encoding='utf-8', strip_cdata=False)
    note_dict = {}
    notes = []
    for _action, elem in context:
        text = elem.text
        if elem.tag == 'content':
            # The note body is a complete XML document stored as CDATA, so
            # it has to be parsed a second time with the module-level parser.
            text = []
            raw = elem.text
            # An empty or whitespace-only <content> has nothing to parse;
            # without this guard ``raw.encode`` fails on None.
            if raw and raw.strip():
                body = etree.parse(StringIO(raw.encode('utf-8')), p)
                text = [node.text for node in body.iter()]
        note_dict[elem.tag] = text
        if elem.tag == "note":
            # The note is complete: store it and start collecting the next.
            notes.append(note_dict)
            note_dict = {}
    return notes
# Script entry point: parse the export file named 'mynote.enex'.
if __name__ == '__main__':
    notes = parseNoteXML('mynote.enex')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[{'content': ['\nyank for copy, delete for cut, put for parse\n',
              None,
              None,
              'Move in context, not position',
              '/ search forward',
              '? search backward',
              'n repeat last search',
              'N repeat last search but in the opposite direction',
              "tx move to 'x'",
              "fx find 'x'"],
  'created': '20101229T161500Z',
  'note': None,
  'note-attributes': None,
  'title': 'Vim Tips',
  'updated': '20101231T161039Z'}]
Python 3 version:
from lxml import etree
from io import BytesIO
#http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/
# Parser used by parseNoteXML for the XML document embedded in each note's
# <content> element; entity resolution is switched off.
p = etree.XMLParser(
    resolve_entities=False,
    remove_blank_text=True,
)
def parseNoteXML(xmlFile):
    """Parse an Evernote export (.enex) file into a list of note dicts.

    The export is streamed with ``etree.iterparse``.  Each element yielded
    by the parser is stored in the current note's dict under its tag, and
    the dict is appended to the result when a ``<note>`` element is
    yielded.

    Args:
        xmlFile: Filename or file-like object handed to ``etree.iterparse``.

    Returns:
        A list with one dict per ``<note>``.  Values are the element's
        ``text`` (possibly ``None``), except for ``'content'``, whose value
        is a list with the ``text`` of every node in the embedded note
        document (empty list when the note has no body).
    """
    context = etree.iterparse(xmlFile, encoding='utf-8', strip_cdata=False)
    note_dict = {}
    notes = []
    for _action, elem in context:
        text = elem.text
        if elem.tag == 'content':
            # The note body is a complete XML document stored as CDATA, so
            # it has to be parsed a second time with the module-level parser.
            text = []
            raw = elem.text
            # An empty or whitespace-only <content> has nothing to parse;
            # without this guard ``raw.encode`` fails on None.
            if raw and raw.strip():
                # Bytes are required here: the body carries its own XML
                # encoding declaration.
                body = etree.parse(BytesIO(raw.encode('utf-8')), p)
                text = [node.text for node in body.iter()]
        note_dict[elem.tag] = text
        if elem.tag == "note":
            # The note is complete: store it and start collecting the next.
            notes.append(note_dict)
            note_dict = {}
    return notes
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Good gemacht!