sainathadapa · September 1, 2016 11:06
diff --git a/.gitignore b/.gitignore
 *.enex
diff --git a/mynote.xml b/mynote.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
 <en-export export-date="20120727T073610Z" application="Evernote" version="Evernote Mac 3.0.5 (209942)">
  <note>
    <title>Vim Tips</title>
    <content>
      <![CDATA[
           <?xml version="1.0" encoding="UTF-8" standalone="no"?>
           <!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
               <en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
               yank for copy, delete for cut, put for parse
               <div><br/></div>
               <div>Move in context, not position</div>
               <div>/ search forward</div>
               <div>? search backward</div>
               <div>n repeat last search</div>
               <div>N repeat last search but in the opposite direction</div>
               <div>tx move to 'x'</div>
               <div>fx find 'x'</div>
           </en-note>
      ]]>
    </content>
    <created>20101229T161500Z</created>
    <updated>20101231T161039Z</updated>
    <note-attributes/>
  </note>
 </en-export>
diff --git a/parse_evernote.py b/parse_evernote.py
 #!/usr/bin/env python3

 from base64 import b64decode
 import hashlib
 from lxml import etree
 from io import BytesIO
 import os
 from time import strptime

 from pypandoc import convert_text

 # Reference: http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/
 # NOTE(review): `p` is not referenced anywhere else in the code shown here;
 # parseNoteXML passes its own options directly to etree.iterparse.
 p = etree.XMLParser(remove_blank_text=True, resolve_entities=False)

 def parse_content(content):
     """Convert a note body from HTML (ENML) to Org-mode markup.

     Args:
         content: Text of a note's ``<content>`` element, or ``None`` when
             the element is empty.

     Returns:
         The Org-mode text returned by ``pypandoc.convert_text``, or an empty
         string when ``content`` is ``None``.
     """
     # An empty <content/> element has no text at all; there is nothing to
     # hand to the converter, so return an empty body instead of failing.
     if content is None:
         return ''
     return convert_text(content, 'org', format='html')

 def parse_resource(resource):
     """Flatten a ``<resource>`` element into a plain dict.

     Every direct child is stored under its tag name with its text as the
     value. The ``<data>`` child is base64-decoded to ``bytes`` and the MD5
     hex digest of those bytes is stored under ``'hash'``.

     Args:
         resource: Element whose children describe one attachment.

     Returns:
         A dict that always contains ``'data'`` (bytes) and ``'hash'`` (str),
         plus one entry per other child tag.
     """
     # Start from "no payload" so a resource lacking <data> still carries the
     # 'data' and 'hash' keys that export_note indexes unconditionally.
     payload = b''
     rsc_dict = {}
     for elem in resource:
         if elem.tag == 'data':
             # elem.text is None for an empty <data/> element.
             payload = b64decode(elem.text) if elem.text else b''
         else:
             rsc_dict[elem.tag] = elem.text
     rsc_dict['data'] = payload
     rsc_dict['hash'] = hashlib.md5(payload).hexdigest()
     return rsc_dict

 def parse_note(note):
     """Turn one ``<note>`` element into a plain dict.

     Child elements are handled by tag:

     * ``content``: stored converted under ``'content'`` and verbatim under
       ``'content-raw'``; both are ``''`` when the element is empty.
     * ``resource``: parsed with ``parse_resource`` and collected in the
       list stored under ``'resource'``.
     * ``created`` / ``updated``: parsed into ``time.struct_time``.
     * anything else: stored as the element's text.

     Args:
         note: Element whose direct children are the note's fields.

     Returns:
         The dict described above; ``'resource'`` is always present.
     """
     note_dict = {}
     resources = []
     for elem in note:
         tag = elem.tag
         if tag == 'content':
             # elem.text is None for an empty <content/>; normalise to '' so
             # both the converted and the raw text can always be written out.
             raw = elem.text or ''
             note_dict[tag] = parse_content(raw) if raw else ''
             # A copy of original content
             note_dict['content-raw'] = raw
         elif tag == 'resource':
             resources.append(parse_resource(elem))
         elif tag in ('created', 'updated'):
             note_dict[tag] = strptime(elem.text, '%Y%m%dT%H%M%SZ')
         else:
             note_dict[tag] = elem.text

     note_dict['resource'] = resources
     return note_dict

 def parseNoteXML(xmlFile):
     """Lazily yield one parsed note dict per ``<note>`` element.

     Args:
         xmlFile: Passed straight to ``etree.iterparse`` as its source.

     Yields:
         The dict built by ``parse_note`` for each ``<note>`` element, in the
         order the parser finishes them.
     """
     # Without huge_tree set to True, parser may complain about huge text node
     # Try to recover, because there may be "&nbsp;", which will cause
     # "XMLSyntaxError: Entity 'nbsp' not defined"
     context = etree.iterparse(xmlFile, encoding='utf-8', strip_cdata=False, huge_tree=True, recover=True)
     for action, elem in context:
         if elem.tag == "note":
             yield parse_note(elem)
             # parse_note has copied everything it needs into the dict, so
             # drop the element's children and text; otherwise every note
             # (including its base64 attachments) stays in memory until the
             # whole file has been read.
             elem.clear()

 # Save notes and attachments
 # in directories named according to date of creation
 def export_note(note, out_dir='en-export'):
     """Write one parsed note and its attachments to disk.

     Files go under ``<out_dir>/<YYYY>/<MM>/<DD>/`` according to the note's
     creation date: ``<title>.org`` (converted text preceded by a ``#+TITLE:``
     line), ``<title>.bak`` (raw content) and one ``<hash>.data`` file per
     resource.

     Args:
         note: Dict with ``'created'`` (a ``time.struct_time``) and optionally
             ``'title'``, ``'content'``, ``'content-raw'`` and ``'resource'``
             (a list of dicts with ``'hash'`` and ``'data'``).
         out_dir: Root directory of the export tree. Defaults to
             ``'en-export'``, the location that used to be hard-coded.
     """
     date = note['created']
     note_dir = os.path.join(
         out_dir,
         str(date.tm_year),
         '%02d' % date.tm_mon,
         '%02d' % date.tm_mday,
     )
     os.makedirs(note_dir, exist_ok=True)

     # A note may lack a title (or carry an empty one); treat that as ''.
     full_title = note.get('title') or ''
     # Remove "/" from filenames and cap the stem at 20 characters.
     # NOTE: two notes created on the same day whose titles share their first
     # 20 characters map to the same file names and overwrite each other.
     title = full_title.replace('/', ' ')[:20] or 'untitled'

     # Always write UTF-8 so non-ASCII notes do not depend on the locale's
     # default encoding.
     text_file = os.path.join(note_dir, title + '.org')
     with open(text_file, 'w', encoding='utf-8') as fd:
         # Write the original title
         fd.write('#+TITLE: ' + full_title + '\n')
         fd.write(note.get('content') or '')

     bak_file = os.path.join(note_dir, title + '.bak')
     with open(bak_file, 'w', encoding='utf-8') as fd:
         fd.write(note.get('content-raw') or '')

     for resource in note.get('resource', ()):
         rsc_file = os.path.join(note_dir, resource['hash'] + '.data')
         with open(rsc_file, 'wb') as fd:
             fd.write(resource['data'])

 if __name__ == '__main__':
     # Script entry point: stream every note out of the export file and
     # write each one to disk as soon as it has been parsed.
     for parsed in parseNoteXML('mynote.enex'):
         export_note(parsed)
diff --git a/result b/result
 [{'content': ['\nyank for copy, delete for cut, put for parse\n',
              None,
              None,
              'Move in context, not position',
              '/ search forward',
              '? search backward',
              'n repeat last search',
              'N repeat last search but in the opposite direction',
              "tx move to 'x'",
              "fx find 'x'"],
  'created': '20101229T161500Z',
  'note': None,
  'note-attributes': None,
  'title': 'Vim Tips',
  'updated': '20101231T161039Z'}]
	<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export2.dtd">
	<en-export export-date="20120727T073610Z" application="Evernote" version="Evernote Mac 3.0.5 (209942)">
	<note>
	<title>Vim Tips</title>
	<content>
	<![CDATA[
	<?xml version="1.0" encoding="UTF-8" standalone="no"?>
	<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">
	<en-note style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;">
	yank for copy, delete for cut, put for parse
	<div><br/></div>
	<div>Move in context, not position</div>
	<div>/ search forward</div>
	<div>? search backward</div>
	<div>n repeat last search</div>
	<div>N repeat last search but in the opposite direction</div>
	<div>tx move to 'x'</div>
	<div>fx find 'x'</div>
	</en-note>
	]]>
	</content>
	<created>20101229T161500Z</created>
	<updated>20101231T161039Z</updated>
	<note-attributes/>
	</note>
	</en-export>
	#!/usr/bin/env python3

	from base64 import b64decode
	import hashlib
	from lxml import etree
	from io import BytesIO
	import os
	from time import strptime

	from pypandoc import convert_text

	# Reference: http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/
	# NOTE(review): `p` is not referenced anywhere else in the code shown here;
	# parseNoteXML passes its own options directly to etree.iterparse.
	p = etree.XMLParser(remove_blank_text=True, resolve_entities=False)

	def parse_content(content):
	    """Convert a note body from HTML (ENML) to Org-mode markup.

	    Args:
	        content: Text of a note's ``<content>`` element, or ``None`` when
	            the element is empty.

	    Returns:
	        The Org-mode text returned by ``pypandoc.convert_text``, or an empty
	        string when ``content`` is ``None``.
	    """
	    # An empty <content/> element has no text at all; there is nothing to
	    # hand to the converter, so return an empty body instead of failing.
	    if content is None:
	        return ''
	    return convert_text(content, 'org', format='html')

	def parse_resource(resource):
	    """Flatten a ``<resource>`` element into a plain dict.

	    Every direct child is stored under its tag name with its text as the
	    value. The ``<data>`` child is base64-decoded to ``bytes`` and the MD5
	    hex digest of those bytes is stored under ``'hash'``.

	    Args:
	        resource: Element whose children describe one attachment.

	    Returns:
	        A dict that always contains ``'data'`` (bytes) and ``'hash'`` (str),
	        plus one entry per other child tag.
	    """
	    # Start from "no payload" so a resource lacking <data> still carries the
	    # 'data' and 'hash' keys that export_note indexes unconditionally.
	    payload = b''
	    rsc_dict = {}
	    for elem in resource:
	        if elem.tag == 'data':
	            # elem.text is None for an empty <data/> element.
	            payload = b64decode(elem.text) if elem.text else b''
	        else:
	            rsc_dict[elem.tag] = elem.text
	    rsc_dict['data'] = payload
	    rsc_dict['hash'] = hashlib.md5(payload).hexdigest()
	    return rsc_dict

	def parse_note(note):
	    """Turn one ``<note>`` element into a plain dict.

	    Child elements are handled by tag:

	    * ``content``: stored converted under ``'content'`` and verbatim under
	      ``'content-raw'``; both are ``''`` when the element is empty.
	    * ``resource``: parsed with ``parse_resource`` and collected in the
	      list stored under ``'resource'``.
	    * ``created`` / ``updated``: parsed into ``time.struct_time``.
	    * anything else: stored as the element's text.

	    Args:
	        note: Element whose direct children are the note's fields.

	    Returns:
	        The dict described above; ``'resource'`` is always present.
	    """
	    note_dict = {}
	    resources = []
	    for elem in note:
	        tag = elem.tag
	        if tag == 'content':
	            # elem.text is None for an empty <content/>; normalise to '' so
	            # both the converted and the raw text can always be written out.
	            raw = elem.text or ''
	            note_dict[tag] = parse_content(raw) if raw else ''
	            # A copy of original content
	            note_dict['content-raw'] = raw
	        elif tag == 'resource':
	            resources.append(parse_resource(elem))
	        elif tag in ('created', 'updated'):
	            note_dict[tag] = strptime(elem.text, '%Y%m%dT%H%M%SZ')
	        else:
	            note_dict[tag] = elem.text

	    note_dict['resource'] = resources
	    return note_dict

	def parseNoteXML(xmlFile):
	    """Lazily yield one parsed note dict per ``<note>`` element.

	    Args:
	        xmlFile: Passed straight to ``etree.iterparse`` as its source.

	    Yields:
	        The dict built by ``parse_note`` for each ``<note>`` element, in the
	        order the parser finishes them.
	    """
	    # Without huge_tree set to True, parser may complain about huge text node
	    # Try to recover, because there may be "&nbsp;", which will cause
	    # "XMLSyntaxError: Entity 'nbsp' not defined"
	    context = etree.iterparse(xmlFile, encoding='utf-8', strip_cdata=False, huge_tree=True, recover=True)
	    for action, elem in context:
	        if elem.tag == "note":
	            yield parse_note(elem)
	            # parse_note has copied everything it needs into the dict, so
	            # drop the element's children and text; otherwise every note
	            # (including its base64 attachments) stays in memory until the
	            # whole file has been read.
	            elem.clear()

	# Save notes and attachments
	# in directories named according to date of creation
	def export_note(note, out_dir='en-export'):
	    """Write one parsed note and its attachments to disk.

	    Files go under ``<out_dir>/<YYYY>/<MM>/<DD>/`` according to the note's
	    creation date: ``<title>.org`` (converted text preceded by a ``#+TITLE:``
	    line), ``<title>.bak`` (raw content) and one ``<hash>.data`` file per
	    resource.

	    Args:
	        note: Dict with ``'created'`` (a ``time.struct_time``) and optionally
	            ``'title'``, ``'content'``, ``'content-raw'`` and ``'resource'``
	            (a list of dicts with ``'hash'`` and ``'data'``).
	        out_dir: Root directory of the export tree. Defaults to
	            ``'en-export'``, the location that used to be hard-coded.
	    """
	    date = note['created']
	    note_dir = os.path.join(
	        out_dir,
	        str(date.tm_year),
	        '%02d' % date.tm_mon,
	        '%02d' % date.tm_mday,
	    )
	    os.makedirs(note_dir, exist_ok=True)

	    # A note may lack a title (or carry an empty one); treat that as ''.
	    full_title = note.get('title') or ''
	    # Remove "/" from filenames and cap the stem at 20 characters.
	    # NOTE: two notes created on the same day whose titles share their first
	    # 20 characters map to the same file names and overwrite each other.
	    title = full_title.replace('/', ' ')[:20] or 'untitled'

	    # Always write UTF-8 so non-ASCII notes do not depend on the locale's
	    # default encoding.
	    text_file = os.path.join(note_dir, title + '.org')
	    with open(text_file, 'w', encoding='utf-8') as fd:
	        # Write the original title
	        fd.write('#+TITLE: ' + full_title + '\n')
	        fd.write(note.get('content') or '')

	    bak_file = os.path.join(note_dir, title + '.bak')
	    with open(bak_file, 'w', encoding='utf-8') as fd:
	        fd.write(note.get('content-raw') or '')

	    for resource in note.get('resource', ()):
	        rsc_file = os.path.join(note_dir, resource['hash'] + '.data')
	        with open(rsc_file, 'wb') as fd:
	            fd.write(resource['data'])

	if __name__ == '__main__':
	    # Script entry point: stream every note out of the export file and
	    # write each one to disk as soon as it has been parsed.
	    for parsed in parseNoteXML('mynote.enex'):
	        export_note(parsed)
	[{'content': ['\nyank for copy, delete for cut, put for parse\n',
	None,
	None,
	'Move in context, not position',
	'/ search forward',
	'? search backward',
	'n repeat last search',
	'N repeat last search but in the opposite direction',
	"tx move to 'x'",
	"fx find 'x'"],
	'created': '20101229T161500Z',
	'note': None,
	'note-attributes': None,
	'title': 'Vim Tips',
	'updated': '20101231T161039Z'}]