Created
August 15, 2012 22:57
-
-
Save philipbl/3364453 to your computer and use it in GitHub Desktop.
Imports Evernote HTML into DayOne
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from optparse import OptionParser | |
from HTMLParser import HTMLParser | |
import os | |
import glob | |
import sys | |
import re | |
class Entry(object):
    """A single note extracted from an Evernote HTML export.

    Attributes are filled in by the HTML parser as it walks the document:

    * ``tags``      -- list of keyword strings
    * ``created`` / ``updated`` -- timestamp strings taken from meta tags
    * ``altitude`` / ``latitude`` / ``longitude`` -- location strings
    * ``title``     -- text of the ``<title>`` element, or ``None``
    * ``text``      -- body content in document order; each item is either
      a text string or the attribute list of an ``<img>`` tag
    """

    def __init__(self):
        self.tags = []
        self.created = None
        self.updated = None
        self.altitude = None
        self.latitude = None
        self.longitude = None
        self.title = None
        self.text = []

    def __str__(self):
        """Return a short multi-line summary (body truncated to 100 chars)."""
        preview = self.get_processed_text()[0:100] + "..."
        return (
            "Created: {0}\n"
            "Latitude: {1}\n"
            "Longitude: {2}\n"
            "Title: {3}\n"
            "Text: {4}\n"
        ).format(self.created, self.latitude, self.longitude,
                 self.title, preview)

    def get_image(self, tag):
        """Return the ``src`` value from an ``<img>`` attribute list.

        ``tag`` is a sequence of ``(name, value)`` pairs.  Returns ``None``
        when no ``src`` attribute is present.
        """
        for name, value in tag:
            if name == 'src':
                return value
        return None

    def get_processed_text(self):
        """Return the title and body joined into one string.

        Paragraphs are separated by a blank line.  Double quotes and
        backticks in body text are backslash-escaped.  A missing title is
        treated as an empty string instead of raising ``TypeError``.

        NOTE(review): image items contribute only an empty paragraph.  The
        original format string had no placeholder, so the image ``src`` was
        never written out -- confirm the intended markup before emitting it.
        """
        parts = [self.title or ""]
        for line in self.text:
            if isinstance(line, list):
                # Attribute list of an <img> tag: keep the paragraph break
                # only (see NOTE above).
                parts.append("")
            else:
                # remove some characters that will give us trouble
                parts.append(line.replace('"', '\\"').replace('`', '\\`'))
        return "\n\n".join(parts)
class EvernoteParser(HTMLParser):
    """HTML parser that collects one exported note into an ``Entry``.

    Feed it the contents of an exported HTML file, then call
    ``get_entry()``.  ``<meta>`` tags populate the entry's metadata, the
    ``<title>`` text becomes its title, and text and ``<img>`` tags are
    appended to ``entry.text`` in document order.
    """

    # <meta name="..."> values that are copied verbatim onto the Entry
    # attribute of the same name.
    _META_FIELDS = ('altitude', 'created', 'latitude', 'longitude', 'updated')

    def __init__(self):
        # HTMLParser is an old-style class on Python 2, so call the base
        # initialiser directly rather than through super().
        HTMLParser.__init__(self)
        self.entry = Entry()
        self.collect_title = False
        self.collect_body = False

    def get_entry(self):
        """Return the ``Entry`` built from the markup fed so far."""
        return self.entry

    def handle_starttag(self, tag, attrs):
        """Dispatch on the opening tag: metadata, title, body or image."""
        if tag == 'meta':
            self.process_meta_tags(attrs)
        elif tag == 'title':
            self.collect_title = True
        elif tag == 'body':
            self.collect_body = True
        elif tag == 'img':
            # Images are stored as their raw attribute list.
            self.entry.text.append(attrs)

    def handle_endtag(self, tag):
        """Stop collecting when the title or body element closes."""
        if tag == 'title':
            # Without this reset an empty <title></title> would leave the
            # flag set and the first body text would become the title.
            self.collect_title = False
        elif tag == 'body':
            self.collect_body = False

    def handle_data(self, data):
        """Store text as the title (first chunk only) or as body content."""
        if self.collect_title:
            self.entry.title = data
            self.collect_title = False
        elif self.collect_body:
            self.entry.text.append(data)

    def process_meta_tags(self, attrs):
        """Copy a recognised ``<meta name=... content=...>`` onto the entry.

        The value is looked up by its ``content`` attribute name instead of
        by position, so attribute order does not matter.  Tags without a
        ``content`` value, or with an unrecognised ``name``, are ignored.
        """
        meta = dict(attrs)
        name = meta.get('name')
        content = meta.get('content')
        if content is None:
            return
        if name == 'keywords':
            self.entry.tags = content.split(',')
        elif name in self._META_FIELDS:
            setattr(self.entry, name, content)
# Default location of the Day One journal entries that are searched.
_DAYONE_ENTRIES_DIR = '/Users/philiplundrigan/Library/Mobile Documents/5U8NS4GX82~com~dayoneapp~dayone/Documents/Journal_dayone/entries'


def find_something(string, entries_dir=_DAYONE_ENTRIES_DIR):
    """Return the name of the first entry file whose contents match.

    ``string`` is used as a regular-expression pattern, so regex
    metacharacters in it are interpreted, not matched literally.
    ``entries_dir`` is the directory to scan and defaults to the Day One
    journal location.

    Files are visited in ``os.listdir`` order.  The search is best-effort:
    a file that cannot be opened, read or matched prints ``ERROR`` and is
    skipped.  Returns the bare file name, or ``None`` when nothing matches.
    """
    for name in os.listdir(entries_dir):
        try:
            # The open() call is inside the try so that sub-directories or
            # unreadable files are skipped instead of aborting the search.
            with open(os.path.join(entries_dir, name)) as entry_file:
                if re.search(string, entry_file.read()) is not None:
                    return name
        except Exception:
            # Deliberately broad to stay best-effort, but unlike a bare
            # except it lets KeyboardInterrupt and SystemExit through.
            print("ERROR")
    return None
if __name__ == '__main__':
    # Script entry point: parse every exported *.html note in the given
    # directory and, for each note that carries a latitude, look up the
    # Day One entry that starts with the same text and print it.

    # Directory holding the Day One journal entries (trailing slash kept so
    # that a file name can be appended directly).
    ENTRIES_DIR = '/Users/philiplundrigan/Library/Mobile Documents/5U8NS4GX82~com~dayoneapp~dayone/Documents/Journal_dayone/entries/'

    option_parser = OptionParser(usage="%prog [-f] [-q]", version="%prog 1.0")
    option_parser.add_option("-f", "--file", dest="path",
                             help="location of the Evernote files",
                             metavar="FILE")
    option_parser.add_option("-q", "--quiet",
                             action="store_false", dest="verbose",
                             default=True,
                             help="don't print status messages")
    (options, args) = option_parser.parse_args()
    path = options.path
    verbose = options.verbose

    if path is None:
        option_parser.print_usage()
        # sys.exit() instead of quit(): quit() is a helper installed by the
        # site module and is not guaranteed to exist.
        sys.exit()

    for filename in glob.glob(os.path.join(path, '*.html')):
        note_parser = EvernoteParser()
        # Close each export as soon as it has been parsed.
        with open(filename) as note_file:
            note_parser.feed(note_file.read())
        entry = note_parser.get_entry()

        # Only notes that carry location data are processed.
        if entry.latitude is None:
            continue

        # The first 20 characters of the note are used as the search
        # pattern for locating the corresponding journal entry.
        match = find_something(entry.get_processed_text()[0:20])
        if match is None:
            print("Error finding: " + str(entry))
        else:
            print("modifiying  " + match)
            with open(ENTRIES_DIR + match, 'r') as entry_file:
                print(entry_file.read())

        # The import step itself is currently disabled; it was:
        #os.system('echo \"{0}\" | /Applications/Day\ One.app/Contents/MacOS/dayone -d=\"{1}\" new '.format(entry.get_processed_text() ,entry.created))

    if verbose:
        print('Done...')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This looks very interesting - any idea how to repurpose this to load a Facebook archive into DayOne?