cstrouse · April 7, 2013 07:11 · cstrouse · Apr 7, 2013
diff --git a/kindle-highlight-extractor.py b/kindle-highlight-extractor.py
 import binascii
 import re

 # highlights are stored in *.mbp files corresponding to the filename of the book
 #    in ~/Library/Application Support/Kindle/My Kindle Content/

 # EA 44 41 54 41 - beginning of highlight
 # 44 41 54 41 - end of highlight (except last highlight which *sometimes* ends with 42 4B 4D 4B)

 fh = '0131177052.WrkngEffLegCode.mbp'

 # Highlight records are matched on the raw bytes rather than on a hex dump:
 # searching hexlified text lets a marker match at an odd nibble offset, which
 # yields an odd-length capture that unhexlify() rejects.
 #   \xeaDATA        start marker (EA 44 41 54 41)
 #   \x00\x00\x01    three bytes the original pattern also required after the
 #                   marker (presumably part of a length field -- TODO confirm)
 #   DATA | BKMK     end marker; the last highlight sometimes ends with BKMK
 #                   (42 4B 4D 4B) instead of DATA
 HIGHLIGHT_RE = re.compile(br'\xeaDATA\x00\x00\x01(.*?)(?:DATA|BKMK)', re.S)


 def extract_highlights(content):
 	"""Return the highlight payloads found in the raw bytes of an .mbp file.

 	content -- the complete file contents as a byte string.

 	Each result is the bytes between the start and end markers with the
 	first byte dropped (it is not part of the text; presumably the tail of
 	a length field -- TODO confirm).  Returns an empty list when nothing
 	matches.
 	"""
 	return [payload[1:] for payload in HIGHLIGHT_RE.findall(content)]


 def main(path=None):
 	"""Print every highlight in *path*, a blank line, then the total count.

 	path -- .mbp file to read; defaults to the first command-line argument,
 	or to the module-level ``fh`` when none is given.
 	"""
 	import sys  # local import: the file-level import block is left untouched

 	if path is None:
 		path = sys.argv[1] if len(sys.argv) > 1 else fh

 	with open(path, 'rb') as f:
 		highlights = extract_highlights(f.read())

 	# Emit the raw bytes unchanged on both Python 2 (sys.stdout) and
 	# Python 3 (sys.stdout.buffer); the text encoding is not known here.
 	out = getattr(sys.stdout, 'buffer', sys.stdout)
 	for highlight in highlights:
 		out.write(highlight + b'\n')
 	out.write(b'\n')
 	out.write((str(len(highlights)) + ' highlights found\n').encode('ascii'))


 if __name__ == '__main__':
 	main()
	import binascii
	import re

	# highlights are stored in *.mbp files corresponding to the filename of the book
	# in ~/Library/Application Support/Kindle/My Kindle Content/

	# EA 44 41 54 41 - beginning of highlight
	# 44 41 54 41 - end of highlight (except last highlight which sometimes ends with 42 4B 4D 4B)

	fh = '0131177052.WrkngEffLegCode.mbp'

	# Highlight records are matched on the raw bytes rather than on a hex dump:
	# searching hexlified text lets a marker match at an odd nibble offset, which
	# yields an odd-length capture that unhexlify() rejects.
	#   \xeaDATA        start marker (EA 44 41 54 41)
	#   \x00\x00\x01    three bytes the original pattern also required after the
	#                   marker (presumably part of a length field -- TODO confirm)
	#   DATA | BKMK     end marker; the last highlight sometimes ends with BKMK
	#                   (42 4B 4D 4B) instead of DATA
	HIGHLIGHT_RE = re.compile(br'\xeaDATA\x00\x00\x01(.*?)(?:DATA|BKMK)', re.S)


	def extract_highlights(content):
		"""Return the highlight payloads found in the raw bytes of an .mbp file.

		content -- the complete file contents as a byte string.

		Each result is the bytes between the start and end markers with the
		first byte dropped (it is not part of the text; presumably the tail of
		a length field -- TODO confirm).  Returns an empty list when nothing
		matches.
		"""
		return [payload[1:] for payload in HIGHLIGHT_RE.findall(content)]


	def main(path=None):
		"""Print every highlight in *path*, a blank line, then the total count.

		path -- .mbp file to read; defaults to the first command-line argument,
		or to the module-level ``fh`` when none is given.
		"""
		import sys  # local import: the file-level import block is left untouched

		if path is None:
			path = sys.argv[1] if len(sys.argv) > 1 else fh

		with open(path, 'rb') as f:
			highlights = extract_highlights(f.read())

		# Emit the raw bytes unchanged on both Python 2 (sys.stdout) and
		# Python 3 (sys.stdout.buffer); the text encoding is not known here.
		out = getattr(sys.stdout, 'buffer', sys.stdout)
		for highlight in highlights:
			out.write(highlight + b'\n')
		out.write(b'\n')
		out.write((str(len(highlights)) + ' highlights found\n').encode('ascii'))


	if __name__ == '__main__':
		main()
No results found