rschroll · June 16, 2012 23:05
diff --git a/highlightedtext.py b/highlightedtext.py
 #!/usr/bin/env python

 """
 %s mount-point

 Print to a file information on the highlighted text of a selected file on
 the reader at mount-point.  The output file is tab-separated data of the
 form:
    page    highlight-range     mark-start      mark-end    mark-type   marked-text
 mark-start and mark-end are the 'pdfloc' data defining where the marked
 text starts and stops.  Some of this information is extracted into the
 highlight-range field, which has format:
    c,d,e,g -> C,D,E,G
 We think 'c' (and 'C') tell us the text line, 'd' and 'e' tell us the
 character, and 'g' is some kind of flag, but we don't understand how this
 information is encoded.  If you figure something out, please tell us:
    https://github.com/rschroll/prsannots/issues/4
 """

 # Copyright 2012 Robert Schroll
 #
 # This program is free software: you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as
 # published by the Free Software Foundation, either version 3 of the
 # License, or (at your option) any later version.
 #
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 # GNU Lesser General Public License for more details.
 #
 # You should have received a copy of the GNU Lesser General Public
 # License along with this program.  If not, see
 # <http://www.gnu.org/licenses/>.

 import os
 import sys
 from prsannots.prst1 import Reader

 # Labels for the values of the annotation table's markup_type column; used
 # to fill the mark-type field of each output line.
 MT = {10: 'highlight', 11: 'text', 12: 'drawing'}

 def u_raw_input(prompt):
    """raw_input with unicode encoding/decoding.

    The prompt is encoded for stdout before it is shown, and the reply read
    from stdin is decoded to unicode before it is returned.

    sys.stdout.encoding and sys.stdin.encoding may be None (e.g. when the
    stream is piped or redirected); UTF-8 is used in that case instead of
    passing None to encode()/decode(), which would raise TypeError.
    """
    out_encoding = sys.stdout.encoding or 'utf-8'
    in_encoding = sys.stdin.encoding or 'utf-8'
    return raw_input(prompt.encode(out_encoding)).decode(in_encoding)

 def select_book(books):
    print "Please select which book to get:"
    for i, book in enumerate(books):
        title = book.title or book.file.split('/')[-1]
        print "  %i. %s" % (i+1, title)
    which = u_raw_input("> ")
    try:
        return books[int(which) - 1]
    except (ValueError, IndexError):
        print "Could not understand your response.  Aborting."
    sys.exit(1)

 def main(path):
    """Write the annotations of a user-selected book to a text file.

    path is passed to Reader; the user then picks one of the reader's books
    and an output file name (default: the book's base name with a '.txt'
    extension).  One tab-separated line is written per row of the
    annotation table for that book, ordered by page, in the format given in
    the module docstring.
    """
    reader = Reader(path)
    book = select_book(reader.books)

    c = reader.db.cursor()
    c.execute('''select page, mark, mark_end, markup_type, marked_text
                    from annotation
                    where content_id = ?
                    order by page''', (book.id,))

    outfn = os.path.splitext(os.path.basename(book.file))[0] + '.txt'
    userfn = u_raw_input("Enter output file name [%s]: " % outfn)
    if userfn:
        outfn = userfn

    # 'with' closes the output file even if a row fails to format.
    with open(outfn, 'w') as f:
        for page, mark, mark_end, markup_type, marked_text in c:
            # Drop the first 8 and last 2 characters of each mark
            # (presumably the wrapper around the pdfloc fields -- TODO
            # confirm) and split the remainder on commas.
            start, end = [s[8:-2].split(',') for s in (mark, mark_end)]
            # c,d,e,g are taken from the start mark and C,D,E,G from the
            # end mark; the end flag previously repeated the start flag.
            hls = '%s,%s,%s,%s -> %s,%s,%s,%s' % tuple(
                start[2:5] + start[6:7] + end[2:5] + end[6:7])
            # The stored page number is written out incremented by one
            # (presumably zero-based in the database -- TODO confirm).
            f.write('%i\t%s\t%s\t%s\t%s\t%s\n' % (
                int(page+1), hls, mark[:-1], mark_end[:-1],
                MT[markup_type], marked_text.encode('utf-8')))

 if __name__ == '__main__':
    if len(sys.argv) != 2:
        print __doc__ % sys.argv[0]
        sys.exit(0)
    if not os.path.ismount(sys.argv[1]):
        print "First argument must be mount point of Sony Reader."
        print "(%s does not appear to be a mount point.)" % sys.argv[1]
        sys.exit(1)
    main(sys.argv[1])
	#!/usr/bin/env python

	"""
	%s mount-point

	Print to a file information on the highlighted text of a selected file on
	the reader at mount-point. The output file is tab-separated data of the
	form:
	page highlight-range mark-start mark-end mark-type marked-text
	mark-start and mark-end are the 'pdfloc' data defining where the marked
	text starts and stops. Some of this information is extracted into the
	highlight-range field, which has format:
	c,d,e,g -> C,D,E,G
	We think 'c' (and 'C') tell us the text line, 'd' and 'e' tell us the
	character, and 'g' is some kind of flag, but we don't understand how this
	information is encoded. If you figure something out, please tell us:
	https://github.com/rschroll/prsannots/issues/4
	"""

	# Copyright 2012 Robert Schroll
	#
	# This program is free software: you can redistribute it and/or modify
	# it under the terms of the GNU Lesser General Public License as
	# published by the Free Software Foundation, either version 3 of the
	# License, or (at your option) any later version.
	#
	# This program is distributed in the hope that it will be useful,
	# but WITHOUT ANY WARRANTY; without even the implied warranty of
	# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	# GNU Lesser General Public License for more details.
	#
	# You should have received a copy of the GNU Lesser General Public
	# License along with this program. If not, see
	# <http://www.gnu.org/licenses/>.

	import os
	import sys
	from prsannots.prst1 import Reader

	# Labels for the values of the annotation table's markup_type column; used
	# to fill the mark-type field of each output line.
	MT = {10: 'highlight', 11: 'text', 12: 'drawing'}

	def u_raw_input(prompt):
	    """raw_input with unicode encoding/decoding.

	    The prompt is encoded for stdout before it is shown, and the reply
	    read from stdin is decoded to unicode before it is returned.

	    sys.stdout.encoding and sys.stdin.encoding may be None (e.g. when the
	    stream is piped or redirected); UTF-8 is used in that case instead of
	    passing None to encode()/decode(), which would raise TypeError.
	    """
	    out_encoding = sys.stdout.encoding or 'utf-8'
	    in_encoding = sys.stdin.encoding or 'utf-8'
	    return raw_input(prompt.encode(out_encoding)).decode(in_encoding)

	def select_book(books):
	print "Please select which book to get:"
	for i, book in enumerate(books):
	title = book.title or book.file.split('/')[-1]
	print " %i. %s" % (i+1, title)
	which = u_raw_input("> ")
	try:
	return books[int(which) - 1]
	except (ValueError, IndexError):
	print "Could not understand your response. Aborting."
	sys.exit(1)

	def main(path):
	    """Write the annotations of a user-selected book to a text file.

	    path is passed to Reader; the user then picks one of the reader's
	    books and an output file name (default: the book's base name with a
	    '.txt' extension).  One tab-separated line is written per row of the
	    annotation table for that book, ordered by page, in the format given
	    in the module docstring.
	    """
	    reader = Reader(path)
	    book = select_book(reader.books)

	    c = reader.db.cursor()
	    c.execute('''select page, mark, mark_end, markup_type, marked_text
	from annotation
	where content_id = ?
	order by page''', (book.id,))

	    outfn = os.path.splitext(os.path.basename(book.file))[0] + '.txt'
	    userfn = u_raw_input("Enter output file name [%s]: " % outfn)
	    if userfn:
	        outfn = userfn

	    # 'with' closes the output file even if a row fails to format.
	    with open(outfn, 'w') as f:
	        for page, mark, mark_end, markup_type, marked_text in c:
	            # Drop the first 8 and last 2 characters of each mark
	            # (presumably the wrapper around the pdfloc fields -- TODO
	            # confirm) and split the remainder on commas.
	            start, end = [s[8:-2].split(',') for s in (mark, mark_end)]
	            # c,d,e,g are taken from the start mark and C,D,E,G from the
	            # end mark; the end flag previously repeated the start flag.
	            hls = '%s,%s,%s,%s -> %s,%s,%s,%s' % tuple(
	                start[2:5] + start[6:7] + end[2:5] + end[6:7])
	            # The stored page number is written out incremented by one
	            # (presumably zero-based in the database -- TODO confirm).
	            f.write('%i\t%s\t%s\t%s\t%s\t%s\n' % (
	                int(page+1), hls, mark[:-1], mark_end[:-1],
	                MT[markup_type], marked_text.encode('utf-8')))

	if __name__ == '__main__':
	if len(sys.argv) != 2:
	print __doc__ % sys.argv[0]
	sys.exit(0)
	if not os.path.ismount(sys.argv[1]):
	print "First argument must be mount point of Sony Reader."
	print "(%s does not appear to be a mount point.)" % sys.argv[1]
	sys.exit(1)
	main(sys.argv[1])
No results found