Skip to content

Instantly share code, notes, and snippets.

@100ideas
Forked from ttscoff/ReadingListCatcher.py
Last active March 21, 2018 08:04
Show Gist options
  • Save 100ideas/d568607f918887bdfd86274d4d92307d to your computer and use it in GitHub Desktop.
Save 100ideas/d568607f918887bdfd86274d4d92307d to your computer and use it in GitHub Desktop.
A script for exporting Safari Reading List items to Markdown files and Pinboard bookmarks - with jupyter notebook playground version (look for python3 comment toggles)
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
#!/usr/bin/python
# ReadingListCatcher
# - A script for exporting Safari Reading List items to Markdown and Pinboard
# Brett Terpstra 2015
# https://gist.github.com/ttscoff/f27f161f37bbc7f5b418
#
# Uses code from <https://gist.github.com/robmathers/5995026>
# Requires Python pinboard lib for Pinboard.in import:
# `easy_install pinboard` or `pip install pinboard`
import plistlib
from shutil import copy
import subprocess
import os
from tempfile import gettempdir
import sys
import re
import time
from datetime import date, datetime, timedelta
from os import path
import pytz
# Default path of the Markdown export; used as the default for -f/--out-file.
# A leading "~" is expanded with os.path.expanduser when the export runs.
BOOKMARKS_MARKDOWN_FILE = '~/Dropbox/Safari-ReadingList.md' # Markdown file if using md export
# Default path of Safari's bookmarks plist; used as the default for
# -b/--bookmarks-file.
BOOKMARKS_PLIST = '~/Library/Safari/Bookmarks.plist' # Shouldn't need to modify
# Controls whether `plutil -convert xml1 <file>` is run on the temporary copy
# of the bookmarks file before that copy is parsed.
USE_PLUTIL = True # default
def copyTempFile(srcFile):
    """Copy *srcFile* into the system temp directory and return the copy's path.

    The copy keeps the source file's base name. No format conversion happens
    here; this function only duplicates the file so the original is never
    modified by later processing.
    """
    scratchDir = gettempdir()
    duplicatePath = os.path.join(scratchDir, os.path.basename(srcFile))
    copy(srcFile, scratchDir)
    return duplicatePath
def removeTempFile(tmpFile):
    """Delete the file at path *tmpFile*."""
    # os.unlink is the same operation as os.remove under its POSIX name.
    os.unlink(tmpFile)
class _readingList():
    """Export Safari Reading List entries to a Markdown file.

    Constructing an instance performs the whole export:

    1. copy the bookmarks plist to a temporary file (optionally converting
       the copy to XML with ``plutil``),
    2. read the existing Markdown file to find the ``Updated:`` timestamp of
       the previous run,
    3. turn every Reading List entry added after that timestamp into a
       Markdown list item, and
    4. rewrite the Markdown file with the new items first, followed by the
       previous content.

    Attributes:
        postedCount: number of items converted during this run.
        content: previous Markdown content with the ``Updated:`` line removed.
        newcontent: Markdown generated for the items found in this run.
    """

    # Characters replaced by a space in titles and preview text before they
    # are embedded in the Markdown output.
    _CLEAN_RX = re.compile(r"[|`:_*\n]")
    # Header line recording when the Markdown file was last written.
    _UPDATED_RX = re.compile(r'(?m)^Updated: (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) UTC')
    _UPDATED_FORMAT = '%Y-%m-%d %H:%M:%S'
    # Cut-off used when no previous export timestamp is available.
    _EPOCH = datetime(2013, 1, 1)

    def __init__(self, args):
        """Run the export.

        Args:
            args: parsed command-line namespace providing ``input_file``,
                ``output_file``, ``write`` and (optionally) ``use_plutil``.

        Raises:
            SystemExit: with plutil's exit status when the conversion fails.
        """
        print(args)
        bookmarksFile = os.path.expanduser(args.input_file)
        markdownFile = os.path.expanduser(args.output_file)

        self.postedCount = 0
        self.content = ''
        self.newcontent = ''

        bookmarksFileCopy = copyTempFile(bookmarksFile)
        try:
            sys.stdout.write('tmpfile bookmarksFileCopy: ')
            print(bookmarksFileCopy)

            # Both switches must allow plutil; with `or`, the module default
            # of True made --no-plutil impossible to honour.
            if USE_PLUTIL and getattr(args, 'use_plutil', True):
                converted = subprocess.call(['plutil', '-convert', 'xml1', bookmarksFileCopy])
                if converted != 0:
                    print('Couldn\'t convert bookmarks plist to xml format')
                    sys.exit(converted)

            last = self._readPreviousExport(markdownFile) if args.write else self._EPOCH
            plist = self._loadPlist(bookmarksFileCopy)
        finally:
            # Always clean up the temporary copy, including on sys.exit().
            removeTempFile(bookmarksFileCopy)

        self._collectNewItems(plist, last)

        pluralized = 'bookmarks' if self.postedCount > 1 else 'bookmark'

        if args.write:
            with open(markdownFile, 'w') as mdHandle:
                mdHandle.write('Updated: ' + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + " UTC\n\n")
                mdHandle.write(self.newcontent + self.content)

        if self.postedCount > 0:
            sys.stdout.write('\n' + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + ' UTC\n')
            sys.stdout.write('Found ' + str(self.postedCount) + ' new ' + pluralized + "\n")
            sys.stdout.write(('Saved' if args.write else 'WARN --no-write; so not writing') + ' to ' + markdownFile)
        else:
            sys.stdout.write('No new bookmarks found in Reading List')
        sys.stdout.write("\n")

    @staticmethod
    def _cleanText(text):
        """Return *text* with Markdown-sensitive characters and space runs collapsed."""
        if not isinstance(text, str):
            # Python 2 `unicode`: encode so it concatenates with byte strings.
            # On Python 3 the value is already `str` and must not be encoded,
            # otherwise re.sub() would be given bytes with a str pattern.
            text = text.encode('utf8')
        return re.sub(' +', ' ', _readingList._CLEAN_RX.sub(' ', text))

    @staticmethod
    def _loadPlist(plistPath):
        """Parse the plist at *plistPath* and return its root object."""
        if hasattr(plistlib, 'load'):
            with open(plistPath, 'rb') as fp:
                return plistlib.load(fp)
        # Older interpreters without plistlib.load().
        return plistlib.readPlist(plistPath)

    def _readPreviousExport(self, markdownFile):
        """Load the existing Markdown file and return the last export time.

        Creates an empty file when *markdownFile* does not exist. Otherwise
        stores the file's text (minus its ``Updated:`` lines) in
        ``self.content``. Returns the default cut-off when no timestamp is
        found.
        """
        if not os.path.exists(markdownFile):
            open(markdownFile, 'a').close()
            return self._EPOCH
        with open(markdownFile, 'r') as mdInput:
            self.content = mdInput.read()
        matchLast = self._UPDATED_RX.search(self.content)
        self.content = self._UPDATED_RX.sub('', self.content).strip()
        if matchLast is None:
            return self._EPOCH
        return datetime.strptime(matchLast.group(1), self._UPDATED_FORMAT)

    def _collectNewItems(self, plist, last):
        """Append Markdown for every Reading List item added after *last*."""
        # There should only be one Reading List folder, so take the first one;
        # a plist without one is treated as having no items.
        readingList = next(
            (item for item in plist.get('Children', [])
             if item.get('Title') == 'com.apple.ReadingList'),
            {})
        for item in readingList.get('Children', []):
            added = item['ReadingList']['DateAdded']
            if added <= last:
                # NOTE(review): stops at the first item that is not newer,
                # which presumes the list is ordered newest first - confirm.
                break
            addtime = pytz.utc.localize(added).strftime('%c')
            title = self._cleanText(item['URIDictionary']['title'])
            url = item['URLString']
            description = ''
            if 'PreviewText' in item['ReadingList']:
                description = self._cleanText(item['ReadingList']['PreviewText'])
            self.itemToMarkdown(addtime, title, url, description)

    def itemToMarkdown(self, addtime, title, url, description):
        """Append one Markdown list item to ``self.newcontent`` and count it.

        Args:
            addtime: formatted date string placed in the link's title text.
            title: link text.
            url: link target.
            description: optional preview text, emitted as a block quote
                under the item when not empty.
        """
        self.newcontent += '- [' + title + '](' + url + ' "Added on ' + addtime + '")'
        if description != '':
            self.newcontent += "\n\n > " + description
        self.newcontent += "\n\n"
        self.postedCount += 1
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the export, which
    # happens entirely inside the _readingList constructor.
    from argparse import ArgumentParser
    parser = ArgumentParser()
    parser.add_argument("-f", "--out-file", dest="output_file", default=BOOKMARKS_MARKDOWN_FILE,
                        help="output markdown file", metavar="outfile")
    parser.add_argument("-b", "--bookmarks-file", dest="input_file", default=BOOKMARKS_PLIST,
                        help="input Bookmarks.plist file", metavar="infile")
    # store_false: args.write is True unless --no-write is given, so the help
    # text has to describe the negative.
    parser.add_argument("--no-write", dest="write", action='store_false',
                        help="do not write to the output file")
    parser.add_argument("--no-plutil", dest="use_plutil", action='store_false',
                        help="disable plutil system call - useful for running in jupyter or on linux.\nWARN you must parse the plist file yourself")
    args = parser.parse_args()
    _readingList(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment