100ideas · March 21, 2018 08:04
diff --git a/ReadingListCatcher.ipynb b/ReadingListCatcher.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 485,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!/usr/bin/python\n",
    "# ReadingListCatcher\n",
    "# - A script for exporting Safari Reading List items to Markdown and Pinboard\n",
    "#   Brett Terpstra 2015\n",
    "#   https://gist.github.com/ttscoff/f27f161f37bbc7f5b418\n",
    "#\n",
    "# Uses code from <https://gist.github.com/robmathers/5995026>\n",
    "# Requires Python pinboard lib for Pinboard.in import:\n",
    "#     `easy_install pinboard` or `pip install pinboard`\n",
    "\n",
    "import plistlib\n",
    "from shutil import copy\n",
    "import subprocess\n",
    "import os\n",
    "from tempfile import gettempdir\n",
    "import sys\n",
    "import atexit\n",
    "import re\n",
    "import time\n",
    "from datetime import date, datetime, timedelta\n",
    "from os import path\n",
    "import pytz"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 594,
   "metadata": {},
   "outputs": [],
   "source": [
    "# DEFAULT_EXPORT_TYPE = 'pb' # pb, md or all.\n",
    "DEFAULT_EXPORT_TYPE = 'md' # pb, md or all\n",
    "\n",
    "# BOOKMARKS_MARKDOWN_FILE = '~/Dropbox/Safari-ReadingList.md' # Markdown file if using md export\n",
    "# BOOKMARKS_PLIST = '~/Library/Safari/Bookmarks.plist' # Shouldn't need to modify\n",
    "BOOKMARKS_MARKDOWN_FILE = 'Safari-ReadingList.md' # Markdown file if using md export\n",
    "BOOKMARKS_PLIST = 'Bookmarks.plist' # Shouldn't need to modify"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 595,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('Bookmarks.plist', 'Safari-ReadingList.md')"
      ]
     },
     "execution_count": 595,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bookmarksFile = os.path.expanduser(BOOKMARKS_PLIST)\n",
    "markdownFile = os.path.expanduser(BOOKMARKS_MARKDOWN_FILE)\n",
    "bookmarksFile, markdownFile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 596,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'/tmp/Bookmarks.plist'"
      ]
     },
     "execution_count": 596,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Make a copy of the bookmarks and convert it from a binary plist to text\n",
    "tempDirectory = gettempdir()\n",
    "copy(bookmarksFile, tempDirectory)\n",
    "bookmarksFileCopy = os.path.join(tempDirectory, os.path.basename(bookmarksFile))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 597,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<function __main__.removeTempFile>"
      ]
     },
     "execution_count": 597,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def removeTempFile():\n",
    "    os.remove(bookmarksFileCopy)\n",
    "\n",
    "atexit.register(removeTempFile) # Delete the temp file when the script finishes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 598,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'BookmarksFileCopy.plist'"
      ]
     },
     "execution_count": 598,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bookmarksFileCopy = os.path.expanduser('BookmarksFileCopy.plist')\n",
    "bookmarksFileCopy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 661,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "_StoreAction(option_strings=['-f', '--file'], dest='output_file', nargs=None, const=None, default='~/Dropbox/Safari-ReadingList.md', type=None, choices=None, help='output markdown file', metavar='file')"
      ]
     },
     "execution_count": 661,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "_StoreFalseAction(option_strings=['--no-write'], dest='write', nargs=0, const=False, default=True, type=None, choices=None, help='write to output file', metavar=None)"
      ]
     },
     "execution_count": 661,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "_StoreFalseAction(option_strings=['--no-plutil'], dest='use_plutil', nargs=0, const=False, default=True, type=None, choices=None, help='disable plutil system call', metavar=None)"
      ]
     },
     "execution_count": 661,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "Namespace(output_file='hi.md', use_plutil=False, write=False)"
      ]
     },
     "execution_count": 661,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from argparse import ArgumentParser\n",
    "parser = ArgumentParser()\n",
    "parser.add_argument(\"-f\", \"--file\", dest=\"output_file\", default='~/Dropbox/Safari-ReadingList.md',\n",
    "                    help=\"output markdown file\", metavar=\"file\")\n",
    "\n",
    "parser.add_argument(\"--no-write\", dest=\"write\", action='store_false',\n",
    "                    help=\"write to output file\")\n",
    "\n",
    "parser.add_argument(\"--no-plutil\", dest=\"use_plutil\", action='store_false',\n",
    "                    help=\"disable plutil system call\")\n",
    "\n",
    "args = parser.parse_args('-f hi.md --no-write --no-plutil'.split())\n",
    "\n",
    "args"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 677,
   "metadata": {},
   "outputs": [],
   "source": [
    "class _readingList():\n",
    "    def __init__(self, args):\n",
    "        \n",
    "        markdownFile = os.path.expanduser(args.output_file)\n",
    "        \n",
    "        self.postedCount = 0\n",
    "        self.content = ''\n",
    "        self.newcontent = ''\n",
    "        # last = time.strptime((datetime.now() - timedelta(days = 1)).strftime('%c'))\n",
    "        last = time.strptime(\"2013-01-01 00:00:00 UTC\", '%Y-%m-%d %H:%M:%S UTC')\n",
    "\n",
    "        if args.use_plutil:\n",
    "            converted = subprocess.call(['plutil', '-convert', 'xml1', bookmarksFileCopy])\n",
    "        else:\n",
    "            converted = 0\n",
    "        \n",
    "        if converted != 0:\n",
    "            print('Couldn\\'t convert bookmarks plist from xml format')\n",
    "            sys.exit(converted)\n",
    "\n",
    "        if args.write:\n",
    "            if not os.path.exists(markdownFile):\n",
    "                open(markdownFile, 'a').close()\n",
    "            else:    \n",
    "                with open (markdownFile, 'r') as mdInput:\n",
    "                    self.content = mdInput.read()\n",
    "                    matchLast = re.search(re.compile('(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} UTC)'), self.content)\n",
    "                    if matchLast != None:\n",
    "                        last = time.strptime(matchLast.group(1), '%Y-%m-%d %H:%M:%S UTC') \n",
    "        \n",
    "        last = datetime(*last[:6])\n",
    "        \n",
    "        rx = re.compile(\"(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) UTC\")\n",
    "        self.content = re.sub(rx,'',self.content).strip()\n",
    "        \n",
    "        # plist = plistlib.readPlist(bookmarksFileCopy)\n",
    "        # --> /opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:5: DeprecationWarning: The readPlist function is deprecated, use load() instead\n",
    "        with open(bookmarksFileCopy, 'rb') as fp:\n",
    "            plist = plistlib.load(fp)\n",
    "                    \n",
    "        # There should only be one Reading List item, so take the first one\n",
    "        readingList = [item for item in plist['Children'] if 'Title' in item and item['Title'] == 'com.apple.ReadingList'][0]\n",
    "\n",
    "        if 'Children' in readingList:\n",
    "            cleanRx = re.compile(\"[\\|\\`\\:_\\*\\n]\")\n",
    "            \n",
    "            for item in readingList['Children']:\n",
    "                if item['ReadingList']['DateAdded'] > last:\n",
    "                    addtime = pytz.utc.localize(item['ReadingList']['DateAdded']).strftime('%c')\n",
    "                    #       title = re.sub(cleanRx, ' ', item['URIDictionary']['title'].encode('utf8'))\n",
    "                    title = re.sub(cleanRx, ' ', item['URIDictionary']['title'])\n",
    "                    title = re.sub(' +', ' ', title)\n",
    "                    url = item['URLString']\n",
    "                    description = ''\n",
    "\n",
    "                    if 'PreviewText' in item['ReadingList']:\n",
    "            #           description = item['ReadingList']['PreviewText'].encode('utf8')\n",
    "                        description = item['ReadingList']['PreviewText']\n",
    "                        description = re.sub(cleanRx, ' ', description)\n",
    "                        description = re.sub(' +', ' ', description)\n",
    "\n",
    "                    self.itemToMarkdown(addtime, title, url, description)\n",
    "\n",
    "                else:\n",
    "                    break\n",
    "\n",
    "        pluralized = 'bookmarks' if self.postedCount > 1 else 'bookmark'\n",
    "        \n",
    "        if args.write:\n",
    "            mdHandle = open(markdownFile, 'w')\n",
    "            mdHandle.write('Updated: ' + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + \" UTC\\n\\n\")\n",
    "            mdHandle.write(self.newcontent + self.content)\n",
    "            mdHandle.close()\n",
    "            \n",
    "        if self.postedCount > 0:\n",
    "            sys.stdout.write('Found ' + str(self.postedCount) + ' new ' + pluralized + \"\\n\")\n",
    "            sys.stdout.write(('Saved' if args.write else 'WARN --no-write; so not writing') + ' to ' + markdownFile)\n",
    "        else:\n",
    "            sys.stdout.write('No new bookmarks found in Reading List')\n",
    "            \n",
    "        sys.stdout.write(\"\\n\")\n",
    "        \n",
    "    def itemToMarkdown(self, addtime, title, url, description):\n",
    "        self.newcontent += '- [' + title + '](' + url + ' \"Added on ' + addtime + '\")'\n",
    "        if not description == '':\n",
    "            self.newcontent += \"\\n\\n    > \" + description\n",
    "        self.newcontent += \"\\n\\n\"\n",
    "        self.postedCount += 1\n",
    "\n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 676,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Found 674 new bookmarks\n",
      "WARN save=false; not writing to hi.md\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<__main__._readingList at 0x7f289e10de48>"
      ]
     },
     "execution_count": 676,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "_readingList(args)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 539,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<_sre.SRE_Match object; span=(0, 32), match='Updated: 2018-03-21 04:11:55 UTC'>"
      ]
     },
     "execution_count": 539,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "time.struct_time(tm_year=2018, tm_mon=3, tm_mday=21, tm_hour=4, tm_min=11, tm_sec=55, tm_wday=2, tm_yday=80, tm_isdst=-1)"
      ]
     },
     "execution_count": 539,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "datetime.datetime(2018, 3, 21, 4, 11, 55)"
      ]
     },
     "execution_count": 539,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "506"
      ]
     },
     "execution_count": 539,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cn = 'Updated: 2018-03-21 04:11:55 UTC\\n\\n- [Bussed out how America moves thousands of homeless people around the country US news The Guardian](https://www.theguardian.com/us-news/ng-interactive/2017/dec/20/bussed-out-america-moves-homeless-people-country-study \"Added on Tue Mar 20 21:32:03 2018\")\\n\\n- [Favorites](https://www.jstatsoft.org/article/view/v046i03/v46i03.pdf \"Added on Tue Mar 20 19:02:16 2018\")\\n\\n- [Favorites](http://demo.thi.ng/umbrella/router-basics/#/home \"Added on Tue Mar 20 10:03:26 2018\")\\n\\nLast Updated: 2013-01-01 00:00:00 UTC'\n",
    "\n",
    "matchLast = re.search(re.compile('(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} UTC)'), cn)\n",
    "matchLast\n",
    "last = time.strptime(matchLast.group(1), '%Y-%m-%d %H:%M:%S UTC')\n",
    "last\n",
    "datetime(*last[:6])\n",
    "rx = re.compile(\"(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) UTC\")\n",
    "cn2 = re.sub(rx,'',cn).strip()\n",
    "len(cn2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 540,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['a']['1'] = 'bob'\n",
      "['a']['2'] = 'fred'\n",
      "['a']['3'] = 'henry'\n",
      "['b']['5'] = 'fred'\n",
      "['b']['7'] = 'henry'\n",
      "['b']['9']['innerinner'] = 'pascale'\n",
      " = '- [Bussed out how America moves thousands of homeless people around the country US news The Guardian](https://www.theguardian.com/us-news/ng-interactive/2017/dec/20/bussed-out-america-moves-homele\n"
     ]
    }
   ],
   "source": [
    "# https://stackoverflow.com/a/35380129\n",
    "def print_dict(v, prefix=''):\n",
    "    if isinstance(v, dict):\n",
    "        for k, v2 in v.items():\n",
    "            p2 = \"{}['{}']\".format(prefix, k)\n",
    "            print_dict(v2, p2)\n",
    "    elif isinstance(v, list):\n",
    "        for i, v2 in enumerate(v):\n",
    "            p2 = \"{}[{}]\".format(prefix, i)\n",
    "            print_dict(v2, p2)\n",
    "    else:\n",
    "        print(('{} = {}'.format(prefix, repr(v)))[0:200])\n",
    "\n",
    "itm = {'a': {1:'bob', 2: 'fred', 3: 'henry'},\n",
    "       'b': {5:'fred',7: 'henry', 9: {'innerinner': 'pascale'}} \n",
    "      }\n",
    "\n",
    "print_dict(itm)\n",
    "print_dict(cn2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 441,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "datetime.datetime(2013, 1, 1, 0, 0)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "datetime.datetime(2018, 3, 20, 21, 32, 3)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['ReadingList']['DateAdded'] = datetime.datetime(2018, 3, 20, 21, 32, 3)\n",
      "['ReadingListNonSync']['AddedLocally'] = True\n",
      "['ReadingListNonSync']['neverFetchMetadata'] = False\n",
      "['Sync']['Data'] = Data(b'bplist00\\xd4\\x01\\x02\\x03\\x04\\x05\\x06\\x80\\x81X$versionX$objectsY$archiverT$top\\x12\\x00\\x01\\x86\\xa0\\xaf\\x10#\\x07\\x08\\x15\\x19 2345678>?CFGJKNORSVW[_cjkorvz|U$null\\xd6\\t\\n\\x0b\\x0\n",
      "['Sync']['ServerID'] = 'DD14B293-DC36-4DBB-9995-23AB2F9D1424'\n",
      "['URIDictionary']['title'] = 'Bussed out: how America moves thousands of homeless people around the country | US news | The Guardian'\n",
      "['URLString'] = 'https://www.theguardian.com/us-news/ng-interactive/2017/dec/20/bussed-out-america-moves-homeless-people-country-study'\n",
      "['WebBookmarkType'] = 'WebBookmarkTypeLeaf'\n",
      "['WebBookmarkUUID'] = '78CD6E59-FA68-4351-A80C-308179BADE3B'\n"
     ]
    }
   ],
   "source": [
    "if 'Children' in readingList:\n",
    "  cnt = 0\n",
    "  cleanRx = re.compile(\"[\\|\\`\\:_\\*\\n]\")\n",
    "  for item in readingList['Children']:\n",
    "    cnt = cnt + 1\n",
    "    \n",
    "    if cnt < 2:\n",
    "#       display(addtime, title, url, description)\n",
    "      itm = item['ReadingList']\n",
    "      display(last)\n",
    "      display(item['ReadingList']['DateAdded'])\n",
    "      display(item['ReadingList']['DateAdded'] > last)\n",
    "\n",
    "      print_dict(item)\n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 644,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "_StoreAction(option_strings=['-f', '--file'], dest='output_file', nargs=None, const=None, default='~/Dropbox/Safari-ReadingList.md', type=None, choices=None, help='output markdown file', metavar='file')"
      ]
     },
     "execution_count": 644,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "_StoreFalseAction(option_strings=['--no-write'], dest='write', nargs=0, const=False, default=True, type=None, choices=None, help='write to output file', metavar=None)"
      ]
     },
     "execution_count": 644,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "_StoreFalseAction(option_strings=['--no-plutil'], dest='plutul', nargs=0, const=False, default=True, type=None, choices=None, help='disable plutil system call', metavar=None)"
      ]
     },
     "execution_count": 644,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "'hi.md'"
      ]
     },
     "execution_count": 644,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "Namespace(output_file='hi.md', plutul=False, write=False)"
      ]
     },
     "execution_count": 644,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from argparse import ArgumentParser\n",
    "parser = ArgumentParser()\n",
    "parser.add_argument(\"-f\", \"--file\", dest=\"output_file\", default='~/Dropbox/Safari-ReadingList.md',\n",
    "                    help=\"output markdown file\", metavar=\"file\")\n",
    "\n",
    "parser.add_argument(\"--no-write\", dest=\"write\", action='store_false',\n",
    "                    help=\"write to output file\")\n",
    "\n",
    "parser.add_argument(\"--no-plutil\", dest=\"plutul\", action='store_false',\n",
    "                    help=\"disable plutil system call\")\n",
    "\n",
    "args = parser.parse_args('-f hi.md --no-write --no-plutil'.split())\n",
    "\n",
    "args"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 654,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "hi.md\n",
      "--no-write\n"
     ]
    },
    {
     "ename": "AttributeError",
     "evalue": "'dict' object has no attribute 'output_file'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-654-2a0fc54f8ad2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      9\u001b[0m \u001b[0;31m# args2 = dict(output_file='test')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0margprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m<ipython-input-654-2a0fc54f8ad2>\u001b[0m in \u001b[0;36margprint\u001b[0;34m(args1)\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0margprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0margs1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m     \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--no-write\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;31m#     print(args1.write)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'output_file'"
     ]
    }
   ],
   "source": [
    "def argprint (args1):\n",
    "    print(args1.output_file)\n",
    "    print(\"--no-write\")\n",
    "#     print(args1.write)\n",
    "\n",
    "argprint(args)\n",
    "\n",
    "args2 = {'output_file': 'test'}\n",
    "# args2 = dict(output_file='test')\n",
    "\n",
    "argprint(args2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
diff --git a/ReadingListCatcher.py b/ReadingListCatcher.py
 #!/usr/bin/python
 # ReadingListCatcher
 # - A script for exporting Safari Reading List items to Markdown and Pinboard
 #   Brett Terpstra 2015
 #   https://gist.github.com/ttscoff/f27f161f37bbc7f5b418
 #
 # Uses code from <https://gist.github.com/robmathers/5995026>
 # Requires Python pinboard lib for Pinboard.in import:
 #     `easy_install pinboard` or `pip install pinboard`

 import plistlib
 from shutil import copy
 import subprocess
 import os
 from tempfile import gettempdir
 import sys
 import re
 import time
 from datetime import date, datetime, timedelta
 from os import path
 import pytz

 # Default paths used by the command-line options below; "~" is expanded
 # with os.path.expanduser before the paths are used.
 BOOKMARKS_MARKDOWN_FILE = '~/Dropbox/Safari-ReadingList.md' # Markdown file if using md export
 BOOKMARKS_PLIST = '~/Library/Safari/Bookmarks.plist' # Shouldn't need to modify
 # Module-level switch for the `plutil -convert xml1 <file>` step that is run
 # on the temporary copy of the bookmarks file before it is parsed.
 # The --no-plutil command-line option is the per-run counterpart.

 USE_PLUTIL = True  # default

 def copyTempFile(srcFile):
    """Copy ``srcFile`` into the temp directory and return the copy's path.

    The copy gets a unique name, so concurrent runs do not overwrite each
    other's working file and a source that already lives in the temp
    directory is not copied onto itself. The original basename is kept as
    the suffix, so the file extension is preserved.

    Args:
        srcFile: path of the file to copy (the bookmarks plist).

    Returns:
        Path of the newly created copy. The caller is responsible for
        deleting it (see ``removeTempFile``).

    Raises:
        IOError/OSError: if the source cannot be read or the copy cannot
            be written; the partially created temp file is removed first.
    """
    # Local import: the file's import block only brings in gettempdir.
    from tempfile import mkstemp

    # mkstemp creates the file atomically under a name nobody else can guess.
    handle, tmpFile = mkstemp(suffix='-' + os.path.basename(srcFile),
                              dir=gettempdir())
    os.close(handle)
    try:
        copy(srcFile, tmpFile)
    except Exception:
        # Do not leave an empty placeholder behind when the copy fails.
        os.remove(tmpFile)
        raise
    return tmpFile

 def removeTempFile(tmpFile):
    """Delete the temporary file ``tmpFile``.

    A file that no longer exists is not treated as an error, so the
    function can be called from cleanup code without hiding the exception
    that triggered the cleanup.

    Args:
        tmpFile: path of the file to delete.

    Raises:
        OSError: for any failure other than the file being absent.
    """
    # Local import: errno is not part of the file's import block.
    import errno

    try:
        os.remove(tmpFile)
    except OSError as err:
        # Only "no such file" is tolerated; permission problems etc. still surface.
        if err.errno != errno.ENOENT:
            raise

 class _readingList():
    """Export new Safari Reading List entries to a Markdown file.

    Instantiating the class performs the whole export (see ``__init__``).
    Afterwards ``postedCount`` holds the number of new entries found,
    ``newcontent`` the Markdown generated for them and ``content`` the
    previous body of the Markdown file without its ``Updated:`` header.
    """

    # "Updated: <timestamp> UTC" header line stored in the Markdown file.
    _UPDATED_RX = re.compile(r'(?m)^Updated: (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) UTC')
    # Characters that are replaced by a space in titles and preview text.
    _CLEAN_RX = re.compile(r'[|`:_*\n]')

    def __init__(self, args):
        """Run the export described by ``args``.

        Args:
            args: object with the attributes ``input_file`` (bookmarks
                plist path), ``output_file`` (Markdown path), ``write``
                (write the Markdown file when true) and ``use_plutil``
                (convert the plist copy with plutil when true).

        Raises:
            SystemExit: with plutil's exit status when the conversion fails.
        """
        print(args)

        bookmarksFile = os.path.expanduser(args.input_file)
        markdownFile = os.path.expanduser(args.output_file)

        self.postedCount = 0
        self.content = ''
        self.newcontent = ''

        bookmarksFileCopy = copyTempFile(bookmarksFile)
        try:
            sys.stdout.write('tmpfile bookmarksFileCopy: ')
            print(bookmarksFileCopy)
            self._export(args, bookmarksFileCopy, markdownFile)
        finally:
            # Runs on success, on sys.exit and on errors, so the copy never leaks.
            removeTempFile(bookmarksFileCopy)

    def _export(self, args, plistPath, markdownFile):
        """Convert, read and export the plist copy at ``plistPath``."""
        # Both the module default and the command-line flag must allow plutil;
        # otherwise --no-plutil could never switch the conversion off.
        if USE_PLUTIL and args.use_plutil:
            converted = subprocess.call(['plutil', '-convert', 'xml1', plistPath])
            if converted != 0:
                print('Couldn\'t convert bookmarks plist to xml format')
                sys.exit(converted)

        last = self._readPreviousState(args, markdownFile)
        self._collectNewItems(self._loadPlist(plistPath), last)

        if args.write:
            with open(markdownFile, 'w') as mdHandle:
                mdHandle.write('Updated: ' + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + " UTC\n\n")
                mdHandle.write(self.newcontent + self.content)

        self._report(args, markdownFile)

    def _readPreviousState(self, args, markdownFile):
        """Load the old Markdown body into ``self.content``; return the last update time."""
        # Used when there is no earlier export (or the file is not read at all).
        last = datetime(2013, 1, 1)

        if args.write:
            if not os.path.exists(markdownFile):
                # Create the file up front, as before.
                open(markdownFile, 'a').close()
            else:
                with open(markdownFile, 'r') as mdInput:
                    self.content = mdInput.read()
                matchLast = self._UPDATED_RX.search(self.content)
                if matchLast is not None:
                    last = datetime.strptime(matchLast.group(1), '%Y-%m-%d %H:%M:%S')

        # The header is rewritten on every run, so drop the old one(s).
        self.content = self._UPDATED_RX.sub('', self.content).strip()
        return last

    def _collectNewItems(self, plist, last):
        """Append every Reading List entry added after ``last`` to ``self.newcontent``."""
        # There should only be one Reading List folder, so take the first one;
        # a plist without such a folder simply yields no entries.
        readingList = next(
            (item for item in plist['Children']
             if 'Title' in item and item['Title'] == 'com.apple.ReadingList'),
            {})

        for item in readingList.get('Children', []):
            entry = item['ReadingList']
            if not entry['DateAdded'] > last:
                # NOTE(review): stopping at the first old entry assumes the
                # entries are stored newest first - confirm.
                break

            addtime = pytz.utc.localize(entry['DateAdded']).strftime('%c')
            title = self._cleanText(self._toNative(item['URIDictionary']['title']))
            url = item['URLString']
            description = ''
            if 'PreviewText' in entry:
                description = self._cleanText(self._toNative(entry['PreviewText']))

            self.itemToMarkdown(addtime, title, url, description)

    def _report(self, args, markdownFile):
        """Print a summary of the run to stdout."""
        pluralized = 'bookmarks' if self.postedCount > 1 else 'bookmark'

        if self.postedCount > 0:
            sys.stdout.write('\n' + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + ' UTC\n')
            sys.stdout.write('Found ' + str(self.postedCount) + ' new ' + pluralized + "\n")
            sys.stdout.write(('Saved' if args.write else 'WARN --no-write; so not writing') + ' to ' + markdownFile)
        else:
            sys.stdout.write('No new bookmarks found in Reading List')

        sys.stdout.write("\n")

    @staticmethod
    def _loadPlist(plistPath):
        """Parse the plist at ``plistPath`` with whichever plistlib API exists."""
        # plistlib.load is the Python 3 API; readPlist is the only one on 2.7.
        if hasattr(plistlib, 'load'):
            with open(plistPath, 'rb') as fp:
                return plistlib.load(fp)
        return plistlib.readPlist(plistPath)

    @staticmethod
    def _toNative(text):
        """Return ``text`` as the interpreter's native ``str`` type."""
        # Python 2 needs UTF-8 bytes for concatenation and file output; on
        # Python 3 encoding would produce bytes that the str regexes reject.
        if sys.version_info[0] < 3:
            return text.encode('utf8')
        return text

    @staticmethod
    def _cleanText(text):
        """Replace ``|``, backtick, ``:``, ``_``, ``*`` and newlines by single spaces."""
        return re.sub(' +', ' ', _readingList._CLEAN_RX.sub(' ', text))

    def itemToMarkdown(self, addtime, title, url, description):
        """Append one entry to ``self.newcontent`` and count it.

        Args:
            addtime: text placed in the link title after "Added on ".
            title: link text.
            url: link target.
            description: optional preview text; when non-empty it is added
                as an indented block quote below the link.
        """
        self.newcontent += '- [' + title + '](' + url + ' "Added on ' + addtime + '")'
        if not description == '':
            self.newcontent += "\n\n    > " + description
        self.newcontent += "\n\n"
        self.postedCount += 1

 def buildArgParser(outputDefault, inputDefault):
    """Build the command-line parser for the script.

    Args:
        outputDefault: default value for ``-f/--out-file``.
        inputDefault: default value for ``-b/--bookmarks-file``.

    Returns:
        An ``argparse.ArgumentParser`` whose parsed namespace has the
        attributes ``output_file``, ``input_file``, ``write`` (False when
        ``--no-write`` is given) and ``use_plutil`` (False when
        ``--no-plutil`` is given).
    """
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument("-f", "--out-file", dest="output_file", default=outputDefault,
            help="output markdown file", metavar="outfile")
    parser.add_argument("-b", "--bookmarks-file", dest="input_file", default=inputDefault,
            help="input Bookmarks.plist file", metavar="infile")
    # store_false: the flag turns writing OFF, so the help text must say so.
    parser.add_argument("--no-write", dest="write", action='store_false',
            help="do not write to output file")
    parser.add_argument("--no-plutil", dest="use_plutil", action='store_false',
           help="disable plutil system call - useful for running in jupyter or on linux.\nWARN you must parse the plist file yourself")
    return parser


 if __name__ == "__main__":

    args = buildArgParser(BOOKMARKS_MARKDOWN_FILE, BOOKMARKS_PLIST).parse_args()
    _readingList(args)
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 485,
	"metadata": {},
	"outputs": [],
	"source": [
	"#!/usr/bin/python\n",
	"# ReadingListCatcher\n",
	"# - A script for exporting Safari Reading List items to Markdown and Pinboard\n",
	"# Brett Terpstra 2015\n",
	"# https://gist.github.com/ttscoff/f27f161f37bbc7f5b418\n",
	"#\n",
	"# Uses code from <https://gist.github.com/robmathers/5995026>\n",
	"# Requires Python pinboard lib for Pinboard.in import:\n",
	"# `easy_install pinboard` or `pip install pinboard`\n",
	"\n",
	"import plistlib\n",
	"from shutil import copy\n",
	"import subprocess\n",
	"import os\n",
	"from tempfile import gettempdir\n",
	"import sys\n",
	"import atexit\n",
	"import re\n",
	"import time\n",
	"from datetime import date, datetime, timedelta\n",
	"from os import path\n",
	"import pytz"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 594,
	"metadata": {},
	"outputs": [],
	"source": [
	"# DEFAULT_EXPORT_TYPE = 'pb' # pb, md or all.\n",
	"DEFAULT_EXPORT_TYPE = 'md' # pb, md or all\n",
	"\n",
	"# BOOKMARKS_MARKDOWN_FILE = '~/Dropbox/Safari-ReadingList.md' # Markdown file if using md export\n",
	"# BOOKMARKS_PLIST = '~/Library/Safari/Bookmarks.plist' # Shouldn't need to modify\n",
	"BOOKMARKS_MARKDOWN_FILE = 'Safari-ReadingList.md' # Markdown file if using md export\n",
	"BOOKMARKS_PLIST = 'Bookmarks.plist' # Shouldn't need to modify"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 595,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"('Bookmarks.plist', 'Safari-ReadingList.md')"
	]
	},
	"execution_count": 595,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"bookmarksFile = os.path.expanduser(BOOKMARKS_PLIST)\n",
	"markdownFile = os.path.expanduser(BOOKMARKS_MARKDOWN_FILE)\n",
	"bookmarksFile, markdownFile"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 596,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'/tmp/Bookmarks.plist'"
	]
	},
	"execution_count": 596,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Make a copy of the bookmarks and convert it from a binary plist to text\n",
	"tempDirectory = gettempdir()\n",
	"copy(bookmarksFile, tempDirectory)\n",
	"bookmarksFileCopy = os.path.join(tempDirectory, os.path.basename(bookmarksFile))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 597,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"<function __main__.removeTempFile>"
	]
	},
	"execution_count": 597,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"def removeTempFile():\n",
	" os.remove(bookmarksFileCopy)\n",
	"\n",
	"atexit.register(removeTempFile) # Delete the temp file when the script finishes"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 598,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'BookmarksFileCopy.plist'"
	]
	},
	"execution_count": 598,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"bookmarksFileCopy = os.path.expanduser('BookmarksFileCopy.plist')\n",
	"bookmarksFileCopy"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 661,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"_StoreAction(option_strings=['-f', '--file'], dest='output_file', nargs=None, const=None, default='~/Dropbox/Safari-ReadingList.md', type=None, choices=None, help='output markdown file', metavar='file')"
	]
	},
	"execution_count": 661,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"_StoreFalseAction(option_strings=['--no-write'], dest='write', nargs=0, const=False, default=True, type=None, choices=None, help='write to output file', metavar=None)"
	]
	},
	"execution_count": 661,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"_StoreFalseAction(option_strings=['--no-plutil'], dest='use_plutil', nargs=0, const=False, default=True, type=None, choices=None, help='disable plutil system call', metavar=None)"
	]
	},
	"execution_count": 661,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"Namespace(output_file='hi.md', use_plutil=False, write=False)"
	]
	},
	"execution_count": 661,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from argparse import ArgumentParser\n",
	"parser = ArgumentParser()\n",
	"parser.add_argument(\"-f\", \"--file\", dest=\"output_file\", default='~/Dropbox/Safari-ReadingList.md',\n",
	" help=\"output markdown file\", metavar=\"file\")\n",
	"\n",
	"parser.add_argument(\"--no-write\", dest=\"write\", action='store_false',\n",
	" help=\"write to output file\")\n",
	"\n",
	"parser.add_argument(\"--no-plutil\", dest=\"use_plutil\", action='store_false',\n",
	" help=\"disable plutil system call\")\n",
	"\n",
	"args = parser.parse_args('-f hi.md --no-write --no-plutil'.split())\n",
	"\n",
	"args"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 677,
	"metadata": {},
	"outputs": [],
	"source": [
	"class _readingList():\n",
	" def __init__(self, args):\n",
	" \n",
	" markdownFile = os.path.expanduser(args.output_file)\n",
	" \n",
	" self.postedCount = 0\n",
	" self.content = ''\n",
	" self.newcontent = ''\n",
	" # last = time.strptime((datetime.now() - timedelta(days = 1)).strftime('%c'))\n",
	" last = time.strptime(\"2013-01-01 00:00:00 UTC\", '%Y-%m-%d %H:%M:%S UTC')\n",
	"\n",
	" if args.use_plutil:\n",
	" converted = subprocess.call(['plutil', '-convert', 'xml1', bookmarksFileCopy])\n",
	" else:\n",
	" converted = 0\n",
	" \n",
	" if converted != 0:\n",
	" print('Couldn\\'t convert bookmarks plist from xml format')\n",
	" sys.exit(converted)\n",
	"\n",
	" if args.write:\n",
	" if not os.path.exists(markdownFile):\n",
	" open(markdownFile, 'a').close()\n",
	" else: \n",
	" with open (markdownFile, 'r') as mdInput:\n",
	" self.content = mdInput.read()\n",
	" matchLast = re.search(re.compile('(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} UTC)'), self.content)\n",
	" if matchLast != None:\n",
	" last = time.strptime(matchLast.group(1), '%Y-%m-%d %H:%M:%S UTC') \n",
	" \n",
	" last = datetime(*last[:6])\n",
	" \n",
	" rx = re.compile(\"(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) UTC\")\n",
	" self.content = re.sub(rx,'',self.content).strip()\n",
	" \n",
	" # plist = plistlib.readPlist(bookmarksFileCopy)\n",
	" # --> /opt/conda/lib/python3.6/site-packages/ipykernel_launcher.py:5: DeprecationWarning: The readPlist function is deprecated, use load() instead\n",
	" with open(bookmarksFileCopy, 'rb') as fp:\n",
	" plist = plistlib.load(fp)\n",
	" \n",
	" # There should only be one Reading List item, so take the first one\n",
	" readingList = [item for item in plist['Children'] if 'Title' in item and item['Title'] == 'com.apple.ReadingList'][0]\n",
	"\n",
	" if 'Children' in readingList:\n",
	" cleanRx = re.compile(\"[\\\|\\`\\:_\\*\\n]\")\n",
	" \n",
	" for item in readingList['Children']:\n",
	" if item['ReadingList']['DateAdded'] > last:\n",
	" addtime = pytz.utc.localize(item['ReadingList']['DateAdded']).strftime('%c')\n",
	" # title = re.sub(cleanRx, ' ', item['URIDictionary']['title'].encode('utf8'))\n",
	" title = re.sub(cleanRx, ' ', item['URIDictionary']['title'])\n",
	" title = re.sub(' +', ' ', title)\n",
	" url = item['URLString']\n",
	" description = ''\n",
	"\n",
	" if 'PreviewText' in item['ReadingList']:\n",
	" # description = item['ReadingList']['PreviewText'].encode('utf8')\n",
	" description = item['ReadingList']['PreviewText']\n",
	" description = re.sub(cleanRx, ' ', description)\n",
	" description = re.sub(' +', ' ', description)\n",
	"\n",
	" self.itemToMarkdown(addtime, title, url, description)\n",
	"\n",
	" else:\n",
	" break\n",
	"\n",
	" pluralized = 'bookmarks' if self.postedCount > 1 else 'bookmark'\n",
	" \n",
	" if args.write:\n",
	" mdHandle = open(markdownFile, 'w')\n",
	" mdHandle.write('Updated: ' + datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S') + \" UTC\\n\\n\")\n",
	" mdHandle.write(self.newcontent + self.content)\n",
	" mdHandle.close()\n",
	" \n",
	" if self.postedCount > 0:\n",
	" sys.stdout.write('Found ' + str(self.postedCount) + ' new ' + pluralized + \"\\n\")\n",
	" sys.stdout.write(('Saved' if args.write else 'WARN --no-write; so not writing') + ' to ' + markdownFile)\n",
	" else:\n",
	" sys.stdout.write('No new bookmarks found in Reading List')\n",
	" \n",
	" sys.stdout.write(\"\\n\")\n",
	" \n",
	" def itemToMarkdown(self, addtime, title, url, description):\n",
	" self.newcontent += '- [' + title + '](' + url + ' \"Added on ' + addtime + '\")'\n",
	" if not description == '':\n",
	" self.newcontent += \"\\n\\n > \" + description\n",
	" self.newcontent += \"\\n\\n\"\n",
	" self.postedCount += 1\n",
	"\n",
	" \n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 676,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Found 674 new bookmarks\n",
	"WARN save=false; not writing to hi.md\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"<__main__._readingList at 0x7f289e10de48>"
	]
	},
	"execution_count": 676,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"_readingList(args)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 539,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"<_sre.SRE_Match object; span=(0, 32), match='Updated: 2018-03-21 04:11:55 UTC'>"
	]
	},
	"execution_count": 539,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"time.struct_time(tm_year=2018, tm_mon=3, tm_mday=21, tm_hour=4, tm_min=11, tm_sec=55, tm_wday=2, tm_yday=80, tm_isdst=-1)"
	]
	},
	"execution_count": 539,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"datetime.datetime(2018, 3, 21, 4, 11, 55)"
	]
	},
	"execution_count": 539,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"506"
	]
	},
	"execution_count": 539,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"cn = 'Updated: 2018-03-21 04:11:55 UTC\\n\\n- [Bussed out how America moves thousands of homeless people around the country US news The Guardian](https://www.theguardian.com/us-news/ng-interactive/2017/dec/20/bussed-out-america-moves-homeless-people-country-study \"Added on Tue Mar 20 21:32:03 2018\")\\n\\n- [Favorites](https://www.jstatsoft.org/article/view/v046i03/v46i03.pdf \"Added on Tue Mar 20 19:02:16 2018\")\\n\\n- [Favorites](http://demo.thi.ng/umbrella/router-basics/#/home \"Added on Tue Mar 20 10:03:26 2018\")\\n\\nLast Updated: 2013-01-01 00:00:00 UTC'\n",
	"\n",
	"matchLast = re.search(re.compile('(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} UTC)'), cn)\n",
	"matchLast\n",
	"last = time.strptime(matchLast.group(1), '%Y-%m-%d %H:%M:%S UTC')\n",
	"last\n",
	"datetime(*last[:6])\n",
	"rx = re.compile(\"(?m)^Updated: (\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}) UTC\")\n",
	"cn2 = re.sub(rx,'',cn).strip()\n",
	"len(cn2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 540,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"['a']['1'] = 'bob'\n",
	"['a']['2'] = 'fred'\n",
	"['a']['3'] = 'henry'\n",
	"['b']['5'] = 'fred'\n",
	"['b']['7'] = 'henry'\n",
	"['b']['9']['innerinner'] = 'pascale'\n",
	" = '- [Bussed out how America moves thousands of homeless people around the country US news The Guardian](https://www.theguardian.com/us-news/ng-interactive/2017/dec/20/bussed-out-america-moves-homele\n"
	]
	}
	],
	"source": [
	"# https://stackoverflow.com/a/35380129\n",
	"def print_dict(v, prefix=''):\n",
	" if isinstance(v, dict):\n",
	" for k, v2 in v.items():\n",
	" p2 = \"{}['{}']\".format(prefix, k)\n",
	" print_dict(v2, p2)\n",
	" elif isinstance(v, list):\n",
	" for i, v2 in enumerate(v):\n",
	" p2 = \"{}[{}]\".format(prefix, i)\n",
	" print_dict(v2, p2)\n",
	" else:\n",
	" print(('{} = {}'.format(prefix, repr(v)))[0:200])\n",
	"\n",
	"itm = {'a': {1:'bob', 2: 'fred', 3: 'henry'},\n",
	" 'b': {5:'fred',7: 'henry', 9: {'innerinner': 'pascale'}} \n",
	" }\n",
	"\n",
	"print_dict(itm)\n",
	"print_dict(cn2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 441,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"datetime.datetime(2013, 1, 1, 0, 0)"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/plain": [
	"datetime.datetime(2018, 3, 20, 21, 32, 3)"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"['ReadingList']['DateAdded'] = datetime.datetime(2018, 3, 20, 21, 32, 3)\n",
	"['ReadingListNonSync']['AddedLocally'] = True\n",
	"['ReadingListNonSync']['neverFetchMetadata'] = False\n",
	"['Sync']['Data'] = Data(b'bplist00\\xd4\\x01\\x02\\x03\\x04\\x05\\x06\\x80\\x81X$versionX$objectsY$archiverT$top\\x12\\x00\\x01\\x86\\xa0\\xaf\\x10#\\x07\\x08\\x15\\x19 2345678>?CFGJKNORSVW[_cjkorvz\|U$null\\xd6\\t\\n\\x0b\\x0\n",
	"['Sync']['ServerID'] = 'DD14B293-DC36-4DBB-9995-23AB2F9D1424'\n",
	"['URIDictionary']['title'] = 'Bussed out: how America moves thousands of homeless people around the country \| US news \| The Guardian'\n",
	"['URLString'] = 'https://www.theguardian.com/us-news/ng-interactive/2017/dec/20/bussed-out-america-moves-homeless-people-country-study'\n",
	"['WebBookmarkType'] = 'WebBookmarkTypeLeaf'\n",
	"['WebBookmarkUUID'] = '78CD6E59-FA68-4351-A80C-308179BADE3B'\n"
	]
	}
	],
	"source": [
	"if 'Children' in readingList:\n",
	" cnt = 0\n",
	" cleanRx = re.compile(\"[\\\|\\`\\:_\\*\\n]\")\n",
	" for item in readingList['Children']:\n",
	" cnt = cnt + 1\n",
	" \n",
	" if cnt < 2:\n",
	"# display(addtime, title, url, description)\n",
	" itm = item['ReadingList']\n",
	" display(last)\n",
	" display(item['ReadingList']['DateAdded'])\n",
	" display(item['ReadingList']['DateAdded'] > last)\n",
	"\n",
	" print_dict(item)\n",
	"\n",
	" "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 644,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"_StoreAction(option_strings=['-f', '--file'], dest='output_file', nargs=None, const=None, default='~/Dropbox/Safari-ReadingList.md', type=None, choices=None, help='output markdown file', metavar='file')"
	]
	},
	"execution_count": 644,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"_StoreFalseAction(option_strings=['--no-write'], dest='write', nargs=0, const=False, default=True, type=None, choices=None, help='write to output file', metavar=None)"
	]
	},
	"execution_count": 644,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"_StoreFalseAction(option_strings=['--no-plutil'], dest='plutul', nargs=0, const=False, default=True, type=None, choices=None, help='disable plutil system call', metavar=None)"
	]
	},
	"execution_count": 644,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"'hi.md'"
	]
	},
	"execution_count": 644,
	"metadata": {},
	"output_type": "execute_result"
	},
	{
	"data": {
	"text/plain": [
	"Namespace(output_file='hi.md', plutul=False, write=False)"
	]
	},
	"execution_count": 644,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from argparse import ArgumentParser\n",
	"parser = ArgumentParser()\n",
	"parser.add_argument(\"-f\", \"--file\", dest=\"output_file\", default='~/Dropbox/Safari-ReadingList.md',\n",
	" help=\"output markdown file\", metavar=\"file\")\n",
	"\n",
	"parser.add_argument(\"--no-write\", dest=\"write\", action='store_false',\n",
	" help=\"write to output file\")\n",
	"\n",
	"parser.add_argument(\"--no-plutil\", dest=\"plutul\", action='store_false',\n",
	" help=\"disable plutil system call\")\n",
	"\n",
	"args = parser.parse_args('-f hi.md --no-write --no-plutil'.split())\n",
	"\n",
	"args"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 654,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"hi.md\n",
	"--no-write\n"
	]
	},
	{
	"ename": "AttributeError",
	"evalue": "'dict' object has no attribute 'output_file'",
	"output_type": "error",
	"traceback": [
	"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
	"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
	"\u001b[0;32m<ipython-input-654-2a0fc54f8ad2>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;31m# args2 = dict(output_file='test')\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 11\u001b[0;31m \u001b[0margprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
	"\u001b[0;32m<ipython-input-654-2a0fc54f8ad2>\u001b[0m in \u001b[0;36margprint\u001b[0;34m(args1)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0margprint\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0margs1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs1\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput_file\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"--no-write\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# print(args1.write)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
	"\u001b[0;31mAttributeError\u001b[0m: 'dict' object has no attribute 'output_file'"
	]
	}
	],
	"source": [
	"def argprint (args1):\n",
	" print(args1.output_file)\n",
	" print(\"--no-write\")\n",
	"# print(args1.write)\n",
	"\n",
	"argprint(args)\n",
	"\n",
	"args2 = {'output_file': 'test'}\n",
	"# args2 = dict(output_file='test')\n",
	"\n",
	"argprint(args2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}
	#!/usr/bin/python
	# ReadingListCatcher
	# - A script for exporting Safari Reading List items to Markdown and Pinboard
	# Brett Terpstra 2015
	# https://gist.github.com/ttscoff/f27f161f37bbc7f5b418
	#
	# Uses code from <https://gist.github.com/robmathers/5995026>
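	# Requires Python pinboard lib for Pinboard.in import:
	# `easy_install pinboard` or `pip install pinboard`
	# NOTE: the import block below pulls in `pytz` (third-party, `pip install pytz`)
	# and does not import `pinboard`.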

	import plistlib
	from shutil import copy
	import subprocess
	import os
	from tempfile import gettempdir
	import sys
	import re
	import time
	from datetime import date, datetime, timedelta
	from os import path
	import pytz

	# Default output path; used as the default of the -f/--out-file option and
	# passed through os.path.expanduser() by _readingList.
	BOOKMARKS_MARKDOWN_FILE = '~/Dropbox/Safari-ReadingList.md' # Markdown file if using md export
	# Default input path; used as the default of the -b/--bookmarks-file option.
	BOOKMARKS_PLIST = '~/Library/Safari/Bookmarks.plist' # Shouldn't need to modify
	# call `plutil -convert xml1 <file>` to pre-process bookmark file in local directory

	# Module-level plutil switch; _readingList.__init__ reads it together with
	# args.use_plutil when deciding whether to run `plutil`.
	USE_PLUTIL = True # default

	def copyTempFile(srcFile):
	    """Copy srcFile into the system temp directory and return the copy's path.

	    The copy keeps the base name of srcFile.
	    """
	    scratchDir = gettempdir()
	    targetPath = os.path.join(scratchDir, os.path.basename(srcFile))
	    copy(srcFile, scratchDir)
	    return targetPath

	def removeTempFile(tmpFile):
	    """Delete the file at tmpFile; an OSError from the OS propagates to the caller."""
	    os.unlink(tmpFile)

	class _readingList():
	    """Export new Safari Reading List entries to a Markdown file.

	    Instantiating the class performs the whole run:

	    1. copy the bookmarks plist to a temp file (optionally converting it with
	       ``plutil``), parse it, and always delete the temp copy again;
	    2. read the existing markdown file to find the previous ``Updated:`` stamp;
	    3. convert every Reading List entry added after that stamp to markdown;
	    4. rewrite the markdown file (unless ``args.write`` is false) and print a
	       summary to stdout.

	    ``args`` must provide ``input_file``, ``output_file``, ``write`` and
	    ``use_plutil``. The process exits via ``sys.exit`` with plutil's return
	    code when the conversion fails.

	    Attributes set on the instance:
	        postedCount -- number of entries converted in this run
	        content     -- previous file body with its ``Updated:`` line(s) removed
	        newcontent  -- markdown generated for the new entries
	    """

	    # "Updated: YYYY-MM-DD HH:MM:SS UTC" header line written at the top of the file.
	    _STAMP_RX = re.compile(r'(?m)^Updated: (\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) UTC')
	    _STAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
	    # Characters replaced by a space in titles/descriptions: | ` : _ * and newline.
	    _CLEAN_RX = re.compile(r'[|`:_*\n]')

	    def __init__(self, args):

	        print(args)

	        bookmarksFile = os.path.expanduser(args.input_file)
	        markdownFile = os.path.expanduser(args.output_file)

	        self.postedCount = 0
	        self.content = ''
	        self.newcontent = ''

	        bookmarksFileCopy = copyTempFile(bookmarksFile)
	        try:
	            sys.stdout.write('tmpfile bookmarksFileCopy: ')
	            print(bookmarksFileCopy)
	            # Both switches must be on: the module default AND the CLI flag.
	            # (With `or`, --no-plutil could never disable the call.)
	            plist = self._loadPlist(bookmarksFileCopy, USE_PLUTIL and args.use_plutil)
	        finally:
	            # Runs on success, on exceptions and on sys.exit(), so the copy never leaks.
	            removeTempFile(bookmarksFileCopy)

	        if args.write:
	            if not os.path.exists(markdownFile):
	                # Create an empty file so the later rewrite has a target.
	                open(markdownFile, 'a').close()
	            else:
	                with self._openText(markdownFile, 'r') as mdInput:
	                    self.content = mdInput.read()

	        # Without a previous stamp, everything added since 2013-01-01 counts as new.
	        last, self.content = self._splitUpdatedStamp(self.content, datetime(2013, 1, 1))

	        # There should only be one Reading List item, so take the first one;
	        # a plist without one is treated as an empty Reading List.
	        readingList = next(
	            (item for item in plist.get('Children', [])
	             if item.get('Title') == 'com.apple.ReadingList'),
	            {})

	        for item in readingList.get('Children', []):
	            added = item['ReadingList']['DateAdded']
	            if added <= last:
	                # Stop at the first entry that is not newer than the stamp
	                # (presumably the list is stored newest-first -- TODO confirm).
	                break

	            addtime = pytz.utc.localize(added).strftime('%c')
	            title = self._cleanText(item['URIDictionary']['title'])
	            url = self._nativeStr(item['URLString'])
	            description = ''
	            if 'PreviewText' in item['ReadingList']:
	                description = self._cleanText(item['ReadingList']['PreviewText'])

	            self.itemToMarkdown(addtime, title, url, description)

	        pluralized = 'bookmarks' if self.postedCount > 1 else 'bookmark'
	        # time.gmtime() gives UTC on Python 2 and 3 without the deprecated utcnow().
	        stamp = time.strftime(self._STAMP_FORMAT, time.gmtime())

	        if args.write:
	            with self._openText(markdownFile, 'w') as mdHandle:
	                mdHandle.write('Updated: ' + stamp + " UTC\n\n")
	                mdHandle.write(self.newcontent + self.content)

	        if self.postedCount > 0:
	            sys.stdout.write('\n' + stamp + ' UTC\n')
	            sys.stdout.write('Found ' + str(self.postedCount) + ' new ' + pluralized + "\n")
	            sys.stdout.write(('Saved' if args.write else 'WARN --no-write; so not writing') + ' to ' + markdownFile)
	        else:
	            sys.stdout.write('No new bookmarks found in Reading List')

	        sys.stdout.write("\n")

	    @staticmethod
	    def _loadPlist(plistPath, usePlutil):
	        """Parse the plist at plistPath, first converting it to XML if usePlutil is true."""
	        if usePlutil:
	            converted = subprocess.call(['plutil', '-convert', 'xml1', plistPath])
	            if converted != 0:
	                print('Couldn\'t convert bookmarks plist to xml format')
	                sys.exit(converted)

	        if hasattr(plistlib, 'load'):
	            # Python 3.4+: readPlist() is deprecated and was removed in 3.9.
	            with open(plistPath, 'rb') as fp:
	                return plistlib.load(fp)
	        return plistlib.readPlist(plistPath)  # Python 2.7

	    @staticmethod
	    def _openText(filePath, mode):
	        """Open the markdown file so that its content is UTF-8 on Python 2 and 3."""
	        if str is bytes:
	            # Python 2: all text handled here is already UTF-8 encoded byte strings.
	            return open(filePath, mode)
	        return open(filePath, mode, encoding='utf-8')

	    @staticmethod
	    def _nativeStr(text):
	        """Return text as the interpreter's native ``str`` type.

	        On Python 2 a ``unicode`` value is encoded to UTF-8 bytes; a value that
	        already is a ``str`` (always the case on Python 3) is returned as-is.
	        """
	        if not isinstance(text, str):
	            text = text.encode('utf8')
	        return text

	    @classmethod
	    def _cleanText(cls, text):
	        """Replace markdown-sensitive characters with spaces and collapse space runs."""
	        text = cls._CLEAN_RX.sub(' ', cls._nativeStr(text))
	        return re.sub(' +', ' ', text)

	    @classmethod
	    def _splitUpdatedStamp(cls, text, default):
	        """Return ``(last_update, body)`` for the text of an existing markdown file.

	        last_update is the datetime of the first ``Updated:`` line, or default
	        when there is none; body is text with every such line removed and
	        surrounding whitespace stripped.
	        """
	        found = cls._STAMP_RX.search(text)
	        last = default
	        if found is not None:
	            last = datetime.strptime(found.group(1), cls._STAMP_FORMAT)
	        return last, cls._STAMP_RX.sub('', text).strip()

	    def itemToMarkdown(self, addtime, title, url, description):
	        """Append one entry as a markdown list item to newcontent and count it.

	        The link title attribute carries the "Added on" time; a non-empty
	        description follows the link as a block quote.
	        """
	        self.newcontent += '- [' + title + '](' + url + ' "Added on ' + addtime + '")'
	        if not description == '':
	            self.newcontent += "\n\n > " + description
	        self.newcontent += "\n\n"
	        self.postedCount += 1

	if __name__ == "__main__":
	    # Command-line entry point: build the option parser, then run the export
	    # by instantiating _readingList with the parsed options.

	    from argparse import ArgumentParser
	    parser = ArgumentParser()
	    parser.add_argument("-f", "--out-file", dest="output_file", default=BOOKMARKS_MARKDOWN_FILE,
	                        help="output markdown file", metavar="outfile")
	    parser.add_argument("-b", "--bookmarks-file", dest="input_file", default=BOOKMARKS_PLIST,
	                        help="input Bookmarks.plist file", metavar="infile")
	    # store_false: args.write defaults to True and the flag turns writing OFF,
	    # so the help text has to describe the negative.
	    parser.add_argument("--no-write", dest="write", action='store_false',
	                        help="do not write to the output file (dry run)")
	    parser.add_argument("--no-plutil", dest="use_plutil", action='store_false',
	                        help="disable plutil system call - useful for running in jupyter or on linux.\nWARN you must parse the plist file yourself")

	    args = parser.parse_args()
	    _readingList(args)