Created
February 14, 2020 12:39
-
-
Save zachcp/7e006e9763b27a3db908a62ae1603579 to your computer and use it in GitHub Desktop.
Pocket-to-DevonThink
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- POCKET2DEVONTHINK
-- Imports articles from a local Pocket for Mac app into the inbox of the current DEVONthink database.
-- Comes with the use-it-like-you-want-to-and-dont-blame-me licence.
-- Last changed on Jan 5, 2015.
-- Recent changes: error handling; PDFs for Pocket records without a mime setting; conditional searches.

-- USER SETTINGS; please adapt to your needs
-- loop_min/loop_max define the range (by position in the query result) of Pocket articles to import:
--   1/10 would, e.g., import the first ten articles you've ever stored in Pocket;
--   1 and, say, 100000 would presumably move all of them into DT.
-- A narrow range is useful for testing or for resuming after errors.
property loop_min : 1
property loop_max : 10000
-- where_condition is appended verbatim to the SQL query. Set it to "" or to something like
-- " WHERE unique_id='13148'" or " WHERE unique_id>'13148'" (note the leading space);
-- make sure loop_min is small enough.
property where_condition : ""

-- INTERNAL SETTINGS required by this script
property scriptlastchanged : "05.01.2015 10:00"
property timestamp : time of (current date) -- used in the log file name
-- NOTE(review): AppleScript evaluates property initialisers at compile time, so timestamp and the
-- home-folder paths below presumably reflect the machine/time of compilation - confirm.

-- sqlite3 binary; alternative: "/Applications/Sente65.app/Contents/MacOS/sqlite3"
property sqlite_path : "sqlite3"

-- record and field delimiters
property strEOR : "<EOR>" & return
property strRecDelim : quote & strEOR & quote
property sFieldDelim : ";; "

-- file system locations
property db_path_p : POSIX path of (path to home folder) & "Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/readItLater3.sqlite"
property quoted_db_path_p : quoted form of db_path_p
property offline_path : POSIX path of (path to home folder) & "Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/offline/cache0/RIL_pages/"
property tempfolder_path : POSIX path of (path to home folder) & "Desktop/import_pocket/"

-- web access
property user_agent : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5"
property insta_url_prefix : "http://www.instapaper.com/text?u=http%3A%2F%2F"
property insta_urls_prefix : "http://www.instapaper.com/text?u=https%3A%2F%2F"
property pocket_url_prefix : "http://getpocket.com/a/read/"
## MAIN SCRIPT
## Reads the article list from the Pocket database, then for every article inside the
## loop_min..loop_max window: copies Pocket's offline copy to a temporary folder, creates a
## matching record in the inbox of the current DEVONthink database, and sets its metadata.
## Problems are collected per article in itemlog and written to a log file on the Desktop.

## get list of articles from pocket db and turn it into an arrayish list of lists
-- where_condition is passed through unchanged; an empty string simply selects every row
set the_articles_text to get_articles(where_condition)
set the_articles_text to replaceString(the_articles_text, {"
"}, "\\n") -- replaces literal line breaks with the two characters \n before splitting into records
set the_articles to textToTwoDArray(the_articles_text, character id 13, ";; ")
writelog("Pocket2Devonthink
---------------
Script last changed: " & scriptlastchanged & "
" & (current date) & "
Pocket articles to create in Devonthink: " & loop_min & " - " & loop_max & "
Condition: " & where_condition, timestamp)

## LOOP through reading list items
set loop_count to 0
set error_count to 0 -- number is written into log file
repeat with this_article in the_articles
	set loop_count to loop_count + 1
	set itemlog to "" -- this var gets written to the logfile at the end of each repeat when errors_raised
	set errors_raised to false -- current item gets into the logfile only when an error occurred
	log "
################
# " & loop_count & "
"
	# MIN MAX Loop
	if (loop_count ≥ loop_min) and (loop_count ≤ loop_max) then
		set itemlog to "
##" & loop_count & "
"
		set itemlog to itemlog & "Raw data on this article according to pocket database: "
		-- join the fields with ";; " for the log, then put the delimiters back so that
		-- later list-to-text coercions are not affected
		set saved_delimiters to text item delimiters
		set text item delimiters to ";; "
		set itemlog to itemlog & this_article
		set text item delimiters to saved_delimiters
		
		# GET METADATA from reading list
		set metadata_ok to true
		try
			set uid to item 1 of this_article
			set item_id to item 2 of this_article
			set url_orig to item 3 of this_article
			set title to item 4 of this_article
			set time_added_pocket to item 5 of this_article
			set time_added to timestamp2appledate(time_added_pocket) -- to date rli_date
			set word_count to item 6 of this_article
			set mime to item 7 of this_article
			set offline_text to item 8 of this_article
			set offline_web to item 9 of this_article
			set itemlog to itemlog & "
loop_count: " & loop_count & ";;
uid: " & uid & ";;
item_id: " & item_id & ";;
url_orig: " & url_orig & ";;
title: " & title & ";;
time_added: " & (time_added as string) & ";;
word_count: " & word_count & ";;
mime: " & mime & ";;
offline_text: " & offline_text & ";;
offline_web: " & offline_web
			log itemlog
		on error errormsg
			set metadata_ok to false
			set itemlog to itemlog & "
--> Error while analysing the data list for this article. Usually, this is caused by a return character in the title field. Please add this article manually to DT.
" & errormsg
			set errors_raised to true
			set error_count to error_count + 1
		end try
		
		-- Only continue when the metadata could be read. Otherwise uid, title etc. would be
		-- undefined (first article) or still hold the values of the PREVIOUS article, which
		-- would import that article a second time under the wrong identity.
		if metadata_ok then
			# BUILD IMPORT URLs (they might be used or not further down in this script)
			(*
			# Instapaper
			set urlshort to remove_http(url_orig) -- instapaper needs http://, https://, ftp:// removed from url
			if characters 1 through 6 of url_orig as string = "https:" then
				set insta_url to (insta_urls_prefix & urlshort)
			else
				set insta_url to (insta_url_prefix & urlshort)
			end if
			set pocket_url to pocket_url_prefix & item_id
			*)
			
			# COPY FILE to temporary folder on Desktop
			set this_offlinefolder_path to offline_path & uid & "/"
			set this_tempfolder_path to tempfolder_path & uid & "/"
			try
				set has_local_file to true
				-- both paths are shell-quoted so that spaces in the home folder name cannot split the arguments
				do shell script "ditto " & (quoted form of this_offlinefolder_path) & " " & (quoted form of this_tempfolder_path)
				-- should result in something like this: "ditto '/Users/me/Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/offline/cache0/RIL_pages/10001/' '/Users/me/Desktop/import_pocket/10001/'"
				-- /Users/me/Library/Containers/com.readitlater.PocketMac/Data/Library/… appears to be inaccessible via applescript
			on error errormsg
				set has_local_file to false
				set itemlog to itemlog & "
--> apparently no local copy
" & errormsg
				set errors_raised to true
				set error_count to error_count + 1
			end try
			
			# CREATE RECORD in DEVONthink
			set result_record to null
			tell application id "DNtp"
				try
					set location_target to incoming group of current database
					## Using Pocket's offline copies located in
					-- Library/Containers/com.readitlater.PocketMac/Data/Library/Application Support/Pocket/offline/cache0/RIL_pages/
					-- slightly different record creation depending on file type and existence of offline copies
					-- would have been easier to just import text.html, web.html, web.pdf - whichever exists
					if mime = "application/pdf" or url_orig ends with ".pdf" then -- doesn't catch those pdfs w/o mime and url without "pdf" in it
						set local_url to "file://" & this_tempfolder_path & "web.pdf"
						set itemlog to itemlog & "
" & local_url
						set result_record to create PDF document from local_url in location_target
					else if offline_text = "1" then
						-- this_tempfolder_path already is a POSIX path; built like the other branches
						set local_url to "file://" & this_tempfolder_path & "text.html"
						set itemlog to itemlog & "
" & local_url
						set result_record to create formatted note from local_url in location_target
					else if offline_web = "1" then
						set local_url to "file://" & this_tempfolder_path & "web.html"
						set local_path to this_tempfolder_path & "web.html"
						set itemlog to itemlog & "
" & local_url
						set rec1 to import local_path to location_target
						set rec2 to convert record rec1 to rich -- DT doesn't allow to set URL (or rather: it doesn't show up in the address line)
						delete record rec1
						set result_record to rec2
					else if not has_local_file then
						set result_record to create record with {URL:url_orig, type:bookmark} in location_target
						--else
						--set result_record to create record with {name:"error", plain text:"Something went wrong with this record in the if-mime-then operation\n\n" & itemlog, type:text} in location_target
					end if
				on error errormsg
					set itemlog to itemlog & "
--> Something went wrong while creating this record in Devonthink
" & errormsg
					--writelog(itemlog, timestamp)
					-- best effort: leave a plain-text placeholder carrying the log; ignore failure here
					try
						set result_record to create record with {name:"error", plain text:itemlog, type:text} in location_target
					end try
					set errors_raised to true
					set error_count to error_count + 1
				end try
				
				# TEST whether record was created
				try
					set record_created to true
					name of result_record -- raises an error when result_record is still null
				on error errormsg
					set record_created to false
					set itemlog to itemlog & "
--> No record created in Devonthink
"
					set errors_raised to true
					set error_count to error_count + 1
				end try
				
				# SET METADATA
				if record_created then
					try
						set this_record to result_record
						tell this_record
							set name to (title)
							set the creation date to time_added
							set URL to url_orig
							set comment to "unique_id::" & uid & "
item_id::" & item_id & "
loop_count::" & loop_count
						end tell
					on error errormsg
						-- the record exists but is incomplete; report it instead of failing silently
						set itemlog to itemlog & "
--> Record created, but its metadata could not be set
" & errormsg
						set errors_raised to true
						set error_count to error_count + 1
					end try
				end if
				(*
				-- local copy looks best in 8 of 10 cases; hence I've out-commented this
				## Instapaper - create record by downloading via http://www.instapaper.com/text?u=http(s)%3A%2F%2F
				set rec_insta to create formatted note from insta_url in location_target
				tell rec_insta
					set name to (title & " // insta")
					set the creation date to time_added
					set URL to url_orig
					set comment to "unique_id::" & uid & "\nitem_id::" & item_id & "\nloop_count::" & loop_count
				end tell
				## Pocket - create record by downloading via getpocket.com/a/read/[item_id]
				-- looks best, but unreliable unless invoked in browser on Pocket's website
				set rec_pocket to create formatted note from pocket_url in location_target
				tell rec_pocket
					set name to (title & " // pocket")
					set the creation date to time_added
					set URL to url_orig
					set comment to "unique_id::" & uid & "\nitem_id::" & item_id & "\nloop_count::" & loop_count
				end tell
				*)
			end tell
		end if
		
		if errors_raised then writelog(itemlog, timestamp)
		log itemlog
	else if (loop_count > loop_max) then
		-- past the requested window: nothing left to do
		exit repeat
	end if
end repeat

## SUMMARY: append the error count to the log file and open it
writelog(("
" & error_count & " error(s) occured. (" & (current date) & "). Loop_count: " & loop_count), timestamp)
set logfile to (path to desktop as string) & "Pocket2Devonthink_Log_" & timestamp & ".txt"
set y to POSIX path of logfile
--do shell script "open " & ((path to desktop) as text) & "Pocket2Devonthink_Log_" & timestamp & ".txt"
do shell script "open " & (quoted form of y) -- quoted: the Desktop path may contain spaces
## SOME FUNCTIONS | |
## get_articles() | |
## searches the reference table of pocket db; where-string can be empty; returns list of articles | |
-- 1. unique_id, 2. item_id, 3. url, 4. title, 5. time_added, | |
-- 6. word_count, 7. mime (required to identify PDFs via "application/pdf"), 8. offline_text (has text.html), 9. offline_web (has web.html)
-- unique_id is used in file system as well | |
-- get_articles(sql_where)
-- Queries the "items" table of the Pocket sqlite database through the sqlite3 command line tool.
--   sql_where: text appended verbatim after "from items"; either "" or a clause with a leading
--              space such as " WHERE unique_id>'13148'".
-- Returns the raw output of sqlite3 as text: one row per article, fields separated by ";; ",
-- each row ending with an extra "<EOR>" + line break column.
-- Raises whatever error `do shell script` raises when sqlite3 fails.
on get_articles(sql_where)
	log ">>>> GetPocketReferences"
	set sqlStatement to "select unique_id, item_id, url, title, time_added, word_count, mime, offline_text, offline_web,\"<EOR>
\"
from items" & sql_where & ";"
	-- The statement is passed through `quoted form of` rather than hand-written single quotes,
	-- so single quotes inside sql_where reach sqlite3 intact instead of terminating the shell string.
	set sCommand to sqlite_path & " -separator ';; ' " & quoted_db_path_p & " " & (quoted form of sqlStatement)
	set sResult to (do shell script sCommand)
	return sResult
end get_articles
-- textToTwoDArray(theText, mainDelimiter, secondaryDelimiter)
-- Splits theText into rows at mainDelimiter, then splits every row into fields at
-- secondaryDelimiter. Returns a list of lists of text.
-- The text item delimiters in effect on entry are put back before returning.
on textToTwoDArray(theText, mainDelimiter, secondaryDelimiter)
	set savedDelimiters to text item delimiters
	set text item delimiters to mainDelimiter
	set rowTexts to text items of theText
	-- every row is split with the field delimiter
	set text item delimiters to secondaryDelimiter
	set rows to {}
	repeat with rowIndex from 1 to (count of rowTexts)
		set end of rows to text items of (item rowIndex of rowTexts)
	end repeat
	set text item delimiters to savedDelimiters
	return rows
end textToTwoDArray
-- timestamp2appledate(timestamp)
-- Converts a Unix timestamp (seconds since the epoch, as text or number) into an AppleScript
-- date in local time. The conversion is done by the shell's `date -r`.
-- Raises an error when `date` rejects the value (e.g. empty or non-numeric input).
on timestamp2appledate(timestamp)
	set h to do shell script "date -r " & (quoted form of (timestamp as text)) & " \"+%Y %m %d %H %M %S\""
	set mydate to current date
	-- Start from day 1: if today is the 29th-31st, assigning a shorter month first would
	-- overflow into the following month and yield a wrong date.
	set day of mydate to 1
	set year of mydate to (word 1 of h as integer)
	set month of mydate to (word 2 of h as integer)
	set day of mydate to (word 3 of h as integer)
	set hours of mydate to (word 4 of h as integer)
	set minutes of mydate to (word 5 of h as integer)
	set seconds of mydate to (word 6 of h as integer)
	return mydate
end timestamp2appledate
-- remove_http(url1)
-- Strips a leading "https://" or "http://" from url1 and returns the remainder.
-- Returns "" when url1 uses neither scheme, when nothing follows the scheme, or on any error.
-- Uses `starts with` and text ranges, so the result does not depend on the current
-- text item delimiters (a `characters ... as string` coercion would).
on remove_http(url1)
	try
		if url1 starts with "https:" then
			return text 9 thru -1 of url1
		else if url1 starts with "http:" then
			return text 8 thru -1 of url1
		end if
		-- unsupported scheme: report it and hand back an empty string
		log "url1: " & url1
		return ""
	on error
		return ""
	end try
end remove_http
-- replaceString(theText, oldString, newString)
-- Returns theText with every occurrence of oldString replaced by newString.
-- oldString may be a single text or a list of texts (it is used as text item delimiters).
-- The caller's text item delimiters are restored afterwards, also when an error occurs;
-- errors are re-raised unchanged.
on replaceString(theText, oldString, newString)
	set savedDelimiters to AppleScript's text item delimiters
	try
		set AppleScript's text item delimiters to oldString
		set textParts to every text item of theText
		set AppleScript's text item delimiters to newString
		set replacedText to textParts as string
	on error errMsg number errNum
		set AppleScript's text item delimiters to savedDelimiters
		error errMsg number errNum
	end try
	set AppleScript's text item delimiters to savedDelimiters
	return replacedText
end replaceString
-- writelog(this_message, timestamp)
-- Appends this_message plus a return character to the file
-- "Pocket2Devonthink_Log_<timestamp>.txt" on the Desktop.
--   this_message: text to append
--   timestamp:    value used to build the log file name
-- The text is written as UTF-8 so that non-ASCII characters (e.g. in article titles) survive.
-- Logging is best effort: a failure is reported via `log` and never raised to the caller.
on writelog(this_message, timestamp)
	set the log_file to ((path to desktop) as text) & "Pocket2Devonthink_Log_" & timestamp & ".txt"
	try
		set log_handle to open for access file the log_file with write permission
		write (this_message & return) to log_handle starting at eof as «class utf8»
		close access log_handle
	on error errormsg
		log "writelog failed: " & errormsg
		-- make sure the file does not stay open after a failed write
		try
			close access file the log_file
		end try
	end try
end writelog
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment