artlung · October 31, 2024 18:03
diff --git a/parse-instapaper-csv.php b/parse-instapaper-csv.php
 <?php

 // Instapaper allows you to export your saved links as a CSV file
 // And I turn it into my default LINKS json format
 // for better or worse

 // Input: the CSV exported from Instapaper. Output: one JSON file per link
 // written into $destination_for_link_json.
 $filename = 'instapaper-export.csv';
 $destination_for_link_json = './LINKS/';
 $ignore_header_line = true; // not read by the loop; the header row is recognised by its literal 'URL' first column

 // Stop with a clear message when the export is missing or unreadable.
 $handle = is_readable($filename) ? fopen($filename, 'r') : false;
 if ($handle === false) {
     printf("Could not read %s\n", $filename);
     exit(1);
 }

 // Read with fgetcsv() rather than file() + str_getcsv() so that quoted fields
 // containing line breaks stay in one row. The delimiter, enclosure and escape
 // arguments are the long-standing defaults, passed explicitly.
 $csv = [];
 while (($row = fgetcsv($handle, 0, ',', '"', '\\')) !== false) {
     // fgetcsv() reports a blank line as an array holding a single null.
     if ($row === [null]) {
         continue;
     }
     $csv[] = $row;
 }
 fclose($handle);

 // Make sure the output directory exists before anything is written to it.
 if (!is_dir($destination_for_link_json) && !mkdir($destination_for_link_json, 0777, true)) {
     printf("Could not create %s\n", $destination_for_link_json);
     exit(1);
 }

 // URL,Title,Selection,Folder,Timestamp,Tags

 /*
 * // goal is to create json looking like this
 * tags is expected to be [] since I have no tags
 * ignore any urls which do not begin with http, and exit if a row has no timestamp
 *
 {
    "id": "sidebarlinks.3",
    "url": "http:\/\/www.adaptivepath.com\/publications\/essays\/archives\/000315.php",
    "tags": [
        "domain:adaptivepath.com"
    ],
    "date_added": "2004-04-07T00:00:00+00:00",
    "title": "Why Content Management Fails",
    "description": "CMSes need managers",
    "private": false,
    "via": "",
    "extra_derived": ""
 }
 */

 // Only the commented-out diagnostics in this script use these two collectors.
 $selection_values = [];
 $folder_values = [];

 // Instapaper folders that are turned into tags; any other folder value
 // (Unread, Archive) is ignored.
 $folder_tags = [
     'Starred' => 'instapaper:starred',
     'Editor’s Picks' => 'instapaper:editors-picks',
 ];

 // Convert every CSV row (URL,Title,Selection,Folder,Timestamp,Tags) into one
 // JSON file in the LINKS format shown in the example at the top of this script.
 foreach ($csv as $row) {
     // Missing columns fall back to '' so short or blank rows raise no notices.
     $url = $row[0] ?? '';
     if ($url === 'URL') {
         continue; // header row
     }
     $title = $row[1] ?? '';
     $selection = $row[2] ?? ''; // After looking at the selection values, I'm not keeping them
     $folder = $row[3] ?? '';
     $timestamp = $row[4] ?? '';
     $tags = []; // I don't have any tags at the moment

     if (strpos($url, 'http') !== 0) {
         print "Ignoring $url because it is not an http(s) url\n";
         continue;
     }

     // A row without a timestamp aborts the whole import; exit non-zero so the
     // failure is visible to whatever invoked the script.
     if (!$timestamp) {
         printf("%s does not have a timestamp\n", $url);
         exit(1);
     }

     // Diagnostics used once to discover the distinct Selection/Folder values:
     //    if (!in_array($selection, $selection_values)) {
     //        $selection_values[] = $selection;
     //    }
     //    if (!in_array($folder, $folder_values)) {
     //        $folder_values[] = $folder;
     //    }

     // get the domain of the url, strip a leading 'www.' and add a tag
     // that looks like domain:domain (skipped when the URL has no host)
     $host = parse_url($url, PHP_URL_HOST);
     if (is_string($host) && $host !== '') {
         $tags[] = 'domain:' . preg_replace('/^www\./i', '', $host);
     }
     $tags[] = 'via:instapaper'; // to commemorate the source of the import

     // Starred / Editor’s Picks become instapaper:* tags. Previously the mapped
     // value was computed but never added to $tags.
     if (isset($folder_tags[$folder])) {
         $tags[] = $folder_tags[$folder];
     }

     // save epoch timestamp as an ISO 8601 timestamp
     // (date('c') formats in PHP's configured default timezone)
     $date_added = date('c', (int) $timestamp);

     $id = 'instapaper' . md5($url);

     $link_data = [
         'id' => $id,
         'url' => $url,
         'tags' => $tags,
         'date_added' => $date_added,
         'title' => $title,
         'description' => '',
         'private' => false,
         'via' => '',
         'extra_derived' => ''
     ];

     // json_encode() returns false (e.g. on invalid UTF-8 in the title);
     // report it instead of writing an empty file.
     $json = json_encode($link_data, JSON_PRETTY_PRINT);
     if ($json === false) {
         printf("Could not encode %s as JSON: %s\n", $url, json_last_error_msg());
         continue;
     }

     // save it to a json file in the LINKS directory
     $json_filename = $destination_for_link_json . $id . '.json';
     if (file_put_contents($json_filename, $json) === false) {
         printf("Could not write %s\n", $json_filename);
         exit(1);
     }
     print "Saved $json_filename\n";
 }
 // Folder values were (
 //    [0] => Unread
 //    [1] => Archive
 //    [2] => Starred
 //    [3] => Editor’s Picks
 //) which I will save as instapaper:starred and instapaper:editors-picks but ignore the others
 //print "Selection values\n";
 //print_r($selection_values);
 //print "Folder values\n";
 //print_r($folder_values);
diff --git a/post-links-to-linkding.php b/post-links-to-linkding.php
 <?php

 // https://github.com/sissbruecker/linkding
 // https://linkding.link/api/
 // Author: Joe Crawford https://artlung.com/
 // * Linkding is a bookmark tool I use to store links
 // * I exported all my prior links to JSON files
 // * Note that Linkding allows for import of a Netscape bookmarks file, which might be easier
 // * It looks like submitting date_added values are not respected
 // * My other stored metadata I put into the notes field in linkding

 /*
 {
    "id": "sidebarlinks.24327",
    "url": "http:\/\/tantek.pbworks.com\/w\/page\/19402872\/CassisProject",
    "tags": [
    "php",
    "javascript",
    "programming",
    "domain:tantek.pbworks.com"
 ],
    "date_added": "2010-11-20T12:44:00+00:00",
    "title": "tantek \/ CassisProject",
    "description": "javascript and php",
    "private": false,
    "via": null,
    "extra_derived": "hash:9104bf0bf0fdc466dca3d4b692d3f4cf|meta:b2a7bfe14a384bfffde15e86f29e69f4"
 }
 */

 // After submitting, the payload returned from POST /api/bookmarks/ looks like
 /*
 {
  "id": 941,
  "url": "http://tantek.pbworks.com/w/page/19402872/CassisProject",
  "title": "tantek / CassisProject",
  "description": "javascript and php",
  "notes": "hash:9104bf0bf0fdc466dca3d4b692d3f4cf|meta:b2a7bfe14a384bfffde15e86f29e69f4",
  "web_archive_snapshot_url": "",
  "favicon_url": null,
  "preview_image_url": null,
  "is_archived": false,
  "unread": false,
  "shared": true,
  "tag_names": [
    "domain:tantek.pbworks.com",
    "javascript",
    "php",
    "programming"
  ],
  "date_added": "2024-10-30T17:36:26.689901Z",
  "date_modified": "2024-10-31T04:24:45.961427Z",
  "website_title": null,
  "website_description": null
 }
 */


 // one directory for json source
 // one directory for json results
 $source_dir = './LINKS/';
 $rest_result_dir = './REST_RESULTS/';

 // Base URL of the linkding instance. A trailing slash is dropped so that
 // appending '/api/bookmarks/' never produces a double slash.
 $linkding_instance = rtrim('https://example.com', '/');
 $linkding_token = '';

 if ($linkding_token === '') {
     printf("No token found, set \$linkding_token\n");
     exit(1);
 }

 // A link counts as submitted once its API response has been saved in
 // $rest_result_dir. If that directory is missing the response cannot be
 // saved and the same link would be POSTed again on the next run.
 if (!is_dir($rest_result_dir) && !mkdir($rest_result_dir, 0777, true)) {
     printf("Could not create %s\n", $rest_result_dir);
     exit(1);
 }

 // documentation: https://linkding.link/api/
 // Header looks like
 // Authorization: Token <Token>
 // Methods
 // GET /api/bookmarks/
 // GET /api/bookmarks/archived/
 // GET /api/bookmarks/<id>/
 // GET /api/bookmarks/<id>/
 // GET /api/bookmarks/check/?url=https%3A%2F%2Fexample.com

 // To create a link
 // POST /api/bookmarks/
 /* payload
 {
  "url": "https://example.com",
  "title": "Example title",
  "description": "Example description",
  "notes": "Example notes",
  "is_archived": false,
  "unread": false,
  "shared": false,
  "tag_names": [
    "tag1",
    "tag2"
  ]
 }
 */

 // To update a link
 //PUT /api/bookmarks/<id>/
 //PATCH /api/bookmarks/<id>/

 // read source directory for only json files

 $filenames = scandir($source_dir);
 if ($filenames === false) {
     printf("Could not read %s\n", $source_dir);
     exit(1);
 }

 // Endpoint and auth header do not vary per bookmark, so they are built once.
 $url = $linkding_instance . '/api/bookmarks/';
 $auth_header = 'Authorization: Token ' . $linkding_token;

 // do one at a time
 foreach ($filenames as $filename) {
     // if filename does not end with json, bail out
     if (!preg_match('/\.json$/', $filename)) {
         continue;
     }

     $source_file = $source_dir . $filename;
     $rest_result_file = $rest_result_dir . $filename;

     // A saved response in the results directory marks the link as submitted.
     if (file_exists($rest_result_file)) {
         printf("%s has already been submitted to the REST API\n", $filename);
         continue;
     }

     // Skip files that cannot be read or do not decode to a link with a URL,
     // rather than POSTing a payload full of nulls.
     $json = json_decode((string) file_get_contents($source_file), true);
     if (!is_array($json) || empty($json['url'])) {
         printf("Skipping %s: not a valid link JSON file\n", $filename);
         continue;
     }

     printf("%s has not been submitted to the REST API\n", $filename);

     // Missing or null text fields are sent as empty strings / empty lists.
     $payload = [
         'url' => $json['url'],
         'title' => $json['title'] ?? '',
         'description' => $json['description'] ?? '',
         'notes' => $json['extra_derived'] ?? '',
         'is_archived' => false,
         'unread' => false,
         'shared' => !($json['private'] ?? false),
         'tag_names' => $json['tags'] ?? [],
     ];
     if (isset($json['date_added'])) {
         $payload['date_added'] = $json['date_added'];
     }

     $payload_json = json_encode($payload);
     if ($payload_json === false) {
         printf("Skipping %s: %s\n", $filename, json_last_error_msg());
         continue;
     }

     // post to the REST API
     $ch = curl_init($url);
     curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
     curl_setopt($ch, CURLOPT_POSTFIELDS, $payload_json);
     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
     curl_setopt($ch, CURLOPT_HTTPHEADER, [
         'Content-Type: application/json',
         'Content-Length: ' . strlen($payload_json),
         $auth_header,
     ]);

     $result = curl_exec($ch);

     // curl_exec() returns false on transport failures (DNS, TLS, timeout);
     // there is no HTTP status in that case, so report the cURL error instead.
     if ($result === false) {
         printf("Error submitting %s to the REST API\n", $filename);
         printf("cURL error: %s\n", curl_error($ch));
         continue;
     }

     $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

     // if it's good, save the resulting json file in REST_RESULTS
     if (in_array($http_code, [200, 201, 204], true)) {
         if (file_put_contents($rest_result_file, $result) === false) {
             // Without the saved response this link would be submitted again.
             printf("Submitted %s but could not write %s\n", $filename, $rest_result_file);
             exit(1);
         }
         printf("Submitted %s to the REST API\n", $filename);
     } else {
         printf("Error submitting %s to the REST API\n", $filename);
         printf("HTTP Code: %d\n", $http_code);
         printf("Result: %s\n", $result);
     }
 }
diff --git a/progress.php b/progress.php
 <?php

 $source_dir = './LINKS/';
 $rest_result_dir = './REST_RESULTS/';

 $filenames = scandir($source_dir);
 if ($filenames === false) {
     printf("Could not read %s\n", $source_dir);
     exit(1);
 }

 // List at most this many files that still have to be submitted.
 $max_to_list = 30;

 $count = 0;
 foreach ($filenames as $filename) {

     // must end in json
     // (the earlier pattern also demanded a leading digit, which excluded
     // names such as "instapaper<md5>.json")
     if (!preg_match('/\.json$/', $filename)) {
         continue;
     }

     // a file of the same name in the results directory means it was submitted
     if (file_exists($rest_result_dir . $filename)) {
         continue;
     }

     if ($count >= $max_to_list) {
         break;
     }

     printf("%s has not been submitted to the REST API\n", $filename);
     $count++;
 }

 // how many files with .json at the end are in each directory

 // Collect the base names of the .json files in each directory once.
 // glob() can return false on error; treat that the same as "no files".
 $source_files = array_map('basename', glob($source_dir . '*.json') ?: []);
 $rest_files = array_map('basename', glob($rest_result_dir . '*.json') ?: []);

 printf("There are %d files in %s\n", count($source_files), $source_dir);
 printf("There are %d files in %s\n", count($rest_files), $rest_result_dir);

 // how many are left to go? (source files with no result file of the same name)
 $diff = array_diff($source_files, $rest_files);

 printf("There are %d files left to go\n", count($diff));
	<?php

	// Instapaper allows you to export your saved links as a CSV file
	// And I turn it into my default LINKS json format
	// for better or worse

	// Input: the CSV exported from Instapaper. Output: one JSON file per link
	// written into $destination_for_link_json.
	$filename = 'instapaper-export.csv';
	$destination_for_link_json = './LINKS/';
	$ignore_header_line = true; // not read by the loop; the header row is recognised by its literal 'URL' first column

	// Stop with a clear message when the export is missing or unreadable.
	$handle = is_readable($filename) ? fopen($filename, 'r') : false;
	if ($handle === false) {
	    printf("Could not read %s\n", $filename);
	    exit(1);
	}

	// Read with fgetcsv() rather than file() + str_getcsv() so that quoted fields
	// containing line breaks stay in one row. The delimiter, enclosure and escape
	// arguments are the long-standing defaults, passed explicitly.
	$csv = [];
	while (($row = fgetcsv($handle, 0, ',', '"', '\\')) !== false) {
	    // fgetcsv() reports a blank line as an array holding a single null.
	    if ($row === [null]) {
	        continue;
	    }
	    $csv[] = $row;
	}
	fclose($handle);

	// Make sure the output directory exists before anything is written to it.
	if (!is_dir($destination_for_link_json) && !mkdir($destination_for_link_json, 0777, true)) {
	    printf("Could not create %s\n", $destination_for_link_json);
	    exit(1);
	}

	// URL,Title,Selection,Folder,Timestamp,Tags

	/*
	* // goal is to create json looking like this
	* tags is expected to be [] since I have no tags
	* ignore any urls which do not begin with http, and exit if a row has no timestamp
	*
	{
	"id": "sidebarlinks.3",
	"url": "http:\/\/www.adaptivepath.com\/publications\/essays\/archives\/000315.php",
	"tags": [
	"domain:adaptivepath.com"
	],
	"date_added": "2004-04-07T00:00:00+00:00",
	"title": "Why Content Management Fails",
	"description": "CMSes need managers",
	"private": false,
	"via": "",
	"extra_derived": ""
	}
	*/

	// Only the commented-out diagnostics in this script use these two collectors.
	$selection_values = [];
	$folder_values = [];

	// Instapaper folders that are turned into tags; any other folder value
	// (Unread, Archive) is ignored.
	$folder_tags = [
	    'Starred' => 'instapaper:starred',
	    'Editor’s Picks' => 'instapaper:editors-picks',
	];

	// Convert every CSV row (URL,Title,Selection,Folder,Timestamp,Tags) into one
	// JSON file in the LINKS format shown in the example at the top of this script.
	foreach ($csv as $row) {
	    // Missing columns fall back to '' so short or blank rows raise no notices.
	    $url = $row[0] ?? '';
	    if ($url === 'URL') {
	        continue; // header row
	    }
	    $title = $row[1] ?? '';
	    $selection = $row[2] ?? ''; // After looking at the selection values, I'm not keeping them
	    $folder = $row[3] ?? '';
	    $timestamp = $row[4] ?? '';
	    $tags = []; // I don't have any tags at the moment

	    if (strpos($url, 'http') !== 0) {
	        print "Ignoring $url because it is not an http(s) url\n";
	        continue;
	    }

	    // A row without a timestamp aborts the whole import; exit non-zero so the
	    // failure is visible to whatever invoked the script.
	    if (!$timestamp) {
	        printf("%s does not have a timestamp\n", $url);
	        exit(1);
	    }

	    // Diagnostics used once to discover the distinct Selection/Folder values:
	    //    if (!in_array($selection, $selection_values)) {
	    //        $selection_values[] = $selection;
	    //    }
	    //    if (!in_array($folder, $folder_values)) {
	    //        $folder_values[] = $folder;
	    //    }

	    // get the domain of the url, strip a leading 'www.' and add a tag
	    // that looks like domain:domain (skipped when the URL has no host)
	    $host = parse_url($url, PHP_URL_HOST);
	    if (is_string($host) && $host !== '') {
	        $tags[] = 'domain:' . preg_replace('/^www\./i', '', $host);
	    }
	    $tags[] = 'via:instapaper'; // to commemorate the source of the import

	    // Starred / Editor’s Picks become instapaper:* tags. Previously the mapped
	    // value was computed but never added to $tags.
	    if (isset($folder_tags[$folder])) {
	        $tags[] = $folder_tags[$folder];
	    }

	    // save epoch timestamp as an ISO 8601 timestamp
	    // (date('c') formats in PHP's configured default timezone)
	    $date_added = date('c', (int) $timestamp);

	    $id = 'instapaper' . md5($url);

	    $link_data = [
	        'id' => $id,
	        'url' => $url,
	        'tags' => $tags,
	        'date_added' => $date_added,
	        'title' => $title,
	        'description' => '',
	        'private' => false,
	        'via' => '',
	        'extra_derived' => ''
	    ];

	    // json_encode() returns false (e.g. on invalid UTF-8 in the title);
	    // report it instead of writing an empty file.
	    $json = json_encode($link_data, JSON_PRETTY_PRINT);
	    if ($json === false) {
	        printf("Could not encode %s as JSON: %s\n", $url, json_last_error_msg());
	        continue;
	    }

	    // save it to a json file in the LINKS directory
	    $json_filename = $destination_for_link_json . $id . '.json';
	    if (file_put_contents($json_filename, $json) === false) {
	        printf("Could not write %s\n", $json_filename);
	        exit(1);
	    }
	    print "Saved $json_filename\n";
	}
	// Folder values were (
	// [0] => Unread
	// [1] => Archive
	// [2] => Starred
	// [3] => Editor’s Picks
	//) which I will save as instapaper:starred and instapaper:editors-picks but ignore the others
	//print "Selection values\n";
	//print_r($selection_values);
	//print "Folder values\n";
	//print_r($folder_values);
	<?php

	// https://github.com/sissbruecker/linkding
	// https://linkding.link/api/
	// Author: Joe Crawford https://artlung.com/
	// * Linkding is a bookmark tool I use to store links
	// * I exported all my prior links to JSON files
	// * Note that Linkding allows for import of a Netscape bookmarks file, which might be easier
	// * It looks like submitting date_added values are not respected
	// * My other stored metadata I put into the notes field in linkding

	/*
	{
	"id": "sidebarlinks.24327",
	"url": "http:\/\/tantek.pbworks.com\/w\/page\/19402872\/CassisProject",
	"tags": [
	"php",
	"javascript",
	"programming",
	"domain:tantek.pbworks.com"
	],
	"date_added": "2010-11-20T12:44:00+00:00",
	"title": "tantek \/ CassisProject",
	"description": "javascript and php",
	"private": false,
	"via": null,
	"extra_derived": "hash:9104bf0bf0fdc466dca3d4b692d3f4cf|meta:b2a7bfe14a384bfffde15e86f29e69f4"
	}
	*/

	// After submitting, the payload returned from POST /api/bookmarks/ looks like
	/*
	{
	"id": 941,
	"url": "http://tantek.pbworks.com/w/page/19402872/CassisProject",
	"title": "tantek / CassisProject",
	"description": "javascript and php",
	"notes": "hash:9104bf0bf0fdc466dca3d4b692d3f4cf|meta:b2a7bfe14a384bfffde15e86f29e69f4",
	"web_archive_snapshot_url": "",
	"favicon_url": null,
	"preview_image_url": null,
	"is_archived": false,
	"unread": false,
	"shared": true,
	"tag_names": [
	"domain:tantek.pbworks.com",
	"javascript",
	"php",
	"programming"
	],
	"date_added": "2024-10-30T17:36:26.689901Z",
	"date_modified": "2024-10-31T04:24:45.961427Z",
	"website_title": null,
	"website_description": null
	}
	*/


	// one directory for json source
	// one directory for json results
	$source_dir = './LINKS/';
	$rest_result_dir = './REST_RESULTS/';

	// Base URL of the linkding instance. A trailing slash is dropped so that
	// appending '/api/bookmarks/' never produces a double slash.
	$linkding_instance = rtrim('https://example.com', '/');
	$linkding_token = '';

	if ($linkding_token === '') {
	    printf("No token found, set \$linkding_token\n");
	    exit(1);
	}

	// A link counts as submitted once its API response has been saved in
	// $rest_result_dir. If that directory is missing the response cannot be
	// saved and the same link would be POSTed again on the next run.
	if (!is_dir($rest_result_dir) && !mkdir($rest_result_dir, 0777, true)) {
	    printf("Could not create %s\n", $rest_result_dir);
	    exit(1);
	}

	// documentation: https://linkding.link/api/
	// Header looks like
	// Authorization: Token <Token>
	// Methods
	// GET /api/bookmarks/
	// GET /api/bookmarks/archived/
	// GET /api/bookmarks/<id>/
	// GET /api/bookmarks/<id>/
	// GET /api/bookmarks/check/?url=https%3A%2F%2Fexample.com

	// To create a link
	// POST /api/bookmarks/
	/* payload
	{
	"url": "https://example.com",
	"title": "Example title",
	"description": "Example description",
	"notes": "Example notes",
	"is_archived": false,
	"unread": false,
	"shared": false,
	"tag_names": [
	"tag1",
	"tag2"
	]
	}
	*/

	// To update a link
	//PUT /api/bookmarks/<id>/
	//PATCH /api/bookmarks/<id>/

	// read source directory for only json files

	$filenames = scandir($source_dir);
	if ($filenames === false) {
	    printf("Could not read %s\n", $source_dir);
	    exit(1);
	}

	// Endpoint and auth header do not vary per bookmark, so they are built once.
	$url = $linkding_instance . '/api/bookmarks/';
	$auth_header = 'Authorization: Token ' . $linkding_token;

	// do one at a time
	foreach ($filenames as $filename) {
	    // if filename does not end with json, bail out
	    if (!preg_match('/\.json$/', $filename)) {
	        continue;
	    }

	    $source_file = $source_dir . $filename;
	    $rest_result_file = $rest_result_dir . $filename;

	    // A saved response in the results directory marks the link as submitted.
	    if (file_exists($rest_result_file)) {
	        printf("%s has already been submitted to the REST API\n", $filename);
	        continue;
	    }

	    // Skip files that cannot be read or do not decode to a link with a URL,
	    // rather than POSTing a payload full of nulls.
	    $json = json_decode((string) file_get_contents($source_file), true);
	    if (!is_array($json) || empty($json['url'])) {
	        printf("Skipping %s: not a valid link JSON file\n", $filename);
	        continue;
	    }

	    printf("%s has not been submitted to the REST API\n", $filename);

	    // Missing or null text fields are sent as empty strings / empty lists.
	    $payload = [
	        'url' => $json['url'],
	        'title' => $json['title'] ?? '',
	        'description' => $json['description'] ?? '',
	        'notes' => $json['extra_derived'] ?? '',
	        'is_archived' => false,
	        'unread' => false,
	        'shared' => !($json['private'] ?? false),
	        'tag_names' => $json['tags'] ?? [],
	    ];
	    if (isset($json['date_added'])) {
	        $payload['date_added'] = $json['date_added'];
	    }

	    $payload_json = json_encode($payload);
	    if ($payload_json === false) {
	        printf("Skipping %s: %s\n", $filename, json_last_error_msg());
	        continue;
	    }

	    // post to the REST API
	    $ch = curl_init($url);
	    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
	    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload_json);
	    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
	    curl_setopt($ch, CURLOPT_HTTPHEADER, [
	        'Content-Type: application/json',
	        'Content-Length: ' . strlen($payload_json),
	        $auth_header,
	    ]);

	    $result = curl_exec($ch);

	    // curl_exec() returns false on transport failures (DNS, TLS, timeout);
	    // there is no HTTP status in that case, so report the cURL error instead.
	    if ($result === false) {
	        printf("Error submitting %s to the REST API\n", $filename);
	        printf("cURL error: %s\n", curl_error($ch));
	        continue;
	    }

	    $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

	    // if it's good, save the resulting json file in REST_RESULTS
	    // (the status check is written with plain || operators; the escaped
	    // "\|\|" form was not valid PHP)
	    if ($http_code == 200 || $http_code == 201 || $http_code == 204) {
	        if (file_put_contents($rest_result_file, $result) === false) {
	            // Without the saved response this link would be submitted again.
	            printf("Submitted %s but could not write %s\n", $filename, $rest_result_file);
	            exit(1);
	        }
	        printf("Submitted %s to the REST API\n", $filename);
	    } else {
	        printf("Error submitting %s to the REST API\n", $filename);
	        printf("HTTP Code: %d\n", $http_code);
	        printf("Result: %s\n", $result);
	    }
	}