Skip to content

Instantly share code, notes, and snippets.

@artlung
Last active October 31, 2024 18:03
Show Gist options
  • Save artlung/15cb9acdf720e5ede9254bd247ccc59b to your computer and use it in GitHub Desktop.
Save artlung/15cb9acdf720e5ede9254bd247ccc59b to your computer and use it in GitHub Desktop.
Post links stored in individual JSON files to Linkding
<?php
// Instapaper allows you to export your saved links as a CSV file
// And I turn it into my default LINKS json format
// for better or worse
// Source CSV exported from Instapaper, and the directory that receives one JSON file per link.
$filename = 'instapaper-export.csv';
$ignore_header_line = true; // NOTE(review): never read by the visible code; the header row is recognised by its first cell instead
$destination_for_link_json = './LINKS/';
// Parse with fgetcsv() instead of running str_getcsv() over file(): file() splits the
// input on every newline, which tears apart quoted fields that contain line breaks
// and turns the fragments into bogus rows.
$handle = fopen($filename, 'r');
if ($handle === false) {
    printf("Could not open %s\n", $filename);
    exit(1);
}
// $csv ends up as a list of rows, each row a list of column values.
$csv = [];
// Delimiter, enclosure and escape are spelled out; they are PHP's historical defaults.
while (($record = fgetcsv($handle, 0, ',', '"', '\\')) !== false) {
    $csv[] = $record;
}
fclose($handle);
// URL,Title,Selection,Folder,Timestamp,Tags
/*
* // goal is to create json looking like this
* tags is expected to be [] since I have no tags
* ignore any urls which do not begin with http, and exit if a row has no timestamp
*
{
"id": "sidebarlinks.3",
"url": "http:\/\/www.adaptivepath.com\/publications\/essays\/archives\/000315.php",
"tags": [
"domain:adaptivepath.com"
],
"date_added": "2004-04-07T00:00:00+00:00",
"title": "Why Content Management Fails",
"description": "CMSes need managers",
"private": false,
"via": "",
"extra_derived": ""
}
*/
// Only referenced by the commented-out debugging output at the end of the script.
$selection_values = [];
$folder_values = [];
// Instapaper folders that are kept, and the tag each one becomes.
// Any folder not listed here is ignored.
$folder_tags = [
    'Starred' => 'instapaper:starred',
    'Editor’s Picks' => 'instapaper:editors-picks',
];
// file_put_contents() does not create missing directories, so make sure the target exists.
if (!is_dir($destination_for_link_json) && !mkdir($destination_for_link_json, 0777, true)) {
    printf("Could not create %s\n", $destination_for_link_json);
    exit(1);
}
// Convert every CSV row into one JSON file named after a hash of its URL.
foreach ($csv as $row) {
    // Columns: URL,Title,Selection,Folder,Timestamp,Tags
    // The Selection column is deliberately not kept.
    $url = (string) ($row[0] ?? '');
    if ($url === '' || $url === 'URL') {
        // blank line or the header row
        continue;
    }
    $title = (string) ($row[1] ?? '');
    $folder = $row[3] ?? '';
    $timestamp = $row[4] ?? '';
    $tags = []; // I don't have any tags at the moment
    // Require a real http:// or https:// prefix, not just any string starting with "http".
    if (preg_match('#^https?://#', $url) !== 1) {
        print "Ignoring $url because it is not an http(s) url\n";
        continue;
    }
    // A link without a usable timestamp aborts the whole import.
    if (!$timestamp) {
        printf("%s does not have a timestamp\n", $url);
        exit(1);
    }
    if (!is_numeric($timestamp)) {
        printf("%s has a non-numeric timestamp: %s\n", $url, $timestamp);
        exit(1);
    }
    // get the domain of the url, strip a leading 'www.' and add a tag
    // that looks like domain:domain
    $domain = parse_url($url, PHP_URL_HOST);
    if (is_string($domain) && $domain !== '') {
        // anchored so that only a prefix is removed, never a 'www.' in the middle of the host
        $domain = preg_replace('/^www\./', '', $domain);
        $tags[] = "domain:$domain";
    }
    $tags[] = 'via:instapaper'; // to commemorate the source of the import
    // keep the folder as a tag when it is one of the interesting ones
    if (isset($folder_tags[$folder])) {
        $tags[] = $folder_tags[$folder];
    }
    // save the epoch timestamp as an ISO 8601 timestamp (in PHP's default timezone)
    $date_added = date('c', (int) $timestamp);
    $id = 'instapaper' . md5($url);
    $link_data = [
        'id' => $id,
        'url' => $url,
        'tags' => $tags,
        'date_added' => $date_added,
        'title' => $title,
        'description' => '',
        'private' => false,
        'via' => '',
        'extra_derived' => '',
    ];
    // json_encode() returns false on e.g. invalid UTF-8; do not write an empty file in that case
    $link_json = json_encode($link_data, JSON_PRETTY_PRINT);
    if ($link_json === false) {
        printf("Could not encode %s as JSON: %s\n", $url, json_last_error_msg());
        continue;
    }
    // save it to a json file in the LINKS directory
    $json_filename = $destination_for_link_json . $id . '.json';
    if (file_put_contents($json_filename, $link_json) === false) {
        printf("Could not write %s\n", $json_filename);
        exit(1);
    }
    print "Saved $json_filename\n";
}
// Folder values were (
// [0] => Unread
// [1] => Archive
// [2] => Starred
// [3] => Editor’s Picks
//) which I will save as instapaper:starred and instapaper:editors-picks but ignore the others
//print "Selection values\n";
//print_r($selection_values);
//print "Folder values\n";
//print_r($folder_values);
<?php
// https://github.com/sissbruecker/linkding
// https://linkding.link/api/
// Author: Joe Crawford https://artlung.com/
// * Linkding is a bookmark tool I use to store links
// * I exported all my prior links to JSON files
// * Note that Linkding allows for import of a Netscape bookmarks file, which might be easier
// * It looks like submitted date_added values are not respected
// * My other stored metadata I put into the notes field in linkding
/*
{
"id": "sidebarlinks.24327",
"url": "http:\/\/tantek.pbworks.com\/w\/page\/19402872\/CassisProject",
"tags": [
"php",
"javascript",
"programming",
"domain:tantek.pbworks.com"
],
"date_added": "2010-11-20T12:44:00+00:00",
"title": "tantek \/ CassisProject",
"description": "javascript and php",
"private": false,
"via": null,
"extra_derived": "hash:9104bf0bf0fdc466dca3d4b692d3f4cf|meta:b2a7bfe14a384bfffde15e86f29e69f4"
}
*/
// After submitting, the payload returned from POST /api/bookmarks/ looks like
/*
{
"id": 941,
"url": "http://tantek.pbworks.com/w/page/19402872/CassisProject",
"title": "tantek / CassisProject",
"description": "javascript and php",
"notes": "hash:9104bf0bf0fdc466dca3d4b692d3f4cf|meta:b2a7bfe14a384bfffde15e86f29e69f4",
"web_archive_snapshot_url": "",
"favicon_url": null,
"preview_image_url": null,
"is_archived": false,
"unread": false,
"shared": true,
"tag_names": [
"domain:tantek.pbworks.com",
"javascript",
"php",
"programming"
],
"date_added": "2024-10-30T17:36:26.689901Z",
"date_modified": "2024-10-31T04:24:45.961427Z",
"website_title": null,
"website_description": null
}
*/
// one directory for json source
// one directory for json results
// Where the per-link JSON files are read from, and where each API response is stored.
$source_dir = './LINKS/';
$rest_result_dir = './REST_RESULTS/';
// Base URL of the Linkding instance and the API token sent in the Authorization header.
$linkding_instance = 'https://example.com';
$linkding_token = '';
// Refuse to run until a token has been filled in above.
if (strlen($linkding_token) === 0) {
    echo "No token found, set \$linkding_token\n";
    exit(1);
}
// documentation: https://linkding.link/api/
// Header looks like
// Authorization: Token <Token>
// Methods
// GET /api/bookmarks/
// GET /api/bookmarks/archived/
// GET /api/bookmarks/<id>/
// GET /api/bookmarks/<id>/
// GET /api/bookmarks/check/?url=https%3A%2F%2Fexample.com
// To create a link
// POST /api/bookmarks/
/* payload
{
"url": "https://example.com",
"title": "Example title",
"description": "Example description",
"notes": "Example notes",
"is_archived": false,
"unread": false,
"shared": false,
"tag_names": [
"tag1",
"tag2"
]
}
*/
// To update a link
//PUT /api/bookmarks/<id>/
//PATCH /api/bookmarks/<id>/
// read source directory for only json files
// Submit every not-yet-submitted JSON file in $source_dir to Linkding, one request per file.
// A file counts as submitted once a file of the same name exists in $rest_result_dir.
$filenames = scandir($source_dir);
if ($filenames === false) {
    printf("Could not read %s\n", $source_dir);
    exit(1);
}
// The response of each successful request is stored in $rest_result_dir. Create it up
// front so a successful submission is not forgotten because its result could not be written.
if (!is_dir($rest_result_dir) && !mkdir($rest_result_dir, 0777, true)) {
    printf("Could not create %s\n", $rest_result_dir);
    exit(1);
}
// These are identical for every request, so build them once.
$method = 'POST';
$url = $linkding_instance . '/api/bookmarks/';
$auth_header = 'Authorization: Token ' . $linkding_token;
// do one at a time
foreach ($filenames as $filename) {
    // if filename does not end with json, skip it (this also skips . and ..)
    if (!preg_match('/\.json$/', $filename)) {
        continue;
    }
    $source_file = $source_dir . $filename;
    $rest_result_file = $rest_result_dir . $filename;
    if (file_exists($rest_result_file)) {
        printf("%s has already been submitted to the REST API\n", $filename);
        continue;
    }
    // An unreadable or malformed file is skipped rather than posted as a payload of nulls.
    $source = file_get_contents($source_file);
    $json = $source === false ? null : json_decode($source, true);
    if (!is_array($json) || !isset($json['url'])) {
        printf("Skipping %s: it is not a readable link JSON file\n", $filename);
        continue;
    }
    printf("%s has not been submitted to the REST API\n", $filename);
    // Missing or null text fields become empty strings, matching the string fields
    // of the documented payload.
    $payload = [
        'url' => $json['url'],
        'title' => $json['title'] ?? '',
        'description' => $json['description'] ?? '',
        'notes' => $json['extra_derived'] ?? '',
        'is_archived' => false,
        'unread' => false,
        'shared' => !($json['private'] ?? false),
        'tag_names' => $json['tags'] ?? [],
    ];
    if (isset($json['date_added'])) {
        $payload['date_added'] = $json['date_added'];
    }
    $payload_json = json_encode($payload);
    if ($payload_json === false) {
        printf("Skipping %s: could not encode payload: %s\n", $filename, json_last_error_msg());
        continue;
    }
    // post to the REST API
    $ch = curl_init($url);
    curl_setopt_array($ch, [
        CURLOPT_CUSTOMREQUEST => $method,
        CURLOPT_POSTFIELDS => $payload_json,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HTTPHEADER => [
            'Content-Type: application/json',
            'Content-Length: ' . strlen($payload_json),
            $auth_header,
        ],
    ]);
    $result = curl_exec($ch);
    // false means the request itself failed (DNS, TLS, connection, ...); there is no HTTP status to report
    if ($result === false) {
        printf("Error submitting %s to the REST API\n", $filename);
        printf("cURL error: %s\n", curl_error($ch));
        continue;
    }
    $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // if it's good, save the resulting json file in REST_RESULTS
    if (in_array($http_code, [200, 201, 204], true)) {
        if (file_put_contents($rest_result_file, $result) === false) {
            printf("Submitted %s but could not write %s; it will be submitted again on the next run\n", $filename, $rest_result_file);
            continue;
        }
        printf("Submitted %s to the REST API\n", $filename);
    } else {
        printf("Error submitting %s to the REST API\n", $filename);
        printf("HTTP Code: %d\n", $http_code);
        printf("Result: %s\n", $result);
    }
}
<?php
// Directory of per-link JSON files, and directory holding one result file per submitted link.
$source_dir = './LINKS/';
$rest_result_dir = './REST_RESULTS/';
$filenames = scandir($source_dir);
if ($filenames === false) {
    printf("Could not read %s\n", $source_dir);
    exit(1);
}
// List the files that are still waiting to be submitted, but no more than this many.
$max_to_list = 30;
$count = 0;
foreach ($filenames as $filename) {
    // must end in json
    if (!preg_match('/\.json$/', $filename)) {
        continue;
    }
    // a result file of the same name exists, so this one is not pending
    if (file_exists($rest_result_dir . $filename)) {
        continue;
    }
    // check the limit before printing so that exactly $max_to_list files are listed
    if ($count >= $max_to_list) {
        break;
    }
    printf("%s has not been submitted to the REST API\n", $filename);
    $count++;
}
// how many files with .json at the end are in each directory
// Report how many .json files each directory holds and how many source files
// do not yet have a result file of the same name.
// glob() returns false on error; treat that as "no files" so count() never receives false.
// Only the base names are kept, because the two lists live in different directories.
$source_files = array_map('basename', glob($source_dir . '*.json') ?: []);
$rest_files = array_map('basename', glob($rest_result_dir . '*.json') ?: []);
printf("There are %d files in %s\n", count($source_files), $source_dir);
printf("There are %d files in %s\n", count($rest_files), $rest_result_dir);
// how many are left to go?
$diff = array_diff($source_files, $rest_files);
printf("There are %d files left to go\n", count($diff));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment