#!/usr/bin/env node

const fs = require("fs");
const path = require("path");
const { execSync } = require("child_process");

const raw = process.argv.slice(2);
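
// Example invocation (paths, user id, and container name are illustrative):
//   DATABASE_URL=postgres://user:pass@localhost:5432/linkwarden \
//     ./archivebox2linkwarden.js --path ./archivebox/archive --user-id 1
// or, when Postgres runs in Docker:
//   ./archivebox2linkwarden.js --path ./archivebox/archive --user-id 1 \
//     --postgres-container-id linkwarden-postgres-1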
|
function getFlag(names) {
  for (const n of names) {
    const idx = raw.indexOf(n);
    if (idx !== -1 && idx + 1 < raw.length) return raw[idx + 1];
  }
}
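// e.g. with argv ["--user-id", "1"], getFlag(["--user-id"]) → "1";
// returns undefined when a flag is absent or has no value after it.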
|
const ARCHIVE_ROOT = getFlag(["--path"]);
const USER_ID = parseInt(getFlag(["--user-id"]), 10);
const POSTGRES_CONTAINER = getFlag(["--postgres-container-id"]);

if (!ARCHIVE_ROOT || isNaN(USER_ID)) {
  console.error(
    "Usage: ./archivebox2linkwarden.js --path <archiveDir> --user-id <yourUserId> [--postgres-container-id <containerId>]"
  );
  process.exit(1);
}

const DATABASE_URL = process.env.DATABASE_URL;
if (!POSTGRES_CONTAINER && !DATABASE_URL) {
  console.error(
    "Error: no DATABASE_URL or --postgres-container-id flag specified.\n" +
      "Please set DATABASE_URL=postgres://… (or set the --postgres-container-id flag if you are using Docker)"
  );
  process.exit(1);
}

// escape single quotes for SQL literals
function esc(str) {
  return str.replace(/'/g, "''");
}
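// e.g. esc("O'Brien's page") → "O''Brien''s page"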
|
// normalize "YYYY-MM-DD HH:mm" → ISO8601
function toIso(s) {
  return new Date(s.replace(" ", "T") + ":00Z").toISOString();
}
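// e.g. toIso("2024-01-15 09:30") → "2024-01-15T09:30:00.000Z"
// (assumes the ArchiveBox timestamp is UTC and carries no seconds field)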
|
/**
 * Run SQL via either host psql or docker exec.
 * returnOutput=true captures and returns the output (for RETURNING id).
 */
function runPSQL(sql, returnOutput = false) {
  const flags = returnOutput ? "-q -t -A --no-psqlrc" : "-q --no-psqlrc";
  const escaped = sql.replace(/"/g, '\\"');
  const opts = returnOutput ? { stdio: "pipe" } : { stdio: "inherit" };

  // exec into the given container, or talk to the host's psql directly
  const cmd = POSTGRES_CONTAINER
    ? `docker exec -i ${POSTGRES_CONTAINER} psql -U postgres -d postgres ${flags} -c "${escaped}"`
    : `psql "${DATABASE_URL}" ${flags} -c "${escaped}"`;

  const out = execSync(cmd, opts);
  return returnOutput ? out.toString().trim() : null;
}
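// e.g. runPSQL(`SELECT 1;`, true) → "1".
// Caveat: the command is run through a shell, so although double quotes are
// escaped above, SQL containing a backtick or `$` could still be mangled.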
|
function main() {
  // 1) discover all timestamped subdirs, sorted numerically
  const subdirs = fs
    .readdirSync(ARCHIVE_ROOT)
    .filter((d) => fs.statSync(path.join(ARCHIVE_ROOT, d)).isDirectory())
    .sort((a, b) => parseFloat(a) - parseFloat(b));
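  // ArchiveBox names each snapshot directory by its Unix-epoch bookmark
  // timestamp (e.g. "1712345678.123456"), which is why a numeric sort works.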
|
  // 2) create the “Imports” collection
  const collName = esc("Imports").slice(0, 254);
  const collDesc = esc("").slice(0, 254);
  const collColor = esc("#0ea5e9").slice(0, 50);

  const collSql = `
    INSERT INTO "Collection"
      (name, description, color, "ownerId", "createdById", "createdAt", "updatedAt", "isPublic")
    VALUES
      ('${collName}','${collDesc}','${collColor}',${USER_ID},${USER_ID},NOW(),NOW(),false)
    RETURNING id;
  `;
  const collectionId = runPSQL(collSql, true);
  console.log(`✔ Created collection ${collectionId}`);

  // prepare destination folder
  const DEST_ROOT = "linkwarden-archives";
  const collDir = path.join(DEST_ROOT, collectionId);
  fs.mkdirSync(collDir, { recursive: true });
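  // Files are laid out as {DEST_ROOT}/{collectionId}/{linkId}.{ext}, mirroring
  // the data/archives/ layout referenced in the final step printed below.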
|
  // 3) insert each link, copy files, then UPDATE file columns
  subdirs.forEach((subdir) => {
    const srcDir = path.join(ARCHIVE_ROOT, subdir);
    const idxPath = path.join(srcDir, "index.json");
    if (!fs.existsSync(idxPath)) {
      console.warn(`⚠️ skipping ${subdir} (no index.json)`);
      return;
    }

    const data = JSON.parse(fs.readFileSync(idxPath, "utf8"));
    const name = esc(data.title || data.domain || data.url || "").slice(0, 254);
    const url = esc(data.url || "").slice(0, 2047);
    const importDate = data.bookmarked_date
      ? toIso(data.bookmarked_date)
      : new Date().toISOString();

    // insert the link and capture its id
    const linkSql = `
      INSERT INTO "Link"
        (url, name, description, "importDate", "collectionId", "createdById", "createdAt", "updatedAt")
      VALUES
        ('${url}','${name}','', '${importDate}', ${collectionId}, ${USER_ID}, NOW(), NOW())
      RETURNING id;
    `;
    const linkId = runPSQL(linkSql, true);

    // base path for all artifacts
    const base = path.join(collDir, linkId);

    // copy singlefile.html → {linkId}.html
    const sf = path.join(srcDir, "singlefile.html");
    if (fs.existsSync(sf)) {
      fs.copyFileSync(sf, base + ".html");
    }
    // copy output.pdf → {linkId}.pdf
    const pd = path.join(srcDir, "output.pdf");
    if (fs.existsSync(pd)) {
      fs.copyFileSync(pd, base + ".pdf");
    }
    // copy screenshot.png → {linkId}.png
    const sn = path.join(srcDir, "screenshot.png");
    if (fs.existsSync(sn)) {
      fs.copyFileSync(sn, base + ".png");
    }

    // merge readability → {linkId}_readability.json
    const rdDir = path.join(srcDir, "readability");
    if (fs.existsSync(rdDir)) {
      let meta = {};
      const mF = path.join(rdDir, "article.json");
      if (fs.existsSync(mF)) meta = JSON.parse(fs.readFileSync(mF, "utf8"));
      const hF = path.join(rdDir, "content.html");
      const tF = path.join(rdDir, "content.txt");
      meta.content = fs.existsSync(hF) ? fs.readFileSync(hF, "utf8") : "";
      meta.textContent = fs.existsSync(tF) ? fs.readFileSync(tF, "utf8") : "";
      fs.writeFileSync(
        base + "_readability.json",
        JSON.stringify(meta, null, 2)
      );
    }
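    // Assumption: ArchiveBox's article.json fields plus content/textContent
    // approximate the JSON shape Linkwarden stores for its readability view.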
|
    // build the UPDATE clause for any files we copied
    const updates = [];
    if (fs.existsSync(base + ".png")) {
      updates.push(`image='archives/${collectionId}/${linkId}.png'`);
    }
    if (fs.existsSync(base + ".pdf")) {
      updates.push(`pdf='archives/${collectionId}/${linkId}.pdf'`);
    }
    if (fs.existsSync(base + ".html")) {
      updates.push(`monolith='archives/${collectionId}/${linkId}.html'`);
    }
    if (fs.existsSync(base + "_readability.json")) {
      updates.push(
        `readable='archives/${collectionId}/${linkId}_readability.json'`
      );
    }
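    // e.g. with a screenshot and a PDF copied, the statement below becomes
    // (illustrative ids):
    //   UPDATE "Link" SET image='archives/7/42.png', pdf='archives/7/42.pdf' WHERE id = 42;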
|
    if (updates.length) {
      const updSql = `
        UPDATE "Link"
        SET ${updates.join(", ")}
        WHERE id = ${linkId};
      `;
      runPSQL(updSql, false);
    }
  });

  console.log("🎉 Import complete!");
  console.log("Created a 'linkwarden-archives' directory.");
  console.warn(
    `⚠️ One final (but important) step: copy the 'linkwarden-archives/${collectionId}' directory into your Linkwarden instance’s 'data/archives/' directory (replacing any existing 'data/archives/${collectionId}').`
  );
}

main();