@daniel31x13
Last active May 29, 2025 00:40

archivebox2linkwarden.js

Description

This script automates the import of ArchiveBox snapshots into a Linkwarden instance by directly executing SQL against the Postgres database and arranging the archived files into Linkwarden's expected directory structure. Make sure to back up your Linkwarden database before running the script, just in case.
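
For example, one quick way to take a backup with pg_dump (a sketch assuming the same Postgres defaults the script itself uses, i.e. user postgres and database postgres; adjust to match your setup):

Docker mode: docker exec <containerId> pg_dump -U postgres -d postgres > linkwarden-backup.sql

Host mode: pg_dump "$DATABASE_URL" > linkwarden-backup.sql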

It handles two connection modes:

  • Docker mode: Executes psql inside a running Postgres container via docker exec, specified by the --postgres-container-id flag.

  • Host mode: Uses the DATABASE_URL environment variable to connect via the local psql client.

Note

To decide between Docker mode and Host mode, check how you installed Linkwarden: if you used the Docker installation route, use Docker mode; if you installed manually, use Host mode.

How It Works

For each ArchiveBox subdirectory under the given path, it:

  1. Reads index.json to extract title, URL, and bookmark timestamp.

  2. Inserts a "Link" row in Postgres, capturing the new link ID.

  3. Copies singlefile.html, output.pdf, and screenshot.png (if present) into linkwarden-archives/{collectionId}/{linkId}.[html|pdf|png].

  4. Merges readability output (article.json, content.html, content.txt) into linkwarden-archives/{collectionId}/{linkId}_readability.json.

  5. Updates the newly created Link record's image, pdf, monolith, and readable columns to point at those files.
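
To illustrate steps 3–5, here is roughly what one imported snapshot ends up looking like on disk (the collection ID 12 and link ID 345 are made-up values; yours will differ):

ls linkwarden-archives/12/
# 345.html                copied from singlefile.html
# 345.pdf                 copied from output.pdf
# 345.png                 copied from screenshot.png
# 345_readability.json    merged from readability/article.json, content.html, and content.txt

The new Link row's image, pdf, monolith, and readable columns then store the corresponding archives/12/345.* paths.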

Requirements

  • Node.js

  • psql client (Host mode) or the Docker CLI (Docker mode)

Usage Example

Docker mode

./archivebox2linkwarden.js --path ./data/archive --user-id 1 --postgres-container-id 123456789abc

Host mode

export DATABASE_URL=postgresql://user:pass@localhost:5432/postgres && ./archivebox2linkwarden.js --path ./data/archive --user-id 17

Required Flags

  • --path <archiveDir>: path to your ArchiveBox "archive" directory

  • --user-id <number>: Linkwarden user ID that will own the imported links; usually 1 if you are the admin

  • --postgres-container-id <string>: Docker container ID of the linkwarden-postgres container; it can be found with the docker ps command (optional if the DATABASE_URL environment variable is set)
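
For example, if your Postgres container follows the usual linkwarden-postgres naming (an assumption; adjust the filter if yours is named differently), you can list its ID with:

docker ps --filter "name=postgres" --format "{{.ID}}  {{.Names}}"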

Output

  • Inserts a new Collection named "Imports" and its Links into the Linkwarden database

  • Creates a linkwarden-archives/{collectionId}/ directory containing the archives

  • Logs progress and final instructions
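
The final instruction printed by the script is to move the imported files into your instance's data directory. A minimal sketch of that step, using a hypothetical /path/to/linkwarden/data location (replace it with wherever your Linkwarden instance keeps its data/archives directory; for Docker installs this is typically the data directory mounted into the linkwarden container):

mv linkwarden-archives/<collectionId> /path/to/linkwarden/data/archives/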

#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const { execSync } = require("child_process");

const raw = process.argv.slice(2);

function getFlag(names) {
  for (let n of names) {
    const idx = raw.indexOf(n);
    if (idx !== -1 && idx + 1 < raw.length) return raw[idx + 1];
  }
}

const ARCHIVE_ROOT = getFlag(["--path"]);
const USER_ID = parseInt(getFlag(["--user-id"]), 10);
const POSTGRES_CONTAINER = getFlag(["--postgres-container-id"]);

if (!ARCHIVE_ROOT || isNaN(USER_ID)) {
  console.error(
    "Usage: ./archivebox2linkwarden.js --path <archiveDir> --user-id <yourUserId> --postgres-container-id <containerId>"
  );
  process.exit(1);
}

const DATABASE_URL = process.env.DATABASE_URL;
if (!POSTGRES_CONTAINER && !DATABASE_URL) {
  console.error(
    "Error: no DATABASE_URL or --postgres-container-id flag specified.\n" +
      "Please set DATABASE_URL=postgres://… (or set the --postgres-container-id flag if you are using Docker)"
  );
  process.exit(1);
}

// escape single quotes for SQL literals
function esc(str) {
  return str.replace(/'/g, "''");
}

// normalize "YYYY-MM-DD HH:mm" → ISO8601
function toIso(s) {
  return new Date(s.replace(" ", "T") + ":00Z").toISOString();
}

/**
 * Run SQL via either host psql or docker exec.
 * returnOutput=true captures and returns the output (for RETURNING id).
 */
function runPSQL(sql, returnOutput = false) {
  const flags = returnOutput ? "-q -t -A --no-psqlrc" : "-q --no-psqlrc";
  const escaped = sql.replace(/"/g, '\\"');
  let cmd, opts;
  if (POSTGRES_CONTAINER) {
    // exec into given container
    cmd = `docker exec -i ${POSTGRES_CONTAINER} psql -U postgres -d postgres ${flags} -c "${escaped}"`;
    opts = returnOutput ? { stdio: "pipe" } : { stdio: "inherit" };
  } else {
    cmd = `psql "${DATABASE_URL}" ${flags} -c "${escaped}"`;
    opts = returnOutput ? { stdio: "pipe" } : { stdio: "inherit" };
  }
  const out = execSync(cmd, opts);
  return returnOutput ? out.toString().trim() : null;
}

function main() {
  // 1) discover all timestamped subdirs, sorted numerically
  const subdirs = fs
    .readdirSync(ARCHIVE_ROOT)
    .filter((d) => fs.statSync(path.join(ARCHIVE_ROOT, d)).isDirectory())
    .sort((a, b) => parseFloat(a) - parseFloat(b));

  // 2) create the “Imports” collection
  const collName = esc("Imports").slice(0, 254);
  const collDesc = esc("").slice(0, 254);
  const collColor = esc("#0ea5e9").slice(0, 50);
  const collSql = `
    INSERT INTO "Collection"
      (name, description, color, "ownerId", "createdById", "createdAt", "updatedAt", "isPublic")
    VALUES
      ('${collName}','${collDesc}','${collColor}',${USER_ID},${USER_ID},NOW(),NOW(),false)
    RETURNING id;
  `;
  const collectionId = runPSQL(collSql, true);
  console.log(`✔ Created collection ${collectionId}`);

  // prepare destination folder
  const DEST_ROOT = "linkwarden-archives";
  const collDir = path.join(DEST_ROOT, collectionId);
  fs.mkdirSync(collDir, { recursive: true });

  // 3) insert each link, copy files, then UPDATE file columns
  subdirs.forEach((subdir, idx) => {
    const srcDir = path.join(ARCHIVE_ROOT, subdir);
    const idxPath = path.join(srcDir, "index.json");
    if (!fs.existsSync(idxPath)) {
      console.warn(`⚠️ skipping ${subdir} (no index.json)`);
      return;
    }
    const data = JSON.parse(fs.readFileSync(idxPath, "utf8"));
    const name = esc(data.title || data.domain || data.url || "").slice(0, 254);
    const url = esc(data.url || "").slice(0, 2047);
    const importDate = data.bookmarked_date
      ? toIso(data.bookmarked_date)
      : new Date().toISOString();

    // insert the link and capture its id
    const linkSql = `
      INSERT INTO "Link"
        (url, name, description, "importDate", "collectionId", "createdById", "createdAt", "updatedAt")
      VALUES
        ('${url}','${name}','', '${importDate}', ${collectionId}, ${USER_ID}, NOW(), NOW())
      RETURNING id;
    `;
    const linkId = runPSQL(linkSql, true);

    // base path for all artifacts
    const base = path.join(collDir, linkId);

    // copy singlefile.html → {linkId}.html
    const sf = path.join(srcDir, "singlefile.html");
    if (fs.existsSync(sf)) {
      fs.copyFileSync(sf, base + ".html");
    }

    // copy output.pdf → {linkId}.pdf
    const pd = path.join(srcDir, "output.pdf");
    if (fs.existsSync(pd)) {
      fs.copyFileSync(pd, base + ".pdf");
    }

    // copy screenshot.png → {linkId}.png
    const sn = path.join(srcDir, "screenshot.png");
    if (fs.existsSync(sn)) {
      fs.copyFileSync(sn, base + ".png");
    }

    // merge readability → {linkId}_readability.json
    const rdDir = path.join(srcDir, "readability");
    if (fs.existsSync(rdDir)) {
      let meta = {};
      const mF = path.join(rdDir, "article.json");
      if (fs.existsSync(mF)) meta = JSON.parse(fs.readFileSync(mF, "utf8"));
      const hF = path.join(rdDir, "content.html");
      const tF = path.join(rdDir, "content.txt");
      meta.content = fs.existsSync(hF) ? fs.readFileSync(hF, "utf8") : "";
      meta.textContent = fs.existsSync(tF) ? fs.readFileSync(tF, "utf8") : "";
      fs.writeFileSync(
        base + "_readability.json",
        JSON.stringify(meta, null, 2)
      );
    }

    // build the UPDATE clause for any files we copied
    const updates = [];
    if (fs.existsSync(base + ".png")) {
      updates.push(
        `image='${path.posix.join(`archives/${collectionId}/${linkId}.png`)}'`
      );
    }
    if (fs.existsSync(base + ".pdf")) {
      updates.push(
        `pdf='${path.posix.join(`archives/${collectionId}/${linkId}.pdf`)}'`
      );
    }
    if (fs.existsSync(base + ".html")) {
      updates.push(
        `monolith='${path.posix.join(
          `archives/${collectionId}/${linkId}.html`
        )}'`
      );
    }
    if (fs.existsSync(base + "_readability.json")) {
      updates.push(
        `readable='${path.posix.join(
          `archives/${collectionId}/${linkId}_readability.json`
        )}'`
      );
    }
    if (updates.length) {
      const updSql = `
        UPDATE "Link"
        SET ${updates.join(", ")}
        WHERE id = ${linkId};
      `;
      runPSQL(updSql, false);
    }
  });

  console.log("🎉 Import complete!");
  console.log("Created a 'linkwarden-archives' directory.");
  console.warn(
    `⚠️ One final (but important) step: move the 'linkwarden-archives/${collectionId}' directory into your Linkwarden instance’s data directory so it becomes 'data/archives/${collectionId}'.`
  );
}
main();