Skip to content

Instantly share code, notes, and snippets.

@jkoop
Last active October 14, 2024 05:32
Show Gist options
  • Save jkoop/9c069098fc0ff188c4cbdcc7c2f73416 to your computer and use it in GitHub Desktop.
Save jkoop/9c069098fc0ff188c4cbdcc7c2f73416 to your computer and use it in GitHub Desktop.
Automatically generate and set a title for documents with a specific tag in Paperless-ngx using Ollama
#! bun run
/**
* The author disclaims copyright to this source code.
*/
// Runtime configuration, read once from the environment at start-up.
// Trailing slashes are stripped from the base URLs because request paths are
// appended as `${URL}/api/...`; a doubled slash could otherwise produce a
// redirect, which the document listing request rejects.
const PAPERLESS_URL = (process.env.PAPERLESS_URL ?? "").replace(/\/+$/, "");
const PAPERLESS_TOKEN = process.env.PAPERLESS_TOKEN ?? "";
// Environment values are strings; convert to a number so later comparisons
// against the numeric tag ids of a document do not depend on coercion.
const PAPERLESS_TAG_ID = Number(process.env.PAPERLESS_TAG_ID ?? 0);
const OLLAMA_URL = (process.env.OLLAMA_URL ?? "").replace(/\/+$/, "");
if (PAPERLESS_URL.length < 1) throw new Error("PAPERLESS_URL must be set.");
if (PAPERLESS_TOKEN.length < 1) throw new Error("PAPERLESS_TOKEN must be set.");
// Number.isInteger also rejects NaN, so non-numeric values such as "abc" fail here.
if (!Number.isInteger(PAPERLESS_TAG_ID) || PAPERLESS_TAG_ID < 1) {
  throw new Error("PAPERLESS_TAG_ID must be set to a positive integer.");
}
if (OLLAMA_URL.length < 1) throw new Error("OLLAMA_URL must be set.");
/**
 * Fetches the document at a zero-based position in the list of documents that
 * carry the PAPERLESS_TAG_ID tag.
 *
 * @param {number} index - Zero-based position in the filtered list.
 * @returns {Promise<object | undefined>} The document, or undefined when there
 *   is no document at that position.
 * @throws {Error} When Paperless answers with a non-404 error status.
 */
async function fetchTaggedDocument(index) {
  const query = new URLSearchParams({
    page: String(index + 1),
    page_size: "1",
    tags__id__all: String(PAPERLESS_TAG_ID),
  });
  const res = await fetch(`${PAPERLESS_URL}/api/documents/?${query}`, {
    headers: {
      Authorization: `Token ${PAPERLESS_TOKEN}`,
    },
    redirect: "error",
  });
  // A page past the end of the list is reported as 404; treat it as "no more
  // documents" rather than as a failure.
  if (res.status === 404) return undefined;
  if (!res.ok) {
    throw new Error(
      `Paperless returned HTTP ${res.status} while listing documents.`,
    );
  }
  const { results } = await res.json();
  return results?.[0];
}

/**
 * Asks Ollama for a title for the given text.
 *
 * @param {string} content - Full text of the document.
 * @returns {Promise<string | null>} A single-line title of at most 128
 *   characters, or null when Ollama did not produce a usable one.
 */
async function generateTitle(content) {
  // NOTE(review): 128 is the title length limit of Paperless-ngx — confirm
  // against the version of Paperless in use.
  const maxTitleLength = 128;
  const res = await fetch(`${OLLAMA_URL}/api/generate`, {
    headers: {
      "Content-Type": "application/json",
    },
    method: "POST",
    body: JSON.stringify({
      model: "llama3.1:latest",
      prompt: `${content}\n\nWhat's a plausible title for the above document? Only return the title; no comments; no quotation marks.`,
      stream: false,
    }),
  });
  if (!res.ok) {
    console.error(`Ollama returned HTTP ${res.status}.`);
    return null;
  }
  const { response } = await res.json();
  if (typeof response !== "string") return null;
  // Collapse line breaks and runs of whitespace so the title is one line.
  const title = response.replace(/\s+/g, " ").trim();
  if (title.length < 1) return null;
  // Truncate by code point so a surrogate pair is never cut in half.
  return [...title].slice(0, maxTitleLength).join("");
}

/**
 * Generates a title for one document, stores it, and removes the
 * PAPERLESS_TAG_ID tag from the document.
 *
 * @param {object} doc - A document object as returned by the Paperless API.
 * @returns {Promise<boolean>} true when the title was stored and the tag
 *   removed; false when the document was skipped and is still tagged.
 */
async function retitleDocument(doc) {
  const watermark = "Scanned by VueScan - get a free trial at www.hamrick.com";
  const lines = (doc.content ?? "")
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line.length > 0);
  if (lines.length < 1) {
    console.log(`Document ${doc.id} doesn't have any content.`);
    return false;
  }
  if (lines.every((line) => line === watermark)) {
    console.log(`Document ${doc.id} only contains the scanner's watermark.`);
    return false;
  }

  const title = await generateTitle(doc.content);
  if (title === null) {
    // Keep the tag so the document is retried on a later pass instead of
    // losing the tag without ever receiving a title.
    console.error(`Could not generate a title for document ${doc.id}.`);
    return false;
  }
  console.log(`Document ${doc.id}'s new title is "${title}".`);

  // The configured id may be a string (environment variable); tag ids on the
  // document are numbers, so compare numerically.
  const tagId = Number(PAPERLESS_TAG_ID);
  const res = await fetch(`${PAPERLESS_URL}/api/documents/${doc.id}/`, {
    method: "PUT",
    headers: {
      Authorization: `Token ${PAPERLESS_TOKEN}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      correspondent: doc.correspondent,
      document_type: doc.document_type,
      storage_path: doc.storage_path,
      tags: doc.tags.filter((id) => id !== tagId),
      title,
    }),
  });
  if (!res.ok) {
    console.error(
      `Paperless returned HTTP ${res.status} while updating document ${doc.id}.`,
    );
    return false;
  }
  return true;
}

/**
 * Walks through every document carrying the PAPERLESS_TAG_ID tag, starting at
 * the given position, and retitles each one. Logs "Nothing to do." once the
 * end of the list is reached.
 *
 * @param {number} [offset=0] - Zero-based position in the tagged list to start from.
 * @returns {Promise<void>}
 * @throws {Error} When listing documents fails with a non-404 error status.
 */
async function autotitle(offset = 0) {
  let index = offset;
  for (;;) {
    const doc = await fetchTaggedDocument(index);
    if (doc === undefined) {
      console.log("Nothing to do.");
      return;
    }
    const retitled = await retitleDocument(doc);
    // A retitled document loses the tag and drops out of the filtered list, so
    // the next document slides into the same position. Only step forward past
    // documents that are still tagged.
    if (!retitled) index += 1;
  }
}
// Poll forever: run one full pass over the tagged documents, pause, repeat.
const POLL_INTERVAL_MS = 2000;
for (;;) {
  await autotitle(0);
  // Pause for two seconds before looking for newly tagged documents.
  await new Promise((wake) => setTimeout(wake, POLL_INTERVAL_MS));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment