Last active
October 14, 2024 05:32
-
-
Save jkoop/9c069098fc0ff188c4cbdcc7c2f73416 to your computer and use it in GitHub Desktop.
Automatically generate and set a title for documents with a specific tag in Paperless-ngx using Ollama
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! bun run
/**
 * The author disclaims copyright to this source code.
 */
// Runtime configuration, read once from the environment at start-up.
// Base URLs lose any trailing slashes because request paths in this file are
// appended with a leading "/" (a trailing slash would produce "//api/...").
const PAPERLESS_URL = (process.env.PAPERLESS_URL ?? "").trim().replace(/\/+$/, "");
const PAPERLESS_TOKEN = (process.env.PAPERLESS_TOKEN ?? "").trim();
// Environment values are strings; convert to a number so that non-numeric
// input (which becomes NaN) can be rejected below instead of slipping through.
const PAPERLESS_TAG_ID = Number(process.env.PAPERLESS_TAG_ID ?? 0);
const OLLAMA_URL = (process.env.OLLAMA_URL ?? "").trim().replace(/\/+$/, "");

if (PAPERLESS_URL.length < 1) {
  throw new Error("PAPERLESS_URL must be set.");
}
if (PAPERLESS_TOKEN.length < 1) {
  throw new Error("PAPERLESS_TOKEN must be set.");
}
if (!Number.isInteger(PAPERLESS_TAG_ID) || PAPERLESS_TAG_ID < 1) {
  throw new Error("PAPERLESS_TAG_ID must be set to a positive integer.");
}
if (OLLAMA_URL.length < 1) {
  throw new Error("OLLAMA_URL must be set.");
}
/**
 * Give every document that carries the configured tag a generated title.
 *
 * Tagged documents are fetched one at a time (page_size=1). For each one,
 * Ollama is asked for a title; when a usable title comes back and the update
 * request succeeds, the title is stored and the trigger tag is removed.
 * Documents that are empty, contain nothing but the scanner watermark, or
 * could not be titled or saved keep their tag and are stepped over.
 *
 * @param {number} [offset=0] How many leading tagged documents to step over.
 * @returns {Promise<void>} Resolves once a listing request yields no results.
 */
async function autotitle(offset = 0) {
  const WATERMARK = "Scanned by VueScan - get a free trial at www.hamrick.com";
  // PAPERLESS_TAG_ID may be a string (environment value); compare numerically.
  const tagId = Number(PAPERLESS_TAG_ID);

  /**
   * Try to title a single document.
   * @returns {Promise<boolean>} true when the document was updated (and thus
   *   lost the trigger tag), false when it was left untouched.
   */
  const retitle = async (doc) => {
    const text = doc.content ?? "";
    const lines = text
      .split("\n")
      .map((line) => line.trim())
      .filter((line) => line.length > 0);

    if (lines.length < 1) {
      console.log(`Document ${doc.id} doesn't have any content.`);
      return false;
    }
    // Blank lines are ignored above, so a watermark followed by a newline
    // (or repeated on several lines) is still recognised.
    if (lines.every((line) => line === WATERMARK)) {
      console.log(`Document ${doc.id} only contains the scanner's watermark.`);
      return false;
    }

    const generated = await fetch(`${OLLAMA_URL}/api/generate`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "llama3.1:latest",
        prompt: `${text}\n\nWhat's a plausible title for the above document? Only return the title; no comments; no quotation marks.`,
        stream: false,
      }),
    });
    if (!generated.ok) {
      console.error(
        `Ollama answered with HTTP ${generated.status} for document ${doc.id}; leaving it tagged.`,
      );
      return false;
    }
    const { response } = await generated.json();
    const title = typeof response === "string" ? response.trim() : "";
    // Without this guard a missing title would be dropped by JSON.stringify
    // and the update below would only strip the tag.
    if (title.length < 1) {
      console.error(
        `Ollama returned no title for document ${doc.id}; leaving it tagged.`,
      );
      return false;
    }

    const saved = await fetch(`${PAPERLESS_URL}/api/documents/${doc.id}/`, {
      method: "PUT",
      headers: {
        Authorization: `Token ${PAPERLESS_TOKEN}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        correspondent: doc.correspondent,
        document_type: doc.document_type,
        storage_path: doc.storage_path,
        tags: doc.tags.filter((id) => Number(id) !== tagId),
        title,
      }),
    });
    if (!saved.ok) {
      console.error(
        `Updating document ${doc.id} failed with HTTP ${saved.status}; leaving it tagged.`,
      );
      return false;
    }

    console.log(`Document ${doc.id}'s new title is "${title}".`);
    return true;
  };

  // Count of documents ahead of the next one that still carry the tag.
  // A successfully retitled document drops out of the filtered listing, so
  // the position only moves forward for documents that stayed tagged;
  // advancing unconditionally would skip every other document.
  let stillTagged = offset;

  for (;;) {
    const listing = await fetch(
      `${PAPERLESS_URL}/api/documents/?page=${stillTagged + 1}&page_size=1&tags__id__all=${PAPERLESS_TAG_ID}`,
      {
        headers: {
          Authorization: `Token ${PAPERLESS_TOKEN}`,
        },
        redirect: "error",
      },
    );
    // A body without `results` (for example an error payload for a page past
    // the end) is treated the same as an empty page.
    const docs = (await listing.json()).results ?? [];
    if (docs.length < 1) {
      console.log("Nothing to do.");
      return;
    }

    for (const doc of docs) {
      const untagged = await retitle(doc);
      if (!untagged) {
        stillTagged += 1;
      }
    }
  }
}
// Poll forever: run one pass over the tagged documents, pause, repeat.
const POLL_INTERVAL_MS = 2000;

for (;;) {
  try {
    await autotitle(0);
  } catch (err) {
    // A failed pass (e.g. a network error thrown by fetch) should not end
    // the polling loop; report it and try again on the next cycle.
    console.error("Autotitle pass failed:", err);
  }
  await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment