-
-
Save ichim-david/4b3e36c803bfb73b3dd8aed9120d00ad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Anthropic from '@anthropic-ai/sdk'; | |
import { Glob } from 'bun'; | |
/**
 * Shared Anthropic API client used for every translation request.
 *
 * The key is read from the `ANTHROPIC_KEY` environment variable at module
 * load time.
 */
export const anthropic = new Anthropic({
  apiKey: process.env.ANTHROPIC_KEY,
});
// Pattern for the transcription inputs: `.txt` files located in `./txt`.
const glob = new Glob("./txt/*.txt");
/** One transcribed diary page loaded from disk. */
type Page = {
  /** Numeric prefix of the file name (the part before the first `_`). */
  pageNumber: number;
  /** Full text content of the transcription file. */
  fileContent: string;
  /** Path of the transcription file as returned by the glob scan. */
  file: string;
};
/**
 * Derives the page number from a transcription file path.
 *
 * The page number is the integer at the start of the file name, i.e. the part
 * before the first `_` (for example `txt/12_scan.txt` yields `12`).
 *
 * @param filePath Path of a transcription file; `/` and `\` separators are both accepted.
 * @returns The parsed page number, or `undefined` when the file name does not
 *          start with an integer.
 */
function parsePageNumber(filePath: string): number | undefined {
  const baseName = filePath.split(/[\\/]/).pop() ?? "";
  const pageNumber = Number.parseInt(baseName.split("_")[0] ?? "", 10);
  return Number.isNaN(pageNumber) ? undefined : pageNumber;
}

const files: Page[] = [];
// The scan is rooted at the current working directory; which files are
// yielded is decided by the glob pattern itself.
for await (const file of glob.scan(".")) {
  const pageNumber = parsePageNumber(file);
  if (pageNumber === undefined) {
    // A NaN page number would break the numeric sort below and end up as
    // "NaN.md" on disk, so such files are reported and left out instead.
    console.warn(`Ignoring ${file}: file name does not start with a page number`);
    continue;
  }
  const fileContent = await Bun.file(file).text();
  files.push({
    pageNumber,
    fileContent,
    file,
  });
}
// Process pages in ascending page order.
files.sort((a, b) => a.pageNumber - b.pageNumber);
// let translations = [];
// Translate each page in order. A page whose output file already exists is
// skipped, so re-running the script only translates pages without output.
for (const file of files) {
  const outputPath = `./translations/${file.pageNumber}.md`;
  const existing = Bun.file(outputPath);
  if (await existing.exists()) {
    console.log(`Skipping ${file.pageNumber}.md`);
    // This is some code I used to do some light formatting
    // const content = await existing.text();
    // translations.push(content);
    // const newContent = `## [Page ${file.pageNumber}]\n\n${content.replaceAll('# ', '## ')}`;
    // // overwrite the file with the new content
    // await Bun.write(`./translations/${file.pageNumber}.md`, newContent);
    // console.log(newContent );
    continue;
  }
  console.log(`Translating ${file.pageNumber}.md`);
  const result = await completion(file.fileContent);
  // The response content is a list of blocks; only a text block carries the
  // translation. Guard against an empty list or a non-text first block so that
  // `undefined` is never handed to Bun.write.
  const textBlock = result.content.find((block) => block.type === "text");
  const translation = textBlock?.type === "text" ? textBlock.text : "";
  if (translation.length === 0) {
    // Leave no output file behind so the page is retried on the next run.
    console.warn(`No text returned for page ${file.pageNumber}; nothing written`);
    continue;
  }
  // Save the file to disk
  await Bun.write(outputPath, translation);
}
/**
 * Sends one transcribed diary page to the Anthropic Messages API and asks for
 * a corrected English translation formatted as markdown.
 *
 * @param fileContent Raw Dutch transcription text of a single page.
 * @param model Model identifier to use; defaults to the snapshot the script
 *              was written against.
 *              NOTE(review): confirm this snapshot is still available before
 *              relying on the default.
 * @returns The full message object returned by `anthropic.messages.create`.
 */
async function completion(fileContent: string, model = 'claude-3-opus-20240229') {
  // Built with explicit "\n" so that source indentation can never leak into
  // the prompt text.
  const prompt = `This text is the output of a diary transcription. The language is dutch. There are some mistakes. Can you fix the mistakes and translate the dutch to english? Add headings for each dated journal entry. Convert the dates to readable english dates. Return only english. Return only the result. Format as markdown.\n${fileContent}\n`;
  const message = await anthropic.messages.create({
    max_tokens: 4096,
    messages: [{ role: 'user', content: prompt }],
    model,
  });
  // Log the raw response for inspection while the script runs.
  console.log(message);
  return message;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment