Last active
December 28, 2021 22:08
-
-
Save bramses/de6b72fbe134cc485fd3ada18c5bdc27 to your computer and use it in GitHub Desktop.
Embedding an Obsidian Document
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Requests embeddings for a batch of texts by POSTing them to `this.endpoint`
 * (OpenAI's embeddings API), authenticated with `this.apiKey` as a Bearer token.
 *
 * Every newline in each input string is replaced with a space before the request is sent.
 * On success the result is also stored on `this.embeddings`.
 *
 * @param input - Texts to embed. The caller's array is left untouched.
 * @returns The embedding vectors together with the newline-stripped texts that were sent,
 *   or `null` when the request or the response handling throws (the error is logged
 *   via `console.error` rather than rethrown).
 */
createEmbeddings = async (input: string[]): Promise<EmbeddingsResponse|null> => {
  try {
    // Keep the normalized texts under their own name instead of overwriting the parameter.
    const text = input.map(inp => inp.replace(/\n/g, ' '));
    const body = { input: text };
    const response = await axios.post(this.endpoint, JSON.stringify(body),
      {
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.apiKey}`
        }
      }
    );
    // `response.data` is a plain value on the resolved axios response, so it needs no `await`.
    const data: OpenAIResponse = response.data;
    const embeddings = data.data.map(d => d.embedding);
    // Stored and returned as two separate objects so mutating one does not affect the other.
    this.embeddings = { embeddings, text };
    return { embeddings, text };
  } catch (err) {
    // Best-effort contract: report the failure and signal it to the caller with null.
    console.error(err);
    return null;
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Reads a document, splits it into per-line chunks, and requests embeddings for all chunks.
 *
 * Each non-blank line is trimmed, JSON-stringified, and passed to `chunkDocument`
 * (defined elsewhere; presumably splits a line into pieces that fit the token limit).
 * The per-line chunk lists are flattened into one list for a single embeddings request.
 * The resulting record is stored on `this.documentWithEmbeddings` and returned.
 *
 * @param filename - Path of the document to read and embed.
 * @returns An object holding the filename, the per-line chunk lists, and the embeddings response.
 */
async embedObsidianDocument(filename: string) {
  const txt = await readFile(filename, true)
  // Trim BEFORE filtering: otherwise whitespace-only lines (including the lone '\r' left on
  // blank lines of CRLF files) pass the length check and get embedded as the JSON string '""'.
  const chunks = txt
    .split('\n')
    .map(line => line.trim())
    .filter(line => line.length > 0)
    .map(line => chunkDocument(JSON.stringify(line)))
  const flattenedChunks = _.flatten(chunks)
  // One request for the whole flat list of chunks.
  const docEmbeddings = await this.embeddingsObj.createEmbeddings(flattenedChunks)
  const doc = {
    'filename': filename,
    'chunks': chunks,
    // NOTE(review): this assertion hides a possible null — createEmbeddings appears to
    // return null on failure, so callers should confirm embeddingsResponse is set before use.
    'embeddingsResponse': docEmbeddings as {
      embeddings: number[][]
      text: string[]
    }
  }
  this.documentWithEmbeddings = doc
  return doc
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment