Last active
October 21, 2024 20:51
-
-
Save MohamedBassem/2c5e71347dadb8652c1aa1b8683b4454 to your computer and use it in GitHub Desktop.
Scrape Obsidian Vault and upload it to Outline
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * This code was mostly written by GPT-4.
 *
 * It scrapes an Obsidian vault and recreates its structure in a new Outline collection.
 * Inline images embedded in the Obsidian notes are preserved: they are uploaded to Outline and re-linked.
 */
const fs = require("fs"); | |
const path = require("path"); | |
const axios = require("axios"); | |
const FormData = require("form-data"); | |
// Migration settings. Each value may be supplied through the environment
// variable named on its line; when the variable is unset or empty, the
// placeholder is used and must be edited by hand before running.

// Base URL of the Outline instance. Trailing slashes are stripped because
// request URLs are built as `${outlineUrl}/api/...`.
const outlineUrl = (process.env.OUTLINE_URL || "<OUTLINE_URL>").replace(/\/+$/, "");
// Root directory of the Obsidian vault to import.
const vaultRoot = process.env.OBSIDIAN_VAULT_PATH || "<VAULT_PATH>";
// Folder holding embedded images, relative to vaultRoot.
const attachmentDir = process.env.OBSIDIAN_ATTACHMENT_DIR || "<RELATIVE_ATTACHMENT_DIR>";
// Outline API token; sent as a Bearer token on every API request.
const apiToken = process.env.OUTLINE_API_KEY || "<OUTLINE_API_KEY>";
// Name of the Outline collection that is created to receive the import.
const collectionName = process.env.OUTLINE_COLLECTION_NAME || "<IMPORT_COLLECTION_NAME>";
/**
 * Recursively collects the folders and Markdown notes found below a directory.
 *
 * Every sub-folder yields an entry with empty content, emitted before the
 * entries of its children, followed by one entry per `.md` file with the file
 * text. Other file types are ignored. Entries whose name starts with a dot
 * (for example `.obsidian`, `.trash`, `.git`) are skipped so that application
 * and version-control state is not imported as documents.
 *
 * @param {string} vaultPath - Directory to scan.
 * @param {string} [rootPath=vaultRoot] - Directory the returned paths are made relative to.
 * @returns {Promise<Array<{path: string, content: string}>>} Entries in traversal order.
 */
async function scrapeObsidianVault(vaultPath, rootPath = vaultRoot) {
  const vault = [];

  for (const file of fs.readdirSync(vaultPath)) {
    if (file.startsWith(".")) {
      continue;
    }

    const filePath = path.join(vaultPath, file);
    const relativePath = path.relative(rootPath, filePath);

    if (fs.statSync(filePath).isDirectory()) {
      // The folder entry goes first so it precedes its children in the result.
      vault.push({ path: relativePath, content: "" });
      const children = await scrapeObsidianVault(filePath, rootPath);
      // Pushed one by one: spreading a very large array into push() can
      // exceed the engine's argument limit.
      for (const child of children) {
        vault.push(child);
      }
    } else if (file.endsWith(".md")) {
      vault.push({
        path: relativePath,
        content: fs.readFileSync(filePath, "utf8"),
      });
    }
  }

  return vault;
}
/**
 * Builds the HTTP headers used for JSON API requests.
 *
 * @param {string} apiToken - Token placed in the `Authorization` header as a Bearer token.
 * @returns {{Authorization: string, "Content-Type": string}} Headers object.
 */
function authenticate(apiToken) {
  return {
    Authorization: `Bearer ${apiToken}`,
    "Content-Type": "application/json",
  };
}
/**
 * Strips a leading frontmatter block (delimited by `---` lines) from a note.
 *
 * @param {string} content - Raw note text.
 * @returns {string} The text after the closing `---`, trimmed; or `content`
 *   unchanged when the note does not open with a frontmatter delimiter.
 * @throws {Error} When the opening `---` has no matching closing `---` line.
 */
function removeFrontmatter(content) {
  const lines = content.split("\n");

  // Only a first line that is exactly `---` (ignoring trailing whitespace or a
  // CR) opens frontmatter. A plain prefix check would also accept `----` or
  // `---text`, and then strip or reject ordinary note content.
  if (lines[0].trimEnd() !== "---") {
    return content;
  }

  const endIndex = lines.findIndex(
    (line, index) => index > 0 && line.trim() === "---",
  );
  if (endIndex === -1) {
    throw new Error("Invalid frontmatter: no closing delimiter found");
  }

  return lines
    .slice(endIndex + 1)
    .join("\n")
    .trim();
}
/**
 * Creates a published document by POSTing to `/api/documents.create`.
 *
 * @param {object} headers - HTTP headers sent with the request.
 * @param {string} title - Document title.
 * @param {string} text - Document body.
 * @param {string} collectionId - Collection the document is created in.
 * @param {?string} [parentDocumentId=null] - Parent document id, or null for none.
 * @returns {Promise<object>} The response body.
 */
async function createDocument(
  headers,
  title,
  text,
  collectionId,
  parentDocumentId = null,
) {
  const url = `${outlineUrl}/api/documents.create`;
  const payload = {
    title,
    text,
    collectionId,
    parentDocumentId,
    publish: true,
  };
  const response = await axios.post(url, payload, { headers });
  return response.data;
}
/**
 * Replaces a document's text by POSTing to `/api/documents.update`.
 *
 * @param {object} headers - HTTP headers sent with the request.
 * @param {string} id - Id of the document to update.
 * @param {string} text - New document body.
 * @returns {Promise<object>} The response body.
 */
async function updateDocument(headers, id, text) {
  const url = `${outlineUrl}/api/documents.update`;
  const payload = { id, text };
  const response = await axios.post(url, payload, { headers });
  return response.data;
}
/**
 * Creates a collection by POSTing to `/api/collections.create`.
 *
 * @param {object} headers - HTTP headers sent with the request.
 * @param {string} name - Name of the new collection.
 * @returns {Promise<object>} The response body.
 */
async function createCollection(headers, name) {
  const url = `${outlineUrl}/api/collections.create`;
  const payload = { name };
  const response = await axios.post(url, payload, { headers });
  return response.data;
}
/**
 * Registers an attachment via `/api/attachments.create`, then uploads the file
 * to the upload URL returned by that call.
 *
 * @param {object} headers - HTTP headers for the Outline API (including auth).
 * @param {string} documentId - Document the attachment belongs to.
 * @param {string} name - Attachment file name.
 * @param {string} filePath - Path of the local file to upload.
 * @param {string} contentType - MIME type reported for the file.
 * @param {number} size - File size in bytes.
 * @returns {Promise<{status: number, data: string}>} HTTP status of the upload
 *   request and the attachment URL reported by `attachments.create`.
 */
async function createAttachment(
  headers,
  documentId,
  name,
  filePath,
  contentType,
  size,
) {
  const url = `${outlineUrl}/api/attachments.create`;
  const payload = {
    documentId,
    name,
    contentType,
    size,
    preset: "documentAttachment",
  };
  const response = await axios.post(url, payload, { headers });
  const json = response.data.data;

  // A relative upload path is resolved against the Outline instance; an
  // absolute URL is used as returned, since prefixing it would corrupt it.
  const isAbsoluteUpload = /^https?:\/\//i.test(json.uploadUrl);
  const signedUrl = isAbsoluteUpload
    ? json.uploadUrl
    : outlineUrl + json.uploadUrl;
  const isOutlineHosted =
    !isAbsoluteUpload ||
    new URL(json.uploadUrl).origin === new URL(outlineUrl).origin;

  // Forward every form field the server handed back, not just `key`, so
  // uploads that depend on the additional fields are not rejected.
  const formData = new FormData();
  for (const [field, value] of Object.entries(json.form || {})) {
    if (value !== undefined && value !== null) {
      formData.append(field, String(value));
    }
  }
  // The file part is appended last, after all other form fields.
  formData.append("file", fs.createReadStream(filePath));

  // Never leak the API token to a foreign upload host, and drop the JSON
  // content type so it cannot clash with the multipart header set below.
  const uploadHeaders = {};
  if (isOutlineHosted) {
    for (const [headerName, headerValue] of Object.entries(headers || {})) {
      if (headerName.toLowerCase() !== "content-type") {
        uploadHeaders[headerName] = headerValue;
      }
    }
  }
  Object.assign(uploadHeaders, formData.getHeaders());

  const uploadResponse = await axios.post(signedUrl, formData, {
    headers: uploadHeaders,
  });
  return { status: uploadResponse.status, data: json.attachment.url };
}
/**
 * Removes a leading run of exactly twelve digits from a file name.
 * Names that do not start with twelve digits are returned unchanged.
 *
 * @param {string} filename - File name to clean.
 * @returns {string} The name without the twelve-digit prefix.
 */
function trimFilename(filename) {
  return filename.replace(/^\d{12}/, "");
}
/**
 * Turns bare http(s) URLs into Markdown links of the form `[url](url)`.
 *
 * Spans that already carry their own link or literal semantics are left
 * untouched: fenced and inline code, bracketed text with or without a
 * `(target)` (Markdown links, images, wiki links), and `<url>` autolinks.
 * Without this protection the URL inside `[text](url)` would be wrapped a
 * second time, corrupting the existing link.
 *
 * @param {string} text - Markdown text.
 * @returns {string} Text with bare URLs wrapped.
 */
function wrapUrlsInMarkdown(text) {
  // Group 1 captures protected spans; the final alternative matches a bare
  // URL, which ends at the last word boundary so trailing punctuation stays
  // outside the link.
  const protectedOrUrl =
    /(```[\s\S]*?```|`[^`\n]*`|!?\[[^\]]*\](?:\([^)]*\))?|<https?:\/\/[^\s>]+>)|\bhttps?:\/\/\S+\b/g;

  // A replacer function is used so `$` sequences in a URL are never
  // interpreted as replacement patterns.
  return text.replace(protectedOrUrl, (match, protectedSpan) =>
    protectedSpan === undefined ? `[${match}](${match})` : match,
  );
}
/**
 * Lists the targets of all `![[...]]` embeds in a note, in order of appearance.
 * The text between the brackets is returned verbatim; duplicates are kept.
 *
 * @param {string} content - Note text.
 * @returns {string[]} Embed targets, or an empty array when there are none.
 */
function extractImageLinks(content) {
  return Array.from(content.matchAll(/!\[\[(.*?)\]\]/g), (match) => match[1]);
}
/**
 * Maps a file extension to the MIME type reported when uploading it.
 *
 * @param {string} filePath - Path whose extension is inspected.
 * @returns {string} Matching MIME type, or `application/octet-stream` when unknown.
 */
function guessAttachmentContentType(filePath) {
  const contentTypes = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
    ".bmp": "image/bmp",
    ".avif": "image/avif",
  };
  const extension = path.extname(filePath).toLowerCase();
  return contentTypes[extension] || "application/octet-stream";
}

/**
 * Creates one Outline document per vault entry, nesting each under the
 * document created for its parent folder, and uploads embedded images.
 *
 * For every entry the frontmatter is removed and bare URLs are wrapped, then
 * the document is created. Each `![[file]]` embed whose file exists under
 * `vaultRoot/attachmentDir` is uploaded once per document and all its
 * occurrences are rewritten to `![file](attachmentUrl)`. The document is
 * updated only when at least one embed was rewritten.
 *
 * @param {Array<{path: string, content: string}>} vault - Entries to import;
 *   a folder entry must precede the entries inside it for nesting to work.
 * @param {string} attachmentDir - Attachment folder, relative to `vaultRoot`.
 * @param {object} headers - HTTP headers for the Outline API.
 * @param {string} collectionId - Collection that receives the documents.
 * @returns {Promise<void>}
 */
async function migrateVaultToOutline(
  vault,
  attachmentDir,
  headers,
  collectionId,
) {
  // A Map, so folder names such as "constructor" cannot collide with
  // properties inherited by a plain object.
  const documentIds = new Map();

  for (const { path: fpath, content: rawContent } of vault) {
    const title = trimFilename(path.basename(fpath, ".md")).substring(0, 99);
    const parentDocumentId = documentIds.get(path.dirname(fpath)) || null;
    const initialContent = wrapUrlsInMarkdown(removeFrontmatter(rawContent));

    const documentResponse = await createDocument(
      headers,
      title,
      initialContent,
      collectionId,
      parentDocumentId,
    );
    const documentId = documentResponse.data.id;
    documentIds.set(fpath, documentId);

    let content = initialContent;
    // Upload each distinct file once per document, however often it is embedded.
    const uploadedUrls = new Map();

    for (const link of new Set(extractImageLinks(initialContent))) {
      // `![[image.png|300]]` carries a display hint after the pipe; only the
      // part before it names the file.
      const target = link.split("|")[0].trim();
      if (!target) {
        continue;
      }

      const imagePath = path.join(vaultRoot, attachmentDir, target);
      let imageUrl = uploadedUrls.get(imagePath);

      if (imageUrl === undefined) {
        if (!fs.existsSync(imagePath)) {
          continue;
        }
        const stats = fs.statSync(imagePath);
        if (!stats.isFile()) {
          continue;
        }
        const { status, data } = await createAttachment(
          headers,
          documentId,
          path.basename(imagePath),
          imagePath,
          guessAttachmentContentType(imagePath),
          stats.size,
        );
        // Any 2xx status counts as a successful upload.
        if (status < 200 || status >= 300) {
          continue;
        }
        imageUrl = data;
        uploadedUrls.set(imagePath, imageUrl);
      }

      // split/join replaces every occurrence and treats `$` in the URL or
      // file name literally.
      content = content
        .split(`![[${link}]]`)
        .join(`![${target}](${imageUrl})`);
    }

    if (content !== initialContent) {
      await updateDocument(headers, documentId, content);
    }
  }
}
// Entry point: scrape the vault, create the target collection, then import
// every scraped entry into it.
(async () => {
  const vault = await scrapeObsidianVault(vaultRoot);
  const headers = authenticate(apiToken);
  const collectionResponse = await createCollection(headers, collectionName);
  const collectionId = collectionResponse.data.id;
  await migrateVaultToOutline(vault, attachmentDir, headers, collectionId);
})().catch((error) => {
  // Without a handler a failed request surfaces only as an unhandled
  // rejection; report the HTTP status and body when the error carries them.
  const response = error && error.response;
  const detail = response
    ? `${response.status} ${JSON.stringify(response.data)}`
    : (error && error.message) || String(error);
  console.error(`Migration failed: ${detail}`);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment