Skip to content

Instantly share code, notes, and snippets.

@MohamedBassem
Last active October 21, 2024 20:51
Show Gist options
  • Save MohamedBassem/2c5e71347dadb8652c1aa1b8683b4454 to your computer and use it in GitHub Desktop.
Save MohamedBassem/2c5e71347dadb8652c1aa1b8683b4454 to your computer and use it in GitHub Desktop.
Scrape Obsidian Vault and upload it to Outline
/**
* This code was mostly written by GPT-4.
*
* It scrapes an Obsidian vault and recreates its structure in a new Outline collection.
* Inline images embedded in Obsidian notes are preserved and uploaded to Outline as attachments.
*/
const fs = require("fs");
const path = require("path");
const axios = require("axios");
const FormData = require("form-data");
// Configuration. The angle-bracket values are placeholders to be filled in before running.
// Base URL of the Outline instance; API paths such as `/api/documents.create` are appended to it.
const outlineUrl = "<OUTLINE_URL>";
// Root directory of the Obsidian vault to scan; document paths are computed relative to it.
const vaultRoot = "<VAULT_PATH>";
// Directory, relative to vaultRoot, in which embedded images are looked up.
const attachmentDir = "<RELATIVE_ATTACHMENT_DIR>";
// Outline API token, sent as a Bearer token in the Authorization header.
const apiToken = "<OUTLINE_API_KEY>";
// Name of the Outline collection that is created to hold the imported documents.
const collectionName = "<IMPORT_COLLECTION_NAME>";
/**
 * Recursively walks a vault directory and collects its folders and Markdown notes.
 *
 * Each directory yields an entry with empty content, pushed before the entries
 * of its children so that a parent always precedes its descendants. Each `.md`
 * file yields an entry holding the file's UTF-8 text. Other files are ignored.
 * Entry paths are relative to the module-level `vaultRoot`.
 *
 * Dot-prefixed entries (e.g. `.obsidian`, `.trash`, `.git`) are skipped: they
 * hold tool metadata rather than notes and would otherwise be imported as
 * empty documents.
 *
 * @param {string} vaultPath - Directory to scan (the vault root or a sub-directory of it).
 * @returns {Promise<Array<{path: string, content: string}>>} Collected entries.
 */
async function scrapeObsidianVault(vaultPath) {
  const vault = [];
  for (const file of fs.readdirSync(vaultPath)) {
    if (file.startsWith(".")) {
      continue;
    }
    const filePath = path.join(vaultPath, file);
    const relativePath = path.relative(vaultRoot, filePath);
    if (fs.statSync(filePath).isDirectory()) {
      // The folder itself becomes a (content-less) entry ahead of its children.
      vault.push({ path: relativePath, content: "" });
      vault.push(...(await scrapeObsidianVault(filePath)));
    } else if (file.endsWith(".md")) {
      vault.push({
        path: relativePath,
        content: fs.readFileSync(filePath, "utf8"),
      });
    }
  }
  return vault;
}
/**
 * Builds the HTTP headers used for JSON requests to the Outline API.
 *
 * @param {string} apiToken - Token placed in the `Authorization` header as a Bearer token.
 * @returns {{Authorization: string, "Content-Type": string}} Header object.
 */
function authenticate(apiToken) {
  const requestHeaders = {};
  requestHeaders.Authorization = `Bearer ${apiToken}`;
  requestHeaders["Content-Type"] = "application/json";
  return requestHeaders;
}
/**
 * Strips a leading frontmatter block (delimited by `---` lines) from a note.
 *
 * Frontmatter is only recognised when the very first line consists solely of
 * `---` (trailing whitespace such as `\r` is tolerated). A first line that
 * merely starts with three dashes (`----`, `---text`) is ordinary content and
 * the note is returned untouched.
 *
 * @param {string} content - Raw note text.
 * @returns {string} The text after the closing delimiter, trimmed; or `content`
 *   unchanged when it has no frontmatter.
 * @throws {Error} When frontmatter is opened but never closed.
 */
function removeFrontmatter(content) {
  const lines = content.split("\n");
  if (!/^---\s*$/.test(lines[0])) {
    return content;
  }
  // Search from the second line on: line 0 is the opening delimiter itself.
  const closingIndex = lines.findIndex(
    (line, index) => index > 0 && line.trim() === "---",
  );
  if (closingIndex === -1) {
    throw new Error("Invalid frontmatter: no closing delimiter found");
  }
  return lines
    .slice(closingIndex + 1)
    .join("\n")
    .trim();
}
/**
 * Creates and publishes a document by POSTing to `/api/documents.create`.
 *
 * @param {object} headers - HTTP headers sent with the request.
 * @param {string} title - Document title.
 * @param {string} text - Document body.
 * @param {string} collectionId - Collection that receives the document.
 * @param {string|null} [parentDocumentId=null] - Parent document id, or null for none.
 * @returns {Promise<object>} The response body of the API call.
 */
async function createDocument(
  headers,
  title,
  text,
  collectionId,
  parentDocumentId = null,
) {
  const { data: body } = await axios.post(
    `${outlineUrl}/api/documents.create`,
    { title, text, collectionId, parentDocumentId, publish: true },
    { headers },
  );
  return body;
}
/**
 * Replaces the text of an existing document by POSTing to `/api/documents.update`.
 *
 * @param {object} headers - HTTP headers sent with the request.
 * @param {string} id - Id of the document to update.
 * @param {string} text - New document body.
 * @returns {Promise<object>} The response body of the API call.
 */
async function updateDocument(headers, id, text) {
  const { data: body } = await axios.post(
    `${outlineUrl}/api/documents.update`,
    { id, text },
    { headers },
  );
  return body;
}
/**
 * Creates a collection by POSTing to `/api/collections.create`.
 *
 * @param {object} headers - HTTP headers sent with the request.
 * @param {string} name - Name of the new collection.
 * @returns {Promise<object>} The response body of the API call.
 */
async function createCollection(headers, name) {
  const { data: body } = await axios.post(
    `${outlineUrl}/api/collections.create`,
    { name },
    { headers },
  );
  return body;
}
/**
 * Registers an attachment via `/api/attachments.create` and uploads the file
 * to the upload URL returned by that call.
 *
 * Compared with a plain "send `form.key` to `outlineUrl + uploadUrl`" upload:
 * - every field of the returned `form` object is forwarded, not only `key`,
 *   so uploads that need additional form fields keep working;
 * - an absolute `uploadUrl` is used as-is instead of being prefixed with
 *   `outlineUrl`; a relative one is still appended to `outlineUrl`;
 * - the caller's JSON `Content-Type` is never mixed into the multipart request;
 * - the caller's headers (which carry the API token) are only sent when the
 *   upload goes to the same origin as `outlineUrl`.
 *
 * @param {object} headers - HTTP headers for Outline API requests.
 * @param {string} documentId - Document the attachment belongs to.
 * @param {string} name - Attachment file name.
 * @param {string} filePath - Path of the local file to upload.
 * @param {string} contentType - MIME type reported for the file.
 * @param {number} size - File size in bytes.
 * @returns {Promise<{status: number, data: string}>} HTTP status of the upload
 *   and the attachment URL reported by `attachments.create`.
 */
async function createAttachment(
  headers,
  documentId,
  name,
  filePath,
  contentType,
  size,
) {
  const url = `${outlineUrl}/api/attachments.create`;
  const payload = {
    documentId,
    name,
    contentType,
    size,
    preset: "documentAttachment",
  };
  const response = await axios.post(url, payload, { headers });
  const json = response.data.data;

  const isAbsoluteUploadUrl = /^https?:\/\//i.test(json.uploadUrl);
  const signedUrl = isAbsoluteUploadUrl
    ? json.uploadUrl
    : outlineUrl + json.uploadUrl;
  const isSameOrigin =
    !isAbsoluteUploadUrl ||
    new URL(signedUrl).origin === new URL(outlineUrl).origin;

  const formData = new FormData();
  for (const [key, value] of Object.entries(json.form || {})) {
    formData.append(key, value);
  }
  // The file part goes last, after all other form fields.
  formData.append("file", fs.createReadStream(filePath));

  const uploadHeaders = {};
  if (isSameOrigin) {
    for (const [headerName, value] of Object.entries(headers)) {
      // Header names are case-insensitive; drop any JSON content type so the
      // multipart content type (with its boundary) is the only one sent.
      if (headerName.toLowerCase() !== "content-type") {
        uploadHeaders[headerName] = value;
      }
    }
  }
  Object.assign(uploadHeaders, formData.getHeaders());

  const uploadResponse = await axios.post(signedUrl, formData, {
    headers: uploadHeaders,
  });
  return { status: uploadResponse.status, data: json.attachment.url };
}
/**
 * Removes the first twelve characters of a file name when they are all digits
 * (presumably a timestamp-style note prefix); other names are returned as-is.
 *
 * @param {string} filename - File name to clean up.
 * @returns {string} The name without the leading twelve-digit run.
 */
function trimFilename(filename) {
  const leadingTwelveDigits = /^\d{12}/;
  const trimmed = filename.replace(leadingTwelveDigits, "");
  return trimmed;
}
/**
 * Turns bare `http(s)` URLs into Markdown links of the form `[url](url)`.
 *
 * URLs that are already part of Markdown are left alone: inline links and
 * images (`[text](url)`, `![alt](url)`), autolinks (`<url>`), and URLs that
 * appear inside square brackets. Re-wrapping those would produce nested,
 * broken links.
 *
 * @param {string} text - Markdown text.
 * @returns {string} The text with bare URLs wrapped as links.
 */
function wrapUrlsInMarkdown(text) {
  // Alternatives are tried in order at each position:
  //   1. an existing inline link or image  -> consumed whole and kept verbatim
  //   2. an autolink in angle brackets     -> kept verbatim
  //   3. a bare URL not preceded by an unclosed `[` and not followed by `]`
  const pattern =
    /(!?\[[^\]]*\]\([^)]*\))|(<https?:\/\/[^>\s]+>)|(?<!\[[^\]]*)\b(https?:\/\/[^\s]+)\b(?!\])/g;
  return text.replace(pattern, (match, existingLink, autoLink, bareUrl) =>
    bareUrl === undefined ? match : `[${bareUrl}](${bareUrl})`,
  );
}
/**
 * Collects the targets of all `![[...]]` embeds found in the text.
 *
 * @param {string} content - Note text to scan.
 * @returns {string[]} The text between `![[` and `]]` for every embed, in
 *   order of appearance; an empty array when there is none.
 */
function extractImageLinks(content) {
  const embedPattern = /!\[\[(.*?)\]\]/g;
  const targets = [];
  let embed = embedPattern.exec(content);
  while (embed !== null) {
    targets.push(embed[1]);
    embed = embedPattern.exec(content);
  }
  return targets;
}
/**
 * Creates one Outline document per vault entry, preserving the folder
 * hierarchy, and uploads embedded images as attachments.
 *
 * For each entry the document is created first (its id is needed to attach
 * files), then every `![[...]]` embed whose file exists under
 * `vaultRoot/attachmentDir` is uploaded and rewritten to a Markdown image
 * pointing at the attachment URL. The document is updated only if at least one
 * embed was rewritten.
 *
 * Entries must be ordered so that a folder precedes its children; a parent that
 * has not been created yet results in a top-level document.
 *
 * @param {Array<{path: string, content: string}>} vault - Entries to import.
 * @param {string} attachmentDir - Attachment directory, relative to `vaultRoot`.
 * @param {object} headers - HTTP headers for Outline API requests.
 * @param {string} collectionId - Target collection id.
 * @returns {Promise<void>}
 */
async function migrateVaultToOutline(
  vault,
  attachmentDir,
  headers,
  collectionId,
) {
  // MIME type by lower-cased file extension; unknown extensions fall back to
  // "image/png", the value that was previously used for every attachment.
  const mimeTypesByExtension = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".svg": "image/svg+xml",
    ".bmp": "image/bmp",
    ".avif": "image/avif",
    ".pdf": "application/pdf",
  };
  const documentIds = new Map();

  for (const { path: fpath, content: rawContent } of vault) {
    const title = trimFilename(path.basename(fpath, ".md")).substring(0, 99);
    const parentDocumentId = documentIds.get(path.dirname(fpath)) || null;
    const initialContent = wrapUrlsInMarkdown(removeFrontmatter(rawContent));
    const documentResponse = await createDocument(
      headers,
      title,
      initialContent,
      collectionId,
      parentDocumentId,
    );
    const documentId = documentResponse.data.id;

    let content = initialContent;
    for (const link of extractImageLinks(initialContent)) {
      // Embeds may carry a display suffix after a pipe (`![[img.png|200]]`);
      // only the part before it names the file.
      const target = link.split("|")[0].trim();
      const imagePath = path.join(vaultRoot, attachmentDir, target);
      if (!fs.existsSync(imagePath)) {
        continue;
      }
      const imageStat = fs.statSync(imagePath);
      if (!imageStat.isFile()) {
        // An empty target resolves to the attachment directory itself.
        continue;
      }
      const extension = path.extname(imagePath).toLowerCase();
      const contentType = mimeTypesByExtension[extension] || "image/png";
      const { status, data: imageUrl } = await createAttachment(
        headers,
        documentId,
        path.basename(imagePath),
        imagePath,
        contentType,
        imageStat.size,
      );
      if (status >= 200 && status < 300) {
        // A replacer function keeps `$` sequences in the name or URL literal.
        content = content.replace(
          `![[${link}]]`,
          () => `![${target}](${imageUrl})`,
        );
      }
    }

    if (content !== initialContent) {
      await updateDocument(headers, documentId, content);
    }
    documentIds.set(fpath, documentId);
  }
}
// Example usage: scrape the vault, create the target collection, then import
// every entry into it. Failures are reported (including the API's response
// body when there is one) and turn into a non-zero exit code instead of an
// unhandled promise rejection.
(async () => {
  const vault = await scrapeObsidianVault(vaultRoot);
  const headers = authenticate(apiToken);
  const collectionResponse = await createCollection(headers, collectionName);
  const collectionId = collectionResponse.data.id;
  await migrateVaultToOutline(vault, attachmentDir, headers, collectionId);
})().catch((err) => {
  console.error("Migration failed:", err && err.message ? err.message : err);
  if (err && err.response && err.response.data !== undefined) {
    // Error objects with a `response` carry the server's explanation here.
    console.error(err.response.data);
  }
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment