Skip to content

Instantly share code, notes, and snippets.

@mitchellhislop
Created November 3, 2024 04:31
Show Gist options
  • Save mitchellhislop/534ee1b9c0538cce9a4bcdba41b3255c to your computer and use it in GitHub Desktop.
Save mitchellhislop/534ee1b9c0538cce9a4bcdba41b3255c to your computer and use it in GitHub Desktop.
A New Squarespace to Ghost Migration Script
const GhostAdminAPI = require('@tryghost/admin-api');
const fs = require('fs');
const xml2js = require('xml2js');
const path = require('path');
const os = require('os');
// Ghost Admin API client used for every upload, lookup and write below.
// The URL and admin key are taken from the GHOST_API_URL and
// GHOST_ADMIN_API_KEY environment variables when they are set (so the key
// does not have to live in this file); otherwise the placeholders must be
// replaced by hand before running. `||` is deliberate: an empty variable
// should fall back too.
const api = new GhostAdminAPI({
  url: process.env.GHOST_API_URL || '<YOUR-GHOST-API-URL>',
  key: process.env.GHOST_ADMIN_API_KEY || '<YOUR-GHOST-ADMIN-API-KEY>',
  version: 'v5.0'
});
// Working locations: a scratch folder under the OS temp directory for files
// that are about to be uploaded, and an ./archive tree (images, posts, pages)
// under the current working directory for the local copy.
const tmpDir = path.join(os.tmpdir(), 'ghost-migration-images');
const archiveDir = path.join(process.cwd(), 'archive');
const archiveImagesDir = path.join(archiveDir, 'images');
const archivePostsDir = path.join(archiveDir, 'posts');
const archivePagesDir = path.join(archiveDir, 'pages');

// Create whichever of those folders does not exist yet.
for (const dir of [tmpDir, archiveDir, archiveImagesDir, archivePostsDir, archivePagesDir]) {
  if (fs.existsSync(dir)) {
    continue;
  }
  fs.mkdirSync(dir, { recursive: true });
}

// Source image URL -> { ghostUrl, localPath }, filled in as images are processed.
const imageUrlMap = new Map();
/**
 * Turn arbitrary text into a lowercase, dash-separated name usable as a file name.
 *
 * The text is lowercased, every run of characters outside a-z / 0-9 becomes a
 * single dash, and no dash is left at either end.
 *
 * @param {string} str - Text to convert (for example a post title).
 * @returns {string} The slug; empty when the text has no a-z / 0-9 characters.
 */
function createSafeFilename(str) {
  const chunks = str.toLowerCase().split(/[^a-z0-9]+/);
  return chunks.filter((chunk) => chunk.length > 0).join('-');
}
/**
 * Heuristic check for HTML markup in a piece of content.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} True when the text contains a "<" immediately followed by
 *   a letter and, somewhere after that, a ">".
 */
function containsHTML(content) {
  const tagLikePattern = /<[a-z][\s\S]*>/i;
  return tagLikePattern.exec(content) !== null;
}
/**
 * Download one image, keep a copy in the local archive and upload it to Ghost.
 *
 * Results are cached in `imageUrlMap`, so each source URL is downloaded and
 * uploaded at most once per run.
 *
 * @param {string} url - Source image URL.
 * @returns {Promise<string>} The Ghost URL of the uploaded image, or the
 *   original `url` when the download or upload fails (the failure is logged
 *   and nothing is added to `imageUrlMap`).
 */
async function processImage(url) {
  const cached = imageUrlMap.get(url);
  if (cached) {
    return cached.ghostUrl; // Return just the Ghost URL for content replacement
  }

  try {
    // The timestamp prefix keeps same-named images from different folders apart.
    const fileName = `${Date.now()}-${path.basename(url).split('?')[0]}`;
    const tempFilePath = path.join(tmpDir, fileName);
    const archiveImagePath = path.join(archiveImagesDir, fileName);
    const relativeImagePath = path.join('images', fileName);

    const response = await fetch(url);
    // fetch() only rejects on network errors; without this check an HTTP
    // error page would be archived and uploaded as if it were the image.
    if (!response.ok) {
      throw new Error(`Download failed with HTTP ${response.status} ${response.statusText}`);
    }
    const buffer = Buffer.from(await response.arrayBuffer());

    // Save to archive
    fs.writeFileSync(archiveImagePath, buffer);

    try {
      // The upload call takes a file path, so stage the bytes in the temp folder.
      fs.writeFileSync(tempFilePath, buffer);
      const ghostImage = await api.images.upload({ file: tempFilePath });

      // Store both Ghost URL and local path
      imageUrlMap.set(url, {
        ghostUrl: ghostImage.url,
        localPath: relativeImagePath
      });
      return ghostImage.url;
    } finally {
      // Clean up the staged file whether or not the upload worked.
      fs.rmSync(tempFilePath, { force: true });
    }
  } catch (error) {
    console.error(`Failed to process image ${url}:`, error);
    return url;
  }
}
/**
 * Migrate the Squarespace CDN images referenced by a piece of content and
 * return the content with those references rewritten and some markup cleaned.
 *
 * Every distinct `https://images.squarespace-cdn.com/...` URL is passed to
 * `processImage`. URLs that end up in `imageUrlMap` are then replaced by their
 * Ghost URL or, when `useLocal` is true, by their local archive path. URLs
 * whose processing failed are left untouched.
 *
 * @param {string} content - Raw post/page content; null/undefined is treated as empty.
 * @param {boolean} [useLocal=false] - Use local archive paths instead of Ghost URLs.
 * @returns {Promise<string>} The rewritten, cleaned content.
 */
async function processContent(content, useLocal = false) {
  const imgRegex = /(https:\/\/images\.squarespace-cdn\.com\/[^"'\s)]+)/g;
  const source = content == null ? '' : String(content);

  // First handle images: make sure each distinct URL has been migrated.
  const urls = new Set(source.match(imgRegex) ?? []);
  for (const url of urls) {
    console.log(`Processing image: ${url}`);
    await processImage(url);
  }

  // Swap every occurrence in one pass. Matching whole URL tokens means a URL
  // that is a prefix of another (pic.jpg vs pic.jpg?format=1000w) cannot
  // clobber the longer one, and returning the replacement from a callback
  // keeps "$" sequences in it from being interpreted as replacement patterns.
  const withImages = source.replace(imgRegex, (match) => {
    const imageData = imageUrlMap.get(match);
    if (!imageData) {
      return match;
    }
    return useLocal ? imageData.localPath : imageData.ghostUrl;
  });

  // Clean up any problematic HTML
  return withImages
    // Fix any self-closing tags that might cause issues
    .replace(/\/>/g, '>')
    // Remove any empty HTML comments
    .replace(/<!--\s*-->/g, '')
    // Remove any scripts
    .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
    // Escape ampersands that are not already the start of an entity
    .replace(/&(?![a-zA-Z0-9#]+;)/g, '&amp;');
}
/**
 * Wrap content in a Mobiledoc 0.3.1 document that holds exactly one card.
 *
 * The card is an `html` card when `containsHTML(content)` is true and a
 * `markdown` card otherwise; the single section refers to that card.
 *
 * @param {string} content - Post/page body.
 * @returns {object} The Mobiledoc document (not yet stringified).
 */
const createMobiledoc = (content) => {
  const cardName = containsHTML(content) ? 'html' : 'markdown';
  // The payload stores the body under a key named after the card type.
  const card = [cardName, { cardName, [cardName]: content }];

  return {
    version: '0.3.1',
    atoms: [],
    cards: [card],
    markups: [],
    sections: [[10, 0]]
  };
};
/**
 * Look up an existing post or page in Ghost by exact title.
 *
 * @param {string} title - Title to search for.
 * @param {string} [type='posts'] - Admin API resource to query ('posts' or 'pages').
 * @returns {Promise<object|null>} The first match, or null when there is no
 *   match or the lookup failed (the failure is logged).
 */
async function findExistingContent(title, type = 'posts') {
  try {
    // Both quote characters have to be backslash-escaped inside a filter
    // string literal; escaping only "'" made titles containing '"' fail.
    const escapedTitle = title.replace(/(['"])/g, '\\$1');
    const matches = await api[type].browse({
      filter: `title:'${escapedTitle}'`,
      limit: 1 // only the first match is ever used
    });
    return matches?.[0] ?? null;
  } catch (error) {
    console.error(`Error finding existing ${type}:`, error);
    return null;
  }
}
/**
 * Write one post or page to the local archive as a file with a front-matter header.
 *
 * The file goes to the posts or pages archive folder, is named after the
 * slugified title ('untitled' when the title yields an empty slug) and gets
 * an `.html` or `.md` extension depending on `containsHTML(processedContent)`.
 *
 * @param {object} item - Parsed export item; reads `title[0]`, `pubDate` and `category`.
 * @param {string} processedContent - Body to store (normally with local image paths).
 * @param {string} [type='posts'] - 'posts' selects the posts folder, anything else the pages folder.
 * @returns {Promise<void>}
 */
async function saveToArchive(item, processedContent, type = 'posts') {
  const title = item.title[0];
  const isHTML = containsHTML(processedContent);
  const directory = type === 'posts' ? archivePostsDir : archivePagesDir;
  // A title without any a-z / 0-9 characters slugifies to '', which would
  // produce a file literally named ".md" / ".html".
  const baseName = createSafeFilename(title) || 'untitled';
  const filePath = path.join(directory, `${baseName}.${isHTML ? 'html' : 'md'}`);

  // The parser delivers element values as arrays; take the first entry and
  // fall back to "now" when the date is missing or unparseable, because
  // toISOString() throws on an invalid Date.
  const rawDate = Array.isArray(item.pubDate) ? item.pubDate[0] : item.pubDate;
  const parsedDate = rawDate ? new Date(rawDate) : new Date();
  const date = Number.isNaN(parsedDate.getTime()) ? new Date() : parsedDate;

  // Create frontmatter
  const frontmatter = {
    title,
    date: date.toISOString(),
    // Categories without a nicename attribute are skipped instead of throwing.
    tags: (item.category ?? []).map((category) => category?.$?.nicename).filter(Boolean),
    type,
    format: isHTML ? 'html' : 'markdown'
  };
  const headerLines = Object.entries(frontmatter).map(
    ([key, value]) => `${key}: ${JSON.stringify(value)}`
  );
  const content = ['---', ...headerLines, '---', processedContent].join('\n');

  fs.writeFileSync(filePath, content, 'utf8');
  console.log(`Saved local archive: ${filePath}`);
}
/**
 * Convert an export `pubDate` value into an ISO-8601 timestamp.
 *
 * @param {string|string[]|undefined} pubDate - Raw value; arrays use their first entry.
 * @returns {string|undefined} ISO string, or undefined when missing or unparseable.
 */
function toPublishedAt(pubDate) {
  const raw = Array.isArray(pubDate) ? pubDate[0] : pubDate;
  if (!raw) {
    return undefined;
  }
  const parsed = new Date(raw);
  return Number.isNaN(parsed.getTime()) ? undefined : parsed.toISOString();
}

/**
 * Update an existing post/page, retrying once if Ghost reports an update collision.
 *
 * @param {object} existing - Record returned by the lookup; supplies `id` and `updated_at`.
 * @param {object} contentData - New title, mobiledoc, tags and optional published_at.
 * @param {string} type - Admin API resource ('posts' or 'pages').
 * @returns {Promise<void>}
 * @throws Any edit error other than a first UPDATE_COLLISION, or an Error when
 *   the item cannot be reloaded for the retry.
 */
async function editWithCollisionRetry(existing, contentData, type) {
  // Ghost compares the submitted updated_at with the stored one to detect
  // concurrent edits, so it has to be the value read from the server.
  const payloadFor = (current) => ({
    id: current.id,
    title: contentData.title,
    mobiledoc: contentData.mobiledoc,
    tags: contentData.tags,
    updated_at: current.updated_at,
    ...(contentData.published_at ? { published_at: contentData.published_at } : {})
  });

  try {
    await api[type].edit(payloadFor(existing));
  } catch (err) {
    if (err.code !== 'UPDATE_COLLISION') {
      throw err;
    }
    // Wait a second and try again with the latest server timestamp.
    console.log(`Update collision detected for ${contentData.title}, retrying...`);
    await new Promise((resolve) => setTimeout(resolve, 1000));
    const latest = await findExistingContent(contentData.title, type);
    if (!latest) {
      // Previously this case was skipped silently and reported as success.
      throw new Error(`Could not reload ${type} "${contentData.title}" after update collision`, {
        cause: err
      });
    }
    await api[type].edit(payloadFor(latest));
  }
}

/**
 * Migrate a single export item (post or page): rewrite its content, create or
 * update it in Ghost, then write the local archive copy.
 *
 * Failures for this item are logged and swallowed so the remaining items can
 * still be processed.
 *
 * @param {object} item - Parsed export item; reads `title[0]`,
 *   `content:encoded[0]`, `pubDate` and `category`.
 * @param {string} [type='posts'] - Admin API resource ('posts' or 'pages').
 * @returns {Promise<void>}
 */
async function processContentItem(item, type = 'posts') {
  const title = item.title[0];
  console.log(`Processing ${type} item: ${title}`);

  try {
    const rawContent = item['content:encoded'][0];
    // Process content for Ghost (with Ghost URLs)
    const ghostContent = await processContent(rawContent, false);
    // Process content for local archive (with local paths)
    const localContent = await processContent(rawContent, true);

    const contentData = {
      title,
      // Categories without a nicename attribute are skipped instead of throwing.
      tags: (item.category ?? []).map((category) => category?.$?.nicename).filter(Boolean),
      status: 'published',
      mobiledoc: JSON.stringify(createMobiledoc(ghostContent))
    };
    // Leave published_at out when there is no usable date rather than
    // sending an empty string.
    const publishedAt = toPublishedAt(item.pubDate);
    if (publishedAt) {
      contentData.published_at = publishedAt;
    }

    // Save to Ghost
    const existingContent = await findExistingContent(title, type);
    if (existingContent) {
      console.log(`Updating existing ${type}: ${title}`);
      await editWithCollisionRetry(existingContent, contentData, type);
    } else {
      console.log(`Creating new ${type}: ${title}`);
      await api[type].add(contentData);
    }

    // Save to local archive
    await saveToArchive(item, localContent, type);
    console.log(`Successfully processed ${type}: ${title}`);
  } catch (err) {
    console.error(`Failed to process ${type} ${title}:`, err);
    console.error('Error details:', err.message);
    if (err.details) {
      console.error('Validation details:', JSON.stringify(err.details, null, 2));
    }
  }
}
// Main process
const parser = new xml2js.Parser();

/**
 * Run the whole migration: read `squarespace.xml` next to this script, migrate
 * every post and then every page, and write `image-url-mapping.json` to the
 * current working directory.
 *
 * Errors are logged and turn the process exit code non-zero. The temporary
 * upload directory is removed whether or not the run succeeded.
 *
 * @returns {Promise<void>}
 */
async function runMigration() {
  try {
    let data;
    try {
      data = await fs.promises.readFile(path.join(__dirname, 'squarespace.xml'));
    } catch (err) {
      console.error('Error reading XML file:', err);
      process.exitCode = 1;
      return;
    }

    const result = await parser.parseStringPromise(data);
    // An export without any <item> has no `item` key at all.
    const items = result?.rss?.channel?.[0]?.item ?? [];
    // Items that carry no wp:post_type are ignored rather than crashing the filter.
    const posts = items.filter((item) => item['wp:post_type']?.includes('post') ?? false);
    const pages = items.filter((item) => item['wp:post_type']?.includes('page') ?? false);
    console.log(`Found ${posts.length} posts and ${pages.length} pages to process`);

    // Items are handled one at a time so log output stays readable and Ghost
    // is not hit with parallel writes.
    console.log('\nProcessing posts...');
    for (const post of posts) {
      await processContentItem(post, 'posts');
    }

    console.log('\nProcessing pages...');
    for (const page of pages) {
      await processContentItem(page, 'pages');
    }

    // Save URL mapping for reference
    fs.writeFileSync(
      'image-url-mapping.json',
      JSON.stringify(Object.fromEntries(imageUrlMap), null, 2)
    );
    console.log('\nMigration complete!');
    console.log('- Ghost blog updated');
    console.log(`- Local archive created at: ${archiveDir}`);
    console.log('- Image URL mapping saved to: image-url-mapping.json');
  } catch (err) {
    console.error('Error processing XML:', err);
    console.error('Error details:', err.message);
    process.exitCode = 1;
  } finally {
    // Clean up temporary directory. rmSync replaces the deprecated
    // rmdirSync(..., { recursive: true }); `force` makes a missing
    // directory a no-op.
    fs.rmSync(tmpDir, { recursive: true, force: true });
  }
}

runMigration().catch((err) => {
  // Only reachable if the cleanup itself fails.
  console.error('Unexpected migration failure:', err);
  process.exitCode = 1;
});
{
"name": "squarespace2ghost",
"version": "1.0.0",
"main": "index.js",
"license": "MIT",
"scripts": {
"create": "node -r esm index.js"
},
"dependencies": {
"@tryghost/admin-api": "^1.0.2",
"@tryghost/html-to-mobiledoc": "^0.6.3",
"esm": "^3.2.25",
"request": "^2.88.0",
"xml2js": "^0.4.23"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment