Created
November 3, 2024 04:31
-
-
Save mitchellhislop/534ee1b9c0538cce9a4bcdba41b3255c to your computer and use it in GitHub Desktop.
A New Squarespace to Ghost Migration Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| const GhostAdminAPI = require('@tryghost/admin-api'); | |
| const fs = require('fs'); | |
| const xml2js = require('xml2js'); | |
| const path = require('path'); | |
| const os = require('os'); | |
// Ghost Admin API client; the URL and key placeholders must be filled in before running
const api = new GhostAdminAPI({
  url: '<YOUR-GHOST-API-URL>',
  key: '<YOUR-GHOST-ADMIN-API-KEY>',
  version: 'v5.0'
});

// Working locations: a scratch directory under the OS temp dir for files awaiting
// upload, and an `archive/` tree under the current working directory
const tmpDir = path.join(os.tmpdir(), 'ghost-migration-images');
const archiveDir = path.join(process.cwd(), 'archive');
const archiveImagesDir = path.join(archiveDir, 'images');
const archivePostsDir = path.join(archiveDir, 'posts');
const archivePagesDir = path.join(archiveDir, 'pages');

// Create whichever of the working directories do not exist yet
for (const dir of [tmpDir, archiveDir, archiveImagesDir, archivePostsDir, archivePagesDir]) {
  if (fs.existsSync(dir)) {
    continue;
  }
  fs.mkdirSync(dir, { recursive: true });
}

// Maps each original image URL to { ghostUrl, localPath } once it has been migrated
const imageUrlMap = new Map();
/**
 * Turn an arbitrary title into a lowercase, hyphen-separated filename stem.
 *
 * Accented Latin letters are reduced to their base letter (e.g. "é" -> "e")
 * instead of being dropped. Every other run of characters outside [a-z0-9]
 * collapses to a single hyphen, and leading/trailing hyphens are removed.
 *
 * @param {string} str - Title to convert; null/undefined are treated as empty.
 * @returns {string} The slug, or 'untitled' when nothing usable remains, so
 *   the caller never ends up with a bare ".md" / ".html" filename.
 */
function createSafeFilename(str) {
  const slug = String(str ?? '')
    // Decompose accented characters, then strip the combining marks
    .normalize('NFKD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');

  return slug === '' ? 'untitled' : slug;
}
/**
 * Report whether a piece of content contains at least one HTML opening tag.
 *
 * A tag is `<name`, optionally followed by whitespace-introduced attributes,
 * an optional `/`, and `>`. Requiring that shape keeps plain text such as
 * `Map<int, string>` or `a <b` from being classified as HTML.
 *
 * @param {string} content - Text to inspect.
 * @returns {boolean} true when an opening (or self-closing) tag is present;
 *   false otherwise, including for non-string input.
 */
function containsHTML(content) {
  if (typeof content !== 'string') {
    return false;
  }
  return /<[a-z][a-z0-9:-]*(?:\s[^<>]*)?\/?>/i.test(content);
}
/**
 * Download a remote image, keep a copy in the local archive, and upload it to Ghost.
 *
 * Results are cached in `imageUrlMap`, so each distinct URL is uploaded at most
 * once per run. The function is best-effort: any failure (bad URL, non-2xx
 * response, write error, upload error) is logged and the original URL is
 * returned so the surrounding content still references a working image.
 *
 * @param {string} url - Absolute URL of the image to migrate.
 * @returns {Promise<string>} The Ghost URL on success, otherwise `url` unchanged.
 */
async function processImage(url) {
  // Already migrated during this run: reuse the stored Ghost URL
  const cached = imageUrlMap.get(url);
  if (cached) {
    return cached.ghostUrl;
  }

  try {
    // Name the file after the last path segment only, so query strings and
    // fragments never leak into the filename. The timestamp prefix keeps
    // equally named source files apart.
    const baseName = path.posix.basename(new URL(url).pathname);
    const fileName = `${Date.now()}-${baseName}`;
    const tempFilePath = path.join(tmpDir, fileName);
    const archiveImagePath = path.join(archiveImagesDir, fileName);
    const relativeImagePath = path.join('images', fileName);

    // Download image; an error response must not be stored as if it were image data
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Download failed with HTTP ${response.status} ${response.statusText}`);
    }
    const buffer = Buffer.from(await response.arrayBuffer());

    // One copy for the Ghost upload, one for the permanent local archive
    fs.writeFileSync(tempFilePath, buffer);
    fs.writeFileSync(archiveImagePath, buffer);

    try {
      const ghostImage = await api.images.upload({
        file: tempFilePath
      });

      // Store both Ghost URL and local path
      imageUrlMap.set(url, {
        ghostUrl: ghostImage.url,
        localPath: relativeImagePath
      });
      return ghostImage.url;
    } finally {
      // The temp copy is only needed for the upload itself
      if (fs.existsSync(tempFilePath)) {
        fs.unlinkSync(tempFilePath);
      }
    }
  } catch (error) {
    console.error(`Failed to process image ${url}:`, error);
    return url;
  }
}
/**
 * Migrate every Squarespace CDN image referenced in `content` and return the
 * content with those references rewritten and some HTML clean-up applied.
 *
 * Each distinct image URL is passed to `processImage`. URLs that were migrated
 * successfully are then swapped, in a single pass over the text, for either
 * their Ghost URL or their local archive path. Replacing in one pass means a
 * URL that is a prefix of another (for example the same image with and without
 * a `?format=` query) cannot corrupt the longer one, and `$` sequences in a
 * replacement are never interpreted as substitution patterns.
 *
 * @param {string} content - Raw post/page body; null/undefined are treated as empty.
 * @param {boolean} [useLocal=false] - true to link to local archive paths,
 *   false to link to the uploaded Ghost URLs.
 * @returns {Promise<string>} The rewritten, cleaned content.
 */
async function processContent(content, useLocal = false) {
  const source = String(content ?? '');
  const imgRegex = /(https:\/\/images\.squarespace-cdn\.com\/[^"'\s)]+)/g;

  // Migrate each distinct image once; results land in imageUrlMap
  for (const url of new Set(source.match(imgRegex) || [])) {
    console.log(`Processing image: ${url}`);
    await processImage(url);
  }

  // Images that failed to migrate have no map entry and keep their original URL
  const withImages = source.replace(imgRegex, (url) => {
    const imageData = imageUrlMap.get(url);
    if (!imageData) {
      return url;
    }
    return useLocal ? imageData.localPath : imageData.ghostUrl;
  });

  // Clean up any problematic HTML
  return withImages
    // Fix any self-closing tags that might cause issues
    .replace(/\/>/g, '>')
    // Remove any empty HTML comments
    .replace(/<!--\s*-->/g, '')
    // Remove any scripts
    .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
    // Escape bare ampersands that are not already part of an entity
    .replace(/&(?![a-zA-Z0-9#]+;)/g, '&amp;');
}
/**
 * Wrap content in a mobiledoc document holding exactly one card.
 *
 * Content that `containsHTML` recognises as HTML goes into an `html` card;
 * everything else goes into a `markdown` card.
 *
 * @param {string} content - Body text to embed.
 * @returns {object} A mobiledoc 0.3.1 structure (not yet serialised).
 */
const createMobiledoc = (content) => {
  const card = containsHTML(content)
    ? ['html', { cardName: 'html', html: content }]
    : ['markdown', { cardName: 'markdown', markdown: content }];

  return {
    version: '0.3.1',
    atoms: [],
    cards: [card],
    markups: [],
    // Single section entry; presumably a card section pointing at cards[0]
    // (per the mobiledoc format) — confirm against the spec if this changes
    sections: [[10, 0]]
  };
};
/**
 * Look up an existing Ghost post or page by exact title.
 *
 * @param {string} title - Title to search for.
 * @param {string} [type='posts'] - Name of the API resource to query
 *   (callers in this file pass 'posts' or 'pages').
 * @returns {Promise<object|null>} The first match, or null when nothing
 *   matches or the lookup fails (failures are logged, not thrown).
 */
async function findExistingContent(title, type = 'posts') {
  try {
    // Escape single quotes so the title can sit inside the quoted filter value
    const escapedTitle = title.replace(/'/g, "\\'");
    const matches = await api[type].browse({
      filter: `title:'${escapedTitle}'`
    });

    if (matches.length > 0) {
      return matches[0];
    }
    return null;
  } catch (error) {
    console.error(`Error finding existing ${type}:`, error);
    return null;
  }
}
/**
 * Write one post or page to the local archive as a file with a frontmatter header.
 *
 * The file is named after the slugified title and gets an `.html` or `.md`
 * extension depending on whether the content contains HTML. Items whose titles
 * produce the same slug overwrite one another.
 *
 * @param {object} item - Parsed export item; reads `title`, `pubDate` and `category`.
 * @param {string} processedContent - Body to store (already rewritten for local use).
 * @param {string} [type='posts'] - 'posts' selects the posts directory; any
 *   other value selects the pages directory.
 * @returns {Promise<void>}
 */
async function saveToArchive(item, processedContent, type = 'posts') {
  const title = item.title[0];
  const isHTML = containsHTML(processedContent);
  const directory = type === 'posts' ? archivePostsDir : archivePagesDir;
  const filePath = path.join(directory, `${createSafeFilename(title)}.${isHTML ? 'html' : 'md'}`);

  // A missing or unparseable pubDate falls back to "now" rather than throwing
  // from toISOString() and losing the archive copy
  const rawDate = Array.isArray(item.pubDate) ? item.pubDate[0] : item.pubDate;
  let published = rawDate ? new Date(rawDate) : new Date();
  if (Number.isNaN(published.getTime())) {
    console.warn(`Unparseable pubDate for "${title}", using the current time instead`);
    published = new Date();
  }

  // Categories without a nicename attribute are skipped instead of crashing
  const tags = (item.category ?? [])
    .map((c) => c?.$?.nicename)
    .filter((name) => typeof name === 'string' && name !== '');

  // Create frontmatter
  const frontmatter = {
    title: title,
    date: published.toISOString(),
    tags: tags,
    type: type,
    format: isHTML ? 'html' : 'markdown'
  };

  // One `key: <JSON value>` line per field, fenced by `---` lines
  const frontmatterLines = Object.entries(frontmatter)
    .map(([key, value]) => `${key}: ${JSON.stringify(value)}`);
  const content = ['---', ...frontmatterLines, '---', processedContent].join('\n');

  fs.writeFileSync(filePath, content, 'utf8');
  console.log(`Saved local archive: ${filePath}`);
}
/**
 * Migrate a single export item (post or page): rewrite its images, create or
 * update it in Ghost, then store a copy in the local archive.
 *
 * Every failure is caught and logged here, so one bad item never stops the
 * caller's loop. The archive copy is only written after Ghost accepted the item.
 *
 * @param {object} item - Parsed export item; reads `title`, `content:encoded`,
 *   `pubDate` and `category`.
 * @param {string} [type='posts'] - API resource to write to ('posts' or 'pages').
 * @returns {Promise<void>}
 */
async function processContentItem(item, type = 'posts') {
  const title = item.title?.[0];
  console.log(`Processing ${type} item: ${title}`);

  try {
    if (typeof title !== 'string') {
      throw new Error('Item has no title');
    }
    const rawContent = item['content:encoded']?.[0];
    if (typeof rawContent !== 'string') {
      throw new Error('Item has no content:encoded body');
    }

    // Process content for Ghost (with Ghost URLs)
    const ghostContent = await processContent(rawContent, false);
    // Process content for local archive (with local paths); images migrated by
    // the first pass are served from the URL map instead of being uploaded again
    const localContent = await processContent(rawContent, true);

    // Categories without a nicename attribute are skipped instead of crashing
    const tags = (item.category ?? [])
      .map((c) => c?.$?.nicename)
      .filter((name) => typeof name === 'string' && name !== '');

    const contentData = {
      title: title,
      tags,
      status: 'published',
      updated_at: new Date().toISOString(),
      mobiledoc: JSON.stringify(createMobiledoc(ghostContent))
    };

    // Only send published_at when there is a real date; an empty string is not a valid date
    const rawDate = Array.isArray(item.pubDate) ? item.pubDate[0] : item.pubDate;
    const publishedDate = rawDate ? new Date(rawDate) : null;
    if (publishedDate !== null && !Number.isNaN(publishedDate.getTime())) {
      contentData.published_at = publishedDate.toISOString();
    }

    // Edits must echo the server's current updated_at; any other value is
    // treated as a concurrent modification and rejected
    const toEditPayload = (current) => {
      const payload = {
        id: current.id,
        title: contentData.title,
        mobiledoc: contentData.mobiledoc,
        tags: contentData.tags,
        updated_at: current.updated_at
      };
      if (contentData.published_at) {
        payload.published_at = contentData.published_at;
      }
      return payload;
    };

    // Save to Ghost
    const existingContent = await findExistingContent(title, type);
    if (existingContent) {
      console.log(`Updating existing ${type}: ${title}`);
      try {
        await api[type].edit(toEditPayload(existingContent));
      } catch (err) {
        if (err.code !== 'UPDATE_COLLISION') {
          throw err;
        }
        // Something changed the item in between: wait, refetch, retry once
        console.log(`Update collision detected for ${title}, retrying...`);
        await new Promise(resolve => setTimeout(resolve, 1000));
        const latestContent = await findExistingContent(title, type);
        if (!latestContent) {
          // Nothing to retry against; report the collision instead of claiming success
          throw err;
        }
        await api[type].edit(toEditPayload(latestContent));
      }
    } else {
      console.log(`Creating new ${type}: ${title}`);
      await api[type].add(contentData);
    }

    // Save to local archive
    await saveToArchive(item, localContent, type);
    console.log(`Successfully processed ${type}: ${title}`);
  } catch (err) {
    console.error(`Failed to process ${type} ${title}:`, err);
    console.error('Error details:', err.message);
    if (err.details) {
      console.error('Validation details:', JSON.stringify(err.details, null, 2));
    }
  }
}
// Main process: read squarespace.xml from this script's directory, migrate all
// posts and then all pages one at a time, write the image URL mapping, and
// remove the temporary upload directory.
const parser = new xml2js.Parser();
fs.readFile(path.join(__dirname, 'squarespace.xml'), async function(err, data) {
  if (err) {
    console.error('Error reading XML file:', err);
    return;
  }

  try {
    const result = await parser.parseStringPromise(data);
    const channel = result?.rss?.channel?.[0];
    if (!channel) {
      throw new Error('squarespace.xml does not contain an <rss><channel> element');
    }

    // An export without any <item> has no `item` key at all
    const items = channel.item ?? [];
    // Items lacking wp:post_type are ignored rather than crashing the filter
    const isOfType = (item, wanted) => (item['wp:post_type'] ?? []).includes(wanted);
    const posts = items.filter(i => isOfType(i, 'post'));
    const pages = items.filter(i => isOfType(i, 'page'));
    console.log(`Found ${posts.length} posts and ${pages.length} pages to process`);

    // Process posts
    console.log('\nProcessing posts...');
    for (const post of posts) {
      await processContentItem(post, 'posts');
    }

    // Process pages
    console.log('\nProcessing pages...');
    for (const page of pages) {
      await processContentItem(page, 'pages');
    }

    // Save URL mapping for reference
    fs.writeFileSync('image-url-mapping.json', JSON.stringify(Object.fromEntries(imageUrlMap), null, 2));
    console.log('\nMigration complete!');
    console.log('- Ghost blog updated');
    console.log(`- Local archive created at: ${archiveDir}`);
    console.log('- Image URL mapping saved to: image-url-mapping.json');
  } catch (error) {
    console.error('Error processing XML:', error);
    console.error('Error details:', error.message);
  } finally {
    // Clean up temporary directory whether or not the migration succeeded;
    // `force` makes a missing directory a no-op
    try {
      fs.rmSync(tmpDir, { recursive: true, force: true });
    } catch (cleanupError) {
      console.error('Failed to remove temporary directory:', cleanupError);
    }
  }
});
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "name": "squarespace2ghost", | |
| "version": "1.0.0", | |
| "main": "index.js", | |
| "license": "MIT", | |
| "scripts": { | |
| "create": "node -r esm index.js" | |
| }, | |
| "dependencies": { | |
| "@tryghost/admin-api": "^1.0.2", | |
| "@tryghost/html-to-mobiledoc": "^0.6.3", | |
| "esm": "^3.2.25", | |
| "request": "^2.88.0", | |
| "xml2js": "^0.4.23" | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment