Skip to content

Instantly share code, notes, and snippets.

@sadukie
Created October 7, 2024 01:23
Show Gist options
  • Save sadukie/6090f6e5a0b9ba74c9e3b0a1977ff2db to your computer and use it in GitHub Desktop.
Save sadukie/6090f6e5a0b9ba74c9e3b0a1977ff2db to your computer and use it in GitHub Desktop.
Migrating from WordPress to some other site
# Migrates the posts and pages of a WordPress export file into per-item folders.
# Export your WordPress content before running this script.

# Set the output path (root folder that receives the generated content)
$sitePath = "PATH_TO_SITE_OUTPUT"
# Base URL of the WordPress site; it is removed from each item's link to derive its folder
$siteUrl = "YOUR_SITE_URL"

# Parse the XML.
# XmlDocument.Load decodes the file using the encoding named in its XML declaration.
# Get-Content in Windows PowerShell would decode a BOM-less UTF-8 export with the
# ANSI code page and corrupt non-ASCII characters in titles and content.
# Resolve-Path supplies a full path because the .NET working directory can differ
# from the current PowerShell location.
$xml = New-Object System.Xml.XmlDocument
$xml.Load((Resolve-Path .\YOUR_WORDPRESS_EXPORT.xml).ProviderPath)
# Uncomment to inspect the properties available on each item:
# $xml.rss.channel.item | Get-Member
# Walk every exported item; each post or page gets its own folder holding an
# index.astro file made of a small frontmatter header followed by the HTML content.
foreach ($item in $xml.rss.channel.item){
    # Only migrating posts and pages
    # Also not migrating the posts with query strings - need to look at those manually
    if (($item.post_type.InnerText -eq "page" -or $item.post_type.InnerText -eq "post") -and $item.link.IndexOf("?") -eq -1){
        # Turn the item's URL into a Windows-style path relative to the site root
        $path = $item.link.Replace($siteUrl,"").Replace("/","\")
        if ($item.post_type.InnerText -eq "page"){
            $path = $sitePath + $path
        } else {
            # Posts are grouped under a "blog" folder
            $path = $sitePath + "\blog" + $path
        }

        # This will create the path with its subfolders
        if (!(Test-Path $path)){
            New-Item -ItemType Directory $path | Out-Null
        }

        $objectToConvert = New-Object -TypeName PSObject -Property @{
            PubDate = $item.post_date.InnerText
            Link = $item.link
            Title = $item.title.InnerText
            Author = "sadukie"
            SlugName = $item.post_name.InnerText
            PostType = $item.post_type.InnerText
            Status = $item.status.InnerText
            # With several category elements this is an array; the frontmatter
            # string expansion below joins its entries with spaces.
            Category = $item.category.InnerText
            Content = $item.encoded."#cdata-section"
        }

        # Check for Images
        $contentString = $objectToConvert.Content | Out-String
        $imagesToFetch = (Select-String -InputObject $contentString -Pattern "img src=""(https://[\w\d./-]*)""" -AllMatches).Matches
        $imgCount = $imagesToFetch.Count
        if ($imgCount -gt 0){
            Write-Host "Post has $imgCount images: " $objectToConvert.Title
            foreach ($imageToFetch in $imagesToFetch){
                # Get the captured value - just the URL, exactly as written in the content
                $imageUrlText = $imageToFetch.Groups[1].Value
                $urlToFetch = New-Object System.Uri($imageUrlText)
                # Fetch the image
                Write-Host "Download from: " $urlToFetch.AbsoluteUri
                # FileName
                $imgFileName = [System.IO.Path]::GetFileName($urlToFetch.LocalPath)
                # Join-Path inserts the separator when the link (and so $path) has no trailing slash
                $outputFilePath = Join-Path $path $imgFileName
                # Save it in this folder
                Write-Host "Saving to: " $outputFilePath
                # Uncommenting the line below will make the call to the web server to get the images
                # Invoke-WebRequest -Uri $urlToFetch -OutFile $outputFilePath
                # Update the content.
                # Replace the text as captured rather than Uri.AbsoluteUri: the Uri class
                # normalises the URL (host casing, percent-encoding), so AbsoluteUri may
                # not occur in the content and the replace would silently do nothing.
                $relativeFileName = "./" + $imgFileName
                $contentString = $contentString.Replace($imageUrlText,$relativeFileName)
            }
        }

        # Now let's create the file (existing files are left untouched)
        $filePath = Join-Path $path "index.astro"
        if (!(Test-Path $filePath)){
            New-Item -ItemType File $filePath | Out-Null
            # Write the frontmatter
            # (the string's continuation lines stay at column 0 so no indentation leaks into the file)
            $frontMatter = "===
pubDate: $($objectToConvert.PubDate)
title: $($objectToConvert.Title)
author: $($objectToConvert.Author)
category: $($objectToConvert.Category)
===
"
            # Explicit UTF-8: Windows PowerShell's Out-File default is UTF-16LE
            $frontMatter | Out-File -Append -Encoding utf8 $filePath
            # Write the content - This is in HTML
            $contentString | Out-File -Append -Encoding utf8 $filePath
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment