Skip to content

Instantly share code, notes, and snippets.

@ryanmitchell
Created October 11, 2024 05:42
Show Gist options
  • Save ryanmitchell/a31fc40c93a6fb835216ebf8b9bdf652 to your computer and use it in GitHub Desktop.
Save ryanmitchell/a31fc40c93a6fb835216ebf8b9bdf652 to your computer and use it in GitHub Desktop.
WordPress -> Statamic import
<?php
namespace App\Console\Commands;
use Corcel\Model\Post;
use Illuminate\Console\Command;
use Statamic\Facades\Asset;
use Statamic\Facades\Entry;
/**
 * Artisan command that copies published WordPress posts (read through Corcel)
 * into Statamic entries.
 *
 * For every imported post the command:
 *  - downloads the featured image into public/assets/files/wp/featured and
 *    registers it as a Statamic asset,
 *  - converts the post body (Gutenberg blocks or classic HTML) into an array
 *    of Bard/ProseMirror nodes,
 *  - saves the entry in the "recipes" collection when the post carries one of
 *    the recipe categories, otherwise in the "blog" collection.
 */
class ImportWP extends Command
{
    /**
     * Post types that are never imported.
     */
    private const SKIPPED_POST_TYPES = ['page', 'product', 'popup_theme', 'attachment'];

    /**
     * Category slugs that route a post to the "recipes" collection.
     */
    private const RECIPE_CATEGORIES = [
        'breakfast',
        'light-lovely',
        'mains',
        'quick-easy',
        'sweet-treats',
        'veggie-friendly',
    ];

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'import:wp
        {--site-url= : Host name of the WordPress site (e.g. example.com), used to recognise images it hosts itself}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Import posts from WordPress';

    /**
     * Host name (no scheme, no trailing slash) of the WordPress site.
     *
     * Defaults to an empty string so that reading it can never hit an
     * uninitialised typed property; when empty, every image is treated as
     * external and downloaded.
     */
    protected string $siteUrl = '';

    /**
     * Execute the console command.
     *
     * @return int Command exit code.
     */
    public function handle(): int
    {
        $siteUrlOption = $this->option('site-url');
        if (is_string($siteUrlOption) && trim($siteUrlOption) !== '') {
            $this->siteUrl = $this->normaliseHost($siteUrlOption);
        }
        if ($this->siteUrl === '') {
            $this->warn('No --site-url given: every image is treated as external and downloaded.');
        }

        $posts = Post::status('publish')->limit(2500)->orderBy('post_date', 'ASC')->lazy();

        // NOTE(review): counting a lazy collection presumably enumerates it once
        // before the import loop enumerates it again - confirm this is acceptable.
        $bar = $this->output->createProgressBar(count($posts));
        $bar->start();

        $count = 0;
        foreach ($posts as $post) {
            $bar->advance();

            if (in_array($post->post_type, self::SKIPPED_POST_TYPES, true) || $post->title == '') {
                continue;
            }

            // Featured-image state is rebuilt for every post so that a post
            // without a featured image never inherits the previous post's one.
            $featuredImage = null;
            $imageUrl = $post->getImageAttribute();
            if (!is_null($imageUrl) && $imageUrl !== '') {
                $featuredImage = $this->downloadAsset((string) $imageUrl, 'featured');
            }
            // File name without extension; body images whose URL contains it
            // (e.g. resized variants) are considered duplicates of the featured image.
            $featuredStem = $featuredImage === null ? null : pathinfo($featuredImage, PATHINFO_FILENAME);
            $featuredPath = $featuredImage === null ? '' : 'wp/featured/'.$featuredImage;

            $content = $this->buildContent((string) $post->content, $featuredStem);

            $terms = $post->terms;
            $tags = array_values((array) ($terms['tag'] ?? []));

            $isRecipe = false;
            foreach (self::RECIPE_CATEGORIES as $category) {
                if ($post->hasTerm('category', $category)) {
                    $isRecipe = true;
                    break;
                }
            }

            $seo = [
                'seotamic_meta_description' => $post->excerpt,
                'seotamic_open_graph_description' => $post->excerpt,
                'seotamic_twitter_description' => $post->excerpt,
                'seotamic_image' => $featuredPath,
            ];

            if ($isRecipe) {
                $data = [
                    'title' => str_ireplace('Recipe: ', '', $post->title),
                    'content' => $content,
                    'excerpt' => $post->excerpt,
                    'image' => $featuredPath,
                    'tags' => $tags,
                ] + $seo;
            } else {
                $data = [
                    'title' => $post->title,
                    'blog_categories' => [
                        'news',
                    ],
                    'content' => $content,
                    'excerpt' => $post->excerpt,
                    // the blog collection stores its image as a list
                    'image' => $featuredPath === '' ? [] : [$featuredPath],
                    'tags' => $tags,
                ] + $seo;
            }

            Entry::make()
                ->collection($isRecipe ? 'recipes' : 'blog')
                ->slug($post->slug)
                ->date($post->created_at)
                ->published(true)
                ->data($data)
                ->save();

            $count++;
        }

        $bar->finish();
        $this->line('');
        $this->info(sprintf('Imported %d posts.', $count));

        return 0;
    }

    /**
     * Convert a WordPress post body into a list of Bard nodes.
     *
     * The body is split on Gutenberg block openers ("<!-- wp"). Image,
     * Instagram-embed and YouTube-embed blocks become Bard sets; everything
     * else is parsed as HTML.
     *
     * @param string      $html         Raw post content.
     * @param string|null $featuredStem Featured image file name without extension, or null when there is none.
     *
     * @return array<int, array<string, mixed>>
     */
    private function buildContent(string $html, ?string $featuredStem): array
    {
        $content = [];

        foreach (explode('<!-- wp', $html) as $index => $chunk) {
            if (substr($chunk, 0, 6) === ':image') {
                if (preg_match('/src="([^"]+)"/', $chunk, $matches) === 1) {
                    // Block images are not probed with a HEAD request first,
                    // matching how this branch always behaved.
                    $set = $this->imageSet($matches[1], $featuredStem, false);
                    if ($set !== null) {
                        $content[] = $set;
                    }
                }
                continue;
            }

            if (substr($chunk, 0, 21) === ':core-embed/instagram') {
                if (preg_match('/https:\/\/([^"]+)/', $chunk, $matches) === 1) {
                    $content[] = $this->bardSet([
                        'type' => 'instagram',
                        'url' => 'https://'.rtrim($matches[1], '/'),
                    ]);
                }
                continue;
            }

            if (substr($chunk, 0, 19) === ':core-embed/youtube') {
                if (preg_match('/https:\/\/([^"]+)/', $chunk, $matches) === 1) {
                    $content[] = $this->bardSet([
                        'type' => 'youtube',
                        'video' => 'https://'.$matches[1],
                    ]);
                }
                continue;
            }

            $content = array_merge($content, $this->convertHtml($chunk, $index > 0, $featuredStem));
        }

        return $content;
    }

    /**
     * Convert one chunk of HTML (a generic Gutenberg block or classic content)
     * into Bard nodes.
     *
     * @param string      $chunk         Chunk produced by splitting the body on "<!-- wp".
     * @param bool        $restoreOpener Whether the "<!-- wp" opener removed by the split must be put back
     *                                   so the block comment can be stripped as a whole.
     * @param string|null $featuredStem  Featured image file name without extension, or null.
     *
     * @return array<int, array<string, mixed>>
     */
    private function convertHtml(string $chunk, bool $restoreOpener, ?string $featuredStem): array
    {
        $isGutenberg = stripos($chunk, '-->') !== false;

        $html = (string) preg_replace('/<!--[^-]+-->/', '', ($restoreOpener ? '<!-- wp' : '').$chunk);
        if ($html === '') {
            return [];
        }

        // Strip shortcodes. The pattern also matches closing tags such as
        // "[/gallery]", so no separate pass for those is needed.
        $html = (string) preg_replace('/\[[^\]]+\]/', '', $html);

        if (!$isGutenberg) {
            // classic editor content: blank lines separate paragraphs
            $html = '<p>'.str_replace("\r\n\r\n", '</p><p>', $html).'</p>';
        }
        $html = str_replace("\n", '', $html);

        $dom = new \DOMDocument('1.0', 'UTF-8');
        $dom->encoding = 'utf-8';

        // Non-ASCII characters are turned into numeric entities so the HTML
        // parser cannot misread the encoding. Parser warnings about imperfect
        // markup are collected and discarded instead of being suppressed with @.
        $previousErrorMode = libxml_use_internal_errors(true);
        $dom->loadHTML(
            mb_encode_numericentity('<div id="content">'.$html.'</div>', [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8'),
            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
        );
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        $root = $dom->getElementById('content');
        if ($root === null) {
            return [];
        }

        $nodes = [];

        foreach ($root->childNodes as $tag) {
            // leftover HTML comments carry no content
            if ($tag instanceof \DOMComment) {
                continue;
            }

            // Text nodes have no tag name; use a placeholder so the checks
            // below can be written uniformly.
            $tagName = $tag instanceof \DOMElement ? $tag->tagName : 'NA';

            // Only real headings (h1-h6) qualify, not hr/header/hgroup.
            if (preg_match('/^h([1-6])$/i', $tagName, $heading) === 1) {
                $headingText = trim($tag->textContent);
                if ($headingText === '') {
                    $nodes[] = ['type' => 'paragraph'];
                } else {
                    $nodes[] = [
                        'type' => 'heading',
                        'attrs' => [
                            'level' => (int) $heading[1],
                        ],
                        'content' => [
                            $this->textNode($headingText),
                        ],
                    ];
                }
                continue;
            }

            $value = trim(str_replace(['<p>', '</p>'], '', $dom->saveHTML($tag)));
            if ($value === '') {
                $nodes[] = ['type' => 'paragraph'];
                continue;
            }

            if ($tagName === 'a') {
                // A link wrapping an image cannot be kept as a link in Bard:
                // emit the image on its own and drop the link.
                [$images, $linkAllowed] = $this->linkedImages($tag, $featuredStem);
                $nodes = array_merge($nodes, $images);

                if ($linkAllowed && $tag->textContent != '') {
                    $nodes[] = [
                        'type' => 'paragraph',
                        'content' => [$this->linkNode($tag)],
                    ];
                }
            } elseif ($tagName === 'img' && $tag->getAttribute('src') !== '') {
                $set = $this->imageSet($tag->getAttribute('src'), $featuredStem);
                if ($set !== null) {
                    $nodes[] = $set;
                }
            } elseif (stripos($value, 'https://www.youtube.com/watch?v=') === 0) {
                // a bare YouTube URL on its own line
                $nodes[] = $this->bardSet([
                    'type' => 'youtube',
                    'video' => $value,
                ]);
            } elseif ($tag->childNodes !== null && $tag->childNodes->length > 0) {
                $inline = [];

                foreach ($tag->childNodes as $child) {
                    $childName = $child instanceof \DOMElement ? $child->tagName : null;

                    if ($childName === 'a') {
                        [$images, $linkAllowed] = $this->linkedImages($child, $featuredStem);
                        $nodes = array_merge($nodes, $images);

                        if ($linkAllowed && $child->textContent != '') {
                            $inline[] = $this->linkNode($child);
                        }
                    } elseif ($childName === 'img') {
                        $set = $this->imageSet($child->getAttribute('src'), $featuredStem);
                        if ($set !== null) {
                            $nodes[] = $set;
                        }
                    } elseif ($child->textContent != '') {
                        $mark = null;
                        if ($childName === 'strong') {
                            $mark = 'bold';
                        } elseif ($childName === 'em') {
                            $mark = 'italic';
                        }
                        $inline[] = $this->textNode($child->textContent, $mark);
                    }
                }

                if (count($inline) > 0) {
                    $nodes[] = [
                        'type' => 'paragraph',
                        'content' => $inline,
                    ];
                }
            } elseif ($tag->textContent === '') {
                // childless element without text (e.g. <hr>, <br>): an empty
                // paragraph avoids emitting an empty text node
                $nodes[] = ['type' => 'paragraph'];
            } else {
                $nodes[] = [
                    'type' => 'paragraph',
                    'content' => [
                        $this->textNode($tag->textContent),
                    ],
                ];
            }
        }

        return $nodes;
    }

    /**
     * Collect the images directly inside a link.
     *
     * @param \DOMElement $anchor       The <a> element.
     * @param string|null $featuredStem Featured image file name without extension, or null.
     *
     * @return array{0: array<int, array<string, mixed>>, 1: bool} Image sets to emit, and whether the
     *                                                             link itself may still be output
     *                                                             (false once it wraps a non-featured image).
     */
    private function linkedImages(\DOMElement $anchor, ?string $featuredStem): array
    {
        $sets = [];
        $linkAllowed = true;

        foreach ($anchor->childNodes as $child) {
            if (!$child instanceof \DOMElement || $child->tagName !== 'img') {
                continue;
            }

            $src = $child->getAttribute('src');
            if ($this->isFeaturedImage($src, $featuredStem)) {
                continue;
            }

            $linkAllowed = false;

            $set = $this->imageSet($src, $featuredStem);
            if ($set !== null) {
                $sets[] = $set;
            }
        }

        return [$sets, $linkAllowed];
    }

    /**
     * Build the Bard image set for an image URL.
     *
     * @param string      $src          Image URL taken from the post body.
     * @param string|null $featuredStem Featured image file name without extension, or null.
     * @param bool        $verifyRemote Whether to confirm with a HEAD request that the URL answers 200 first.
     *
     * @return array<string, mixed>|null The set, or null when the image is empty, duplicates the
     *                                   featured image, is unreachable or cannot be stored.
     */
    private function imageSet(string $src, ?string $featuredStem, bool $verifyRemote = true): ?array
    {
        if ($src === '' || $this->isFeaturedImage($src, $featuredStem)) {
            return null;
        }

        if ($verifyRemote && !$this->remoteFileExists($src)) {
            return null;
        }

        $path = $this->sortInternalImages($src);
        if ($path === null) {
            return null;
        }

        return $this->bardSet([
            'type' => 'image',
            'image' => $path,
        ]);
    }

    /**
     * Whether an image URL refers to the post's featured image.
     *
     * The URL only has to contain the featured file's base name, so resized
     * variants of the featured image are recognised as well.
     */
    private function isFeaturedImage(string $src, ?string $featuredStem): bool
    {
        return $featuredStem !== null
            && $featuredStem !== ''
            && stripos($src, $featuredStem) !== false;
    }

    /**
     * Resolve an image URL to a path inside the "assets" container.
     *
     * URLs containing the WordPress host are mapped onto the "wp/" folder by
     * dropping the wp-content/uploads prefix (nothing is downloaded for them).
     * Any other URL is downloaded into "wp/downloads".
     *
     * @return string|null Asset path, or null when an external image could not be downloaded.
     */
    private function sortInternalImages(string $imageSrc): ?string
    {
        $isInternal = $this->siteUrl !== '' && stripos($imageSrc, $this->siteUrl) !== false;

        if ($isInternal) {
            $relative = str_ireplace(
                [
                    'http://'.$this->siteUrl.'/wp-content/uploads/',
                    'https://'.$this->siteUrl.'/wp-content/uploads/',
                ],
                '',
                $imageSrc
            );

            return 'wp/'.$relative;
        }

        $filename = $this->downloadAsset($imageSrc, 'downloads');

        return $filename === null ? null : 'wp/downloads/'.$filename;
    }

    /**
     * Download a remote file into public/assets/files/wp/<folder> and register
     * it as a Statamic asset in the "assets" container.
     *
     * @param string $url    Remote file URL; any query string is ignored when deriving the file name.
     * @param string $folder Sub-folder below "wp" ("featured" or "downloads").
     *
     * @return string|null The stored file name, or null when downloading or writing failed.
     */
    private function downloadAsset(string $url, string $folder): ?string
    {
        $filename = basename(explode('?', $url, 2)[0]);
        if ($filename === '') {
            return null;
        }

        $directory = public_path('assets/files/wp/'.$folder);

        try {
            $data = file_get_contents($url);
            $stored = $data !== false
                && (is_dir($directory) || mkdir($directory, 0755, true))
                && file_put_contents($directory.'/'.$filename, $data) !== false;
        } catch (\ErrorException $e) {
            // I/O warnings may surface as exceptions when the framework's
            // error handler is active; one bad file must not abort the import.
            $stored = false;
        }

        if (!$stored) {
            $this->warn(PHP_EOL.'Could not download '.$url);

            return null;
        }

        Asset::make()
            ->container('assets')
            ->path('wp/'.$folder.'/'.$filename)
            ->save();

        return $filename;
    }

    /**
     * Check with a HEAD request whether a URL ends in an HTTP 200 response.
     */
    private function remoteFileExists(string $url): bool
    {
        $curl = curl_init($url);
        if ($curl === false) {
            return false;
        }

        curl_setopt_array($curl, [
            // don't fetch the body, only check that the resource answers
            CURLOPT_NOBODY => true,
            CURLOPT_RETURNTRANSFER => true,
            // follow redirects (e.g. http -> https), as the later download does
            CURLOPT_FOLLOWLOCATION => true,
            // never let one dead host stall the whole import
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT => 20,
        ]);

        $exists = curl_exec($curl) !== false
            && (int) curl_getinfo($curl, CURLINFO_HTTP_CODE) === 200;

        curl_close($curl);

        return $exists;
    }

    /**
     * Wrap field values in a Bard "set" node.
     *
     * @param array<string, mixed> $values
     *
     * @return array<string, mixed>
     */
    private function bardSet(array $values): array
    {
        return [
            'type' => 'set',
            'attrs' => [
                'values' => $values,
            ],
        ];
    }

    /**
     * Build a text node, optionally carrying a single attribute-less mark
     * such as "bold" or "italic".
     *
     * @return array<string, mixed>
     */
    private function textNode(string $text, ?string $mark = null): array
    {
        if ($mark === null) {
            return [
                'type' => 'text',
                'text' => $text,
            ];
        }

        return [
            'type' => 'text',
            'marks' => [[
                'type' => $mark,
            ]],
            'text' => $text,
        ];
    }

    /**
     * Build a text node carrying a link mark copied from an <a> element.
     *
     * @return array<string, mixed>
     */
    private function linkNode(\DOMElement $anchor): array
    {
        return [
            'type' => 'text',
            'marks' => [[
                'type' => 'link',
                'attrs' => [
                    'href' => $anchor->getAttribute('href'),
                    'target' => $anchor->getAttribute('target'),
                    'rel' => $anchor->getAttribute('rel'),
                ],
            ]],
            'text' => $anchor->textContent,
        ];
    }

    /**
     * Reduce a user-supplied site URL to a bare host: no surrounding
     * whitespace, no scheme, no trailing slash.
     */
    private function normaliseHost(string $value): string
    {
        $withoutScheme = (string) preg_replace('#^https?://#i', '', trim($value));

        return rtrim($withoutScheme, '/');
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment