Created
October 11, 2024 05:42
-
-
Save ryanmitchell/a31fc40c93a6fb835216ebf8b9bdf652 to your computer and use it in GitHub Desktop.
WordPress -> Statamic import
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace App\Console\Commands; | |
use Corcel\Model\Post; | |
use Illuminate\Console\Command; | |
use Statamic\Facades\Asset; | |
use Statamic\Facades\Entry; | |
/**
 * Artisan command that copies published WordPress posts (read through Corcel)
 * into Statamic entries, converting the post body into Bard/ProseMirror nodes
 * and pulling referenced images into the "assets" container.
 *
 * The WordPress host name must be available in $siteUrl, either because a
 * subclass set it or because it was passed with --site-url.
 */
class ImportWP extends Command
{
    /**
     * Maximum number of posts imported in a single run.
     */
    private const MAX_POSTS = 2500;

    /**
     * WordPress post types that are never imported.
     */
    private const SKIPPED_POST_TYPES = ['page', 'product', 'popup_theme', 'attachment'];

    /**
     * Category slugs whose posts go to the "recipes" collection instead of "blog".
     */
    private const RECIPE_CATEGORIES = [
        'breakfast',
        'light-lovely',
        'mains',
        'quick-easy',
        'sweet-treats',
        'veggie-friendly',
    ];

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'import:wp {--site-url= : Host name of the WordPress site without scheme, e.g. example.com}';

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Import posts from WordPress';

    /**
     * Host name of the WordPress site, without scheme or trailing slash.
     * It is concatenated as 'https://'.$siteUrl.'/wp-content/uploads/'.
     */
    protected string $siteUrl;

    /**
     * Create a new command instance.
     *
     * @return void
     */
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * @return int
     */
    public function handle()
    {
        if (!$this->resolveSiteUrl()) {
            $this->error('The WordPress host is unknown; pass it with --site-url=example.com.');

            return self::FAILURE;
        }

        // lazy() pages through the table with forPage(), which overwrites any
        // limit() on the query, so the cap is applied to the lazy collection.
        $query = Post::status('publish')->orderBy('post_date', 'ASC');
        $total = min((clone $query)->count(), self::MAX_POSTS);
        $posts = $query->lazy()->take(self::MAX_POSTS);

        $bar = $this->output->createProgressBar($total);
        $bar->start();

        $count = 0;

        foreach ($posts as $post) {
            $bar->advance();

            if (in_array($post->post_type, self::SKIPPED_POST_TYPES, true) || (string) $post->title === '') {
                continue;
            }

            $this->importPost($post);
            $count++;
        }

        $bar->finish();
        $this->line('');
        $this->info("Imported {$count} posts.");

        return self::SUCCESS;
    }

    /**
     * Make sure $siteUrl holds a usable host name.
     *
     * The --site-url option wins when given; otherwise a value already set on
     * the property (for example by a subclass) is used.
     *
     * @return bool false when no host name is available
     */
    private function resolveSiteUrl(): bool
    {
        $option = $this->option('site-url');

        if (is_string($option) && trim($option) !== '') {
            // Accept "https://example.com/" as well as "example.com".
            $host = preg_replace('#^https?://#i', '', trim($option)) ?? trim($option);
            $this->siteUrl = rtrim($host, '/');
        }

        // isset() is false for a typed property that was never initialised.
        return isset($this->siteUrl) && $this->siteUrl !== '';
    }

    /**
     * Import one WordPress post as a Statamic entry.
     *
     * @param Post $post
     */
    private function importPost(Post $post): void
    {
        // Both are reset for every post so nothing leaks from the previous one.
        $featuredImage = null; // file name of the featured image, e.g. "cake.jpg"
        $featuredName = null;  // same without extension, used to spot duplicates in the body

        $featuredUrl = $post->getImageAttribute();

        if (is_string($featuredUrl) && $featuredUrl !== '') {
            $featuredImage = $this->downloadAsset($featuredUrl, 'featured');

            if ($featuredImage !== null) {
                $featuredName = pathinfo($featuredImage, PATHINFO_FILENAME);
            }
        }

        $content = $this->convertContent((string) $post->content, $featuredName);
        $tags = array_values($post->terms['tag'] ?? []);

        $isRecipe = $this->isRecipe($post);
        $imagePath = $featuredImage !== null ? 'wp/featured/'.$featuredImage : '';

        // Recipes drop the "Recipe: " title prefix and store the image as a
        // string; blog posts get a fixed category and store the image as a list.
        $data = ['title' => $isRecipe ? str_ireplace('Recipe: ', '', $post->title) : $post->title];

        if (!$isRecipe) {
            $data['blog_categories'] = ['news'];
        }

        if ($isRecipe) {
            $image = $imagePath;
        } else {
            $image = $imagePath === '' ? [] : [$imagePath];
        }

        $data += [
            'content' => $content,
            'excerpt' => $post->excerpt,
            'image' => $image,
            'tags' => $tags,
            'seotamic_meta_description' => $post->excerpt,
            'seotamic_open_graph_description' => $post->excerpt,
            'seotamic_twitter_description' => $post->excerpt,
            'seotamic_image' => $imagePath,
        ];

        $entry = Entry::make()
            ->collection($isRecipe ? 'recipes' : 'blog')
            ->slug($post->slug)
            ->date($post->created_at)
            ->published(true)
            ->data($data);

        $entry->save();
    }

    /**
     * Whether the post belongs to one of the recipe categories.
     *
     * @param Post $post
     */
    private function isRecipe(Post $post): bool
    {
        foreach (self::RECIPE_CATEGORIES as $category) {
            if ($post->hasTerm('category', $category)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Convert a WordPress post body into a list of Bard nodes.
     *
     * The body is split on Gutenberg block openers; image and embed blocks are
     * mapped to sets, everything else is parsed as HTML.
     *
     * @param string      $html         raw post content
     * @param string|null $featuredName featured image file name without extension, if any
     *
     * @return array<int, array<string, mixed>>
     */
    private function convertContent(string $html, ?string $featuredName): array
    {
        $content = [];

        foreach (explode('<!-- wp', $html) as $index => $chunk) {
            if (strncmp($chunk, ':image', 6) === 0) {
                if (preg_match('/src="([^"]+)"/', $chunk, $matches) === 1
                    && !$this->isFeaturedImage($matches[1], $featuredName)
                ) {
                    $content[] = $this->imageSet(str_ireplace($this->uploadPrefixes(), 'wp/', $matches[1]));
                }
            } elseif (strncmp($chunk, ':core-embed/instagram', 21) === 0) {
                if (preg_match('/https:\/\/([^"]+)/', $chunk, $matches) === 1) {
                    $content[] = $this->set(['type' => 'instagram', 'url' => 'https://'.rtrim($matches[1], '/')]);
                }
            } elseif (strncmp($chunk, ':core-embed/youtube', 19) === 0) {
                if (preg_match('/https:\/\/([^"]+)/', $chunk, $matches) === 1) {
                    $content[] = $this->set(['type' => 'youtube', 'video' => 'https://'.$matches[1]]);
                }
            } else {
                // A closing comment marker means the chunk came from the block editor.
                $isGutenberg = stripos($chunk, '-->') !== false;
                $restored = ($index > 0 ? '<!-- wp' : '').$chunk;

                array_push($content, ...$this->convertHtml($restored, $isGutenberg, $featuredName));
            }
        }

        return $content;
    }

    /**
     * Convert a fragment of post HTML into Bard nodes.
     *
     * @param string      $html         HTML fragment, possibly containing block comments and shortcodes
     * @param bool        $isGutenberg  false for classic-editor text, which needs paragraphs added
     * @param string|null $featuredName featured image file name without extension, if any
     *
     * @return array<int, array<string, mixed>>
     */
    private function convertHtml(string $html, bool $isGutenberg, ?string $featuredName): array
    {
        // Non-greedy match so block comments whose JSON attributes contain "-" are removed too.
        $html = preg_replace('/<!--.*?-->/s', '', $html) ?? $html;

        if ($html === '') {
            return [];
        }

        // Drop shortcodes; the pattern covers both [tag ...] and [/tag].
        $html = preg_replace('/\[[^\]]+\]/', '', $html) ?? $html;

        if (!$isGutenberg) {
            $html = '<p>'.str_replace(["\r\n\r\n", "\n\n"], '</p><p>', $html).'</p>';
        }

        $html = str_replace(["\r", "\n"], '', $html);

        // libxml assumes ISO-8859-1, so non-ASCII characters are turned into
        // numeric entities before parsing.
        $encoded = mb_encode_numericentity(
            '<div id="content">'.$html.'</div>',
            [0x80, 0x10FFFF, 0, 0x1FFFFF],
            'UTF-8'
        );

        $dom = new \DOMDocument('1.0', 'UTF-8');
        $previous = libxml_use_internal_errors(true);
        $loaded = $dom->loadHTML($encoded, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $root = $loaded ? $dom->getElementById('content') : null;

        if ($root === null) {
            return [];
        }

        $content = [];

        foreach ($root->childNodes as $node) {
            if ($node instanceof \DOMComment) {
                continue;
            }

            array_push($content, ...$this->convertNode($node, $featuredName));
        }

        return $content;
    }

    /**
     * Convert one top-level DOM node into zero or more Bard nodes.
     *
     * @param \DOMNode    $node
     * @param string|null $featuredName featured image file name without extension, if any
     *
     * @return array<int, array<string, mixed>>
     */
    private function convertNode(\DOMNode $node, ?string $featuredName): array
    {
        $tagName = $node instanceof \DOMElement ? $node->tagName : 'NA';

        // Only h1-h6 are headings; "hr", "header" and the like are not.
        if (preg_match('/^h([1-6])$/', $tagName, $matches) === 1) {
            $text = trim($node->textContent);

            if ($text === '') {
                return [['type' => 'paragraph']];
            }

            return [[
                'type' => 'heading',
                'attrs' => ['level' => (int) $matches[1]],
                'content' => [$this->textNode($text)],
            ]];
        }

        $value = trim(str_replace(['<p>', '</p>'], '', (string) $node->ownerDocument->saveHTML($node)));

        if ($value === '') {
            return [['type' => 'paragraph']];
        }

        if ($node instanceof \DOMElement && $tagName === 'a') {
            [$linkAllowed, $blocks] = $this->importLinkedImages($node, $featuredName);

            if ($linkAllowed && $node->textContent !== '') {
                $blocks[] = [
                    'type' => 'paragraph',
                    'content' => [$this->textNode($node->textContent, [$this->linkMark($node)])],
                ];
            }

            return $blocks;
        }

        if ($node instanceof \DOMElement && $tagName === 'img') {
            $set = $this->importImage($node->getAttribute('src'), $featuredName);

            return $set === null ? [] : [$set];
        }

        if (stripos($value, 'https://www.youtube.com/watch?v=') === 0) {
            // saveHTML() escapes "&" in the query string; undo that for the stored URL.
            return [$this->set([
                'type' => 'youtube',
                'video' => html_entity_decode($value, ENT_QUOTES | ENT_HTML5, 'UTF-8'),
            ])];
        }

        if ($node->hasChildNodes()) {
            return $this->convertInlineChildren($node, $featuredName);
        }

        // Childless elements such as <br> carry no text; a text node must not be empty.
        if ($node->textContent === '') {
            return [['type' => 'paragraph']];
        }

        return [[
            'type' => 'paragraph',
            'content' => [$this->textNode($node->textContent)],
        ]];
    }

    /**
     * Convert the children of a block element into image sets followed by one
     * paragraph holding the inline text (links, bold, italic, plain).
     *
     * @param \DOMNode    $parent
     * @param string|null $featuredName featured image file name without extension, if any
     *
     * @return array<int, array<string, mixed>>
     */
    private function convertInlineChildren(\DOMNode $parent, ?string $featuredName): array
    {
        $blocks = [];
        $inline = [];

        foreach ($parent->childNodes as $child) {
            $childTag = $child instanceof \DOMElement ? $child->tagName : '';

            if ($child instanceof \DOMElement && $childTag === 'a') {
                [$linkAllowed, $sets] = $this->importLinkedImages($child, $featuredName);
                array_push($blocks, ...$sets);

                if ($linkAllowed && $child->textContent !== '') {
                    $inline[] = $this->textNode($child->textContent, [$this->linkMark($child)]);
                }
            } elseif ($child instanceof \DOMElement && $childTag === 'img') {
                $set = $this->importImage($child->getAttribute('src'), $featuredName);

                if ($set !== null) {
                    $blocks[] = $set;
                }
            } elseif ($child->textContent !== '') {
                if ($childTag === 'strong' || $childTag === 'b') {
                    $inline[] = $this->textNode($child->textContent, [['type' => 'bold']]);
                } elseif ($childTag === 'em' || $childTag === 'i') {
                    $inline[] = $this->textNode($child->textContent, [['type' => 'italic']]);
                } else {
                    $inline[] = $this->textNode($child->textContent);
                }
            }
        }

        if ($inline !== []) {
            $blocks[] = ['type' => 'paragraph', 'content' => $inline];
        }

        return $blocks;
    }

    /**
     * Import the images directly inside a link.
     *
     * Bard does not seem to allow an image inside a link, so as soon as the
     * link wraps an image that is not the featured image the link is dropped.
     *
     * @param \DOMElement $anchor
     * @param string|null $featuredName featured image file name without extension, if any
     *
     * @return array{0: bool, 1: array<int, array<string, mixed>>} link-allowed flag and image sets
     */
    private function importLinkedImages(\DOMElement $anchor, ?string $featuredName): array
    {
        $linkAllowed = true;
        $sets = [];

        foreach ($anchor->childNodes as $child) {
            if (!$child instanceof \DOMElement || $child->tagName !== 'img') {
                continue;
            }

            $src = $child->getAttribute('src');

            if ($this->isFeaturedImage($src, $featuredName)) {
                continue;
            }

            $linkAllowed = false;
            $set = $this->importImage($src, null);

            if ($set !== null) {
                $sets[] = $set;
            }
        }

        return [$linkAllowed, $sets];
    }

    /**
     * Turn an image URL into an image set, downloading it when it is external.
     *
     * @param string      $src
     * @param string|null $featuredName featured image file name without extension, if any
     *
     * @return array<string, mixed>|null null when the image is empty, a duplicate
     *                                   of the featured image, or unreachable
     */
    private function importImage(string $src, ?string $featuredName): ?array
    {
        if ($src === '' || $this->isFeaturedImage($src, $featuredName) || !$this->remoteFileExists($src)) {
            return null;
        }

        $path = $this->sortInternalImages($src);

        return $path === null ? null : $this->imageSet($path);
    }

    /**
     * Whether the URL refers to the post's featured image.
     *
     * Without a featured image nothing can be a duplicate of it.
     */
    private function isFeaturedImage(string $src, ?string $featuredName): bool
    {
        return $featuredName !== null
            && $featuredName !== ''
            && stripos($src, $featuredName) !== false;
    }

    /**
     * Work out the asset path for an image mentioned in the content.
     *
     * URLs that do not contain the WordPress host are downloaded into
     * wp/downloads; URLs on the WordPress host are mapped onto wp/<path below
     * wp-content/uploads> without downloading.
     *
     * @return string|null asset path relative to the container, null if the download failed
     */
    private function sortInternalImages(string $imageSrc): ?string
    {
        if (stripos($imageSrc, $this->siteUrl) === false) {
            $filename = $this->downloadAsset($imageSrc, 'downloads');

            return $filename === null ? null : 'wp/downloads/'.$filename;
        }

        return 'wp/'.str_ireplace($this->uploadPrefixes(), '', $imageSrc);
    }

    /**
     * The http and https forms of the WordPress uploads URL prefix.
     *
     * @return array<int, string>
     */
    private function uploadPrefixes(): array
    {
        return [
            'http://'.$this->siteUrl.'/wp-content/uploads/',
            'https://'.$this->siteUrl.'/wp-content/uploads/',
        ];
    }

    /**
     * Download a file into public/assets/files/wp/<folder> and register it as
     * an asset at wp/<folder>/<file name> in the "assets" container.
     *
     * @param string $url    remote file URL
     * @param string $folder sub-folder below wp/, e.g. "featured" or "downloads"
     *
     * @return string|null the stored file name, or null when the download failed
     */
    private function downloadAsset(string $url, string $folder): ?string
    {
        // Strip the query string, then keep only the last path segment.
        $filename = basename(explode('?', $url, 2)[0]);

        if ($filename === '') {
            return null;
        }

        try {
            $data = file_get_contents($url);
        } catch (\ErrorException $e) {
            // Laravel turns the warning of a failed fetch into an exception;
            // one missing image should not abort the whole import.
            $data = false;
        }

        if ($data === false) {
            $this->warn("Could not download {$url}");

            return null;
        }

        $directory = public_path('assets/files/wp/'.$folder);

        if (!is_dir($directory) && !mkdir($directory, 0755, true) && !is_dir($directory)) {
            $this->warn("Could not create {$directory}");

            return null;
        }

        if (file_put_contents($directory.'/'.$filename, $data) === false) {
            $this->warn("Could not write {$directory}/{$filename}");

            return null;
        }

        $asset = Asset::make()
            ->container('assets')
            ->path('wp/'.$folder.'/'.$filename);
        $asset->save();

        return $filename;
    }

    /**
     * Check with a HEAD request whether a URL answers with HTTP 200.
     */
    private function remoteFileExists(string $url): bool
    {
        $curl = curl_init($url);

        if ($curl === false) {
            return false;
        }

        curl_setopt_array($curl, [
            // Only the status is needed, not the body.
            CURLOPT_NOBODY => true,
            CURLOPT_RETURNTRANSFER => true,
            // file_get_contents() follows redirects, so the check does too.
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT => 30,
        ]);

        $result = curl_exec($curl);
        $status = $result === false ? 0 : (int) curl_getinfo($curl, CURLINFO_HTTP_CODE);
        curl_close($curl);

        return $status === 200;
    }

    /**
     * Build a Bard set node.
     *
     * @param array<string, mixed> $values
     *
     * @return array<string, mixed>
     */
    private function set(array $values): array
    {
        return [
            'type' => 'set',
            'attrs' => [
                'values' => $values,
            ],
        ];
    }

    /**
     * Build a Bard image set for an asset path.
     *
     * @return array<string, mixed>
     */
    private function imageSet(string $path): array
    {
        return $this->set(['type' => 'image', 'image' => $path]);
    }

    /**
     * Build a Bard text node, optionally with marks.
     *
     * @param array<int, array<string, mixed>> $marks
     *
     * @return array<string, mixed>
     */
    private function textNode(string $text, array $marks = []): array
    {
        $node = ['type' => 'text'];

        if ($marks !== []) {
            $node['marks'] = $marks;
        }

        $node['text'] = $text;

        return $node;
    }

    /**
     * Build a Bard link mark from an anchor element's href, target and rel.
     *
     * @return array<string, mixed>
     */
    private function linkMark(\DOMElement $anchor): array
    {
        return [
            'type' => 'link',
            'attrs' => [
                'href' => $anchor->getAttribute('href'),
                'target' => $anchor->getAttribute('target'),
                'rel' => $anchor->getAttribute('rel'),
            ],
        ];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment