Last active
October 17, 2024 13:27
-
-
Save uzielweb/667eb5e68e7cde3dfa51897ff8b92e9f to your computer and use it in GitHub Desktop.
Import Articles CLI for Joomla 5 (Form2Content component and Joomla Content Articles component)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/*
Instructions for use:
- To skip the first 100 articles, use the argument "--skip 100".
- To manually define the limit of records to be imported, use the argument "--limit" followed by the desired number, for example, "--limit 50".
- To define the start date of the import, use the argument "--start_date" followed by the date in "YYYY-MM-DD" format, for example, "--start_date 2023-01-01".
- To define the end date of the import, use the argument "--end_date" followed by the date in "YYYY-MM-DD" format, for example, "--end_date 2023-12-31".
- To define the source category, use the argument "--source_category" followed by the desired category ID, for example, "--source_category 10".
- To define the destination category, use the argument "--destination_category" followed by the desired category ID, for example, "--destination_category 5".
- To select Joomla standard articles from the content table in the old database, use the flag "--standard_articles".
Example to import Joomla's standard articles:
php ImportArticles.php --standard_articles --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5
- To import articles from F2C (Form2Content), simply run the script without the "--standard_articles" flag:
Example to import F2C articles:
php ImportArticles.php --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5
- To avoid cleaning HTML in imported articles, use the flag "--no_clean_html".
Example to import articles without cleaning HTML:
php ImportArticles.php --skip 100 --limit 50 --start_date 2023-01-01 --end_date 2023-12-31 --source_category 10 --destination_category 5 --no_clean_html
- The log file is written to "import_log.txt" inside the "log_path" directory set in Joomla's configuration.php.
*/
// Define the _JEXEC marker before any Joomla file is loaded.
define('_JEXEC', 1);

use Joomla\CMS\Factory;
use Joomla\Console\Application;
use Joomla\CMS\HTML\HTMLHelper;
use Joomla\CMS\Language\Text;
use Joomla\CMS\Date\Date;
use Joomla\Database\DatabaseAwareInterface;
use Joomla\Database\DatabaseAwareTrait;
// JDatabaseDriver
use Joomla\Database\DatabaseDriver;
use Joomla\Database\DatabaseInterface;

// The Joomla root is the parent of the directory that contains this script.
define('JPATH_BASE', dirname(__DIR__));
define('DS', DIRECTORY_SEPARATOR);

// Load Joomla's path definitions first, then the framework bootstrap.
foreach (['defines.php', 'framework.php'] as $bootstrapFile) {
    require_once JPATH_BASE . DS . 'includes' . DS . $bootstrapFile;
}

// Defaults used when the matching command-line option is not supplied.
define('SOURCE_CATEGORY', 90);
define('TARGET_CATEGORY', 90);
define('START_DATE', '0000-01-01 00:00:00');
define('END_DATE', '9999-12-31 23:59:59');
define('DEFAULT_LIMIT', 100);

// Set the user's time zone (affects the timestamps written by date()).
date_default_timezone_set('America/Sao_Paulo');
class ImportArticles extends Application implements DatabaseAwareInterface | |
{ | |
use DatabaseAwareTrait; | |
/**
 * Creates the importer application using the parent application's default setup.
 */
public function __construct()
{
    // No extra wiring is needed here; defer entirely to the parent constructor.
    parent::__construct();
}
/**
 * Builds an ASCII, lower-case alias from a text and makes it unique among #__content aliases.
 *
 * Runs of characters that are neither letters nor digits become a single dash, the text is
 * transliterated to ASCII, lower-cased and stripped of anything that is not a word character
 * or a dash. When nothing is left the base alias is "n-a". If the base alias is already used
 * in #__content, "-2", "-3", ... is appended until a free alias is found.
 *
 * @param   string  $text  Text to convert (typically an article title).
 *
 * @return  string  An alias that does not currently exist in #__content.
 */
public function slugify($text)
{
    $text = (string) $text;

    // replace non letter or digits by -
    // (preg_replace() returns null on malformed UTF-8; keep the raw text in that case)
    $slug = preg_replace('~[^\\pL\d]+~u', '-', $text);
    if ($slug === null) {
        $slug = $text;
    }

    // trim
    $slug = trim($slug, '-');

    // transliterate; iconv() returns false when it cannot convert, so only accept a real result
    $ascii = iconv('utf-8', 'us-ascii//TRANSLIT', $slug);
    if ($ascii !== false) {
        $slug = $ascii;
    }

    // lowercase
    $slug = strtolower($slug);

    // remove unwanted characters
    $slug = (string) preg_replace('~[^-\w]+~', '', $slug);

    // Strict comparison on purpose: "0" is a usable alias, but empty() would reject it.
    // The fallback also goes through the uniqueness loop below instead of returning early.
    if ($slug === '') {
        $slug = 'n-a';
    }

    // Check if slug already exists
    $db = Factory::getContainer()->get('DatabaseDriver');
    $counter = 1;

    while (true) {
        $slugToCheck = $counter > 1 ? $slug . '-' . $counter : $slug;

        // Count matches rather than loading the alias itself, so an alias of "0" is not
        // mistaken for "no row found".
        $query = $db->getQuery(true)
            ->select('COUNT(*)')
            ->from($db->quoteName('#__content'))
            ->where($db->quoteName('alias') . ' = ' . $db->quote($slugToCheck));
        $db->setQuery($query);

        if ((int) $db->loadResult() === 0) {
            return $slugToCheck;
        }

        $counter++;
    }
}
/**
 * Returns a database driver connected to the old (source) site.
 *
 * The connection settings are hard-coded below and must be edited before running the import.
 *
 * @return  DatabaseDriver  Driver instance for the old database.
 */
private function getOldDatabase()
{
    return DatabaseDriver::getInstance([
        'driver' => 'mysql',
        'host' => 'yourhost.something',
        'user' => 'olddatabaseuser',
        'password' => 'olddatabasepassword',
        'database' => 'joomla_olddatabase',
        'prefix' => 'oldprefix_',
        'charset' => 'utf8',
    ]);
}
/**
 * Imports a batch of articles from the old database into the current site's #__content table.
 *
 * Source rows come from the old #__content table (standard articles) or from #__f2c_form
 * (Form2Content). For every source row whose target id is not yet present in the new
 * #__content table an article is inserted; for every row (new or already present) an asset
 * row and a workflow association are ensured. Progress is echoed and written to the log.
 *
 * @param   int|string  $skipItems            Number of source rows to skip (query offset).
 * @param   int|string  $limit                Maximum number of source rows to read.
 * @param   string      $startDate            Inclusive lower bound for the "created" column.
 * @param   string      $endDate              Exclusive upper bound for the "created" column.
 * @param   int|string  $sourceCategory       Category id to read from in the old database.
 * @param   int|string  $destinationCategory  Category id assigned to the imported articles.
 * @param   bool        $standardArticles     True to read old #__content, false to read #__f2c_form.
 * @param   bool        $noCleanHtml          True to store intro/full text without HTML cleaning.
 *
 * @return  void
 */
private function checkOldIds($skipItems = 0, $limit = DEFAULT_LIMIT, $startDate = START_DATE, $endDate = END_DATE, $sourceCategory = SOURCE_CATEGORY, $destinationCategory = TARGET_CATEGORY, $standardArticles = false, $noCleanHtml = false)
{
    $olddatabase = $this->getOldDatabase();
    $db = Factory::getContainer()->get('DatabaseDriver');

    // Command-line values arrive as strings; cast so the progress counter stays numeric.
    $skipItems = (int) $skipItems;
    $limit = (int) $limit;

    $results = $this->fetchSourceRows($olddatabase, $skipItems, $limit, $startDate, $endDate, $sourceCategory, $standardArticles);

    $summary = "Total articles to be imported: " . count($results) . "\n";
    echo $summary;
    $this->logAction($summary);

    $contador = $skipItems; // articles counter, continues from the skipped offset
    $titleLengthLimit = 100;

    // Loop through the old articles
    foreach ($results as $oldarticle) {
        // Warn about very long titles; count characters, not bytes, when mbstring is available.
        $title = (string) $oldarticle->title;
        $titleLength = function_exists('mb_strlen') ? mb_strlen($title, 'UTF-8') : strlen($title);
        if ($titleLength > $titleLengthLimit) {
            echo "Título muito longo encontrado! ID do Artigo: " . $oldarticle->id . ", Título: " . $oldarticle->title . "\n";
        }

        // Id the article has in the new #__content table: the old article id for standard
        // articles, the form's reference_id for Form2Content rows.
        $articleId = $standardArticles ? $oldarticle->id : $oldarticle->reference_id;

        $existingQuery = $db->getQuery(true)
            ->select('*')
            ->from($db->quoteName('#__content'))
            ->where($db->quoteName('id') . ' = ' . $db->quote($articleId));
        $article = $db->setQuery($existingQuery)->loadObject();

        if ($article) {
            // Already imported: skip the insert, but still ensure asset and workflow below.
            $exists = "The article already exists: " . $article->id . ": " . $article->title . " :::>> ";
            echo $exists;
            $this->logAction($exists);
        } else {
            $contador++;

            $article = $this->composeArticle($db, $olddatabase, $oldarticle, $articleId, $destinationCategory, $standardArticles, $noCleanHtml);
            $db->insertObject('#__content', $article, 'id');

            // Same singular/plural wording on screen and in the log.
            $progress = $contador . ($contador > 1 ? " completed tasks :::>> " : " completed task :::>> ");
            $created = "Article created: " . $article->id . ": " . $article->title . " - Created at: " . $article->created . " :::>> ";
            echo $progress;
            echo $created;
            $this->logAction($progress);
            $this->logAction($created);
        }

        $this->adicionarReferenciasAsset($article);
        $this->adicionarReferenciasWorkflow($article);
    }
}

/**
 * Reads the batch of source rows from the old database.
 *
 * Form2Content rows are additionally restricted to projectid = 5 and state = 1.
 *
 * @return  array  List of row objects (empty when nothing matches).
 */
private function fetchSourceRows($olddatabase, $skipItems, $limit, $startDate, $endDate, $sourceCategory, $standardArticles)
{
    $query = $olddatabase->getQuery(true)
        ->select('*')
        ->from($standardArticles ? '#__content' : '#__f2c_form');

    if (!$standardArticles) {
        $query->where('projectid = 5')
            ->where('state = 1');
    }

    $query->where('catid = ' . $olddatabase->quote($sourceCategory))
        ->where('created >= ' . $olddatabase->quote($startDate))
        ->where('created < ' . $olddatabase->quote($endDate))
        ->order('id ASC');

    // An offset of 0 is the same as no offset, so one call covers both cases.
    $query->setLimit($limit, $skipItems);

    $rows = $olddatabase->setQuery($query)->loadObjectList();

    return is_array($rows) ? $rows : [];
}

/**
 * Loads the content of one Form2Content field for a form from #__f2c_fieldcontent.
 *
 * @return  string  Field content, or an empty string when no row exists.
 */
private function fetchF2cField($olddatabase, $formId, $fieldId)
{
    $query = $olddatabase->getQuery(true)
        ->select('content')
        ->from('#__f2c_fieldcontent')
        ->where('formid = ' . $olddatabase->quote($formId))
        ->where('fieldid = ' . (int) $fieldId);

    return (string) $olddatabase->setQuery($query)->loadResult();
}

/**
 * Builds the row object that will be inserted into the new #__content table.
 *
 * @return  stdClass  Article row ready for insertObject().
 */
private function composeArticle($db, $olddatabase, $oldarticle, $articleId, $destinationCategory, $standardArticles, $noCleanHtml)
{
    if ($standardArticles) {
        $introText = (string) $oldarticle->introtext;
        $fullText = (string) $oldarticle->fulltext;
        // The source row already is the old #__content row; no second query needed.
        $oldDefaultArticleData = $oldarticle;
    } else {
        // Form2Content keeps the texts in separate field rows (field 58 = intro, 59 = full text).
        $introText = $this->fetchF2cField($olddatabase, $oldarticle->id, 58);
        $fullText = $this->fetchF2cField($olddatabase, $oldarticle->id, 59);

        // Old Joomla article linked to the form; may be missing, hence the guards below.
        $oldContentQuery = $olddatabase->getQuery(true)
            ->select('*')
            ->from('#__content')
            ->where('id = ' . $olddatabase->quote($articleId));
        $oldDefaultArticleData = $olddatabase->setQuery($oldContentQuery)->loadObject();
    }

    // Look up an existing asset by the ARTICLE id (not the Form2Content form id), matching
    // the asset name used when the asset is created.
    $queryAsset = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__assets'))
        ->where($db->quoteName('name') . ' = ' . $db->quote('com_content.article.' . $articleId));
    $asset = $db->setQuery($queryAsset)->loadObject();

    $article = new stdClass();
    $article->id = $articleId;
    $article->asset_id = isset($asset->id) ? $asset->id : '';
    $article->title = $oldarticle->title;
    $article->alias = $this->slugify($oldarticle->title);
    $article->introtext = $noCleanHtml ? $introText : $this->cleanTheHtml($introText);
    $article->fulltext = $noCleanHtml ? $fullText : $this->cleanTheHtml($fullText);
    $article->state = 1;
    $article->catid = $destinationCategory;
    $article->created = $oldarticle->created;
    $article->created_by = $oldarticle->created_by;
    $article->created_by_alias = $oldarticle->created_by_alias;
    $article->modified = $oldarticle->modified;
    // modified_by is taken from the old #__content row when one exists
    $article->modified_by = isset($oldDefaultArticleData->modified_by) ? $oldDefaultArticleData->modified_by : '';
    $article->checked_out = '';
    $article->checked_out_time = '';
    $article->publish_up = $oldarticle->publish_up;
    $article->publish_down = '';
    $article->images = '{"image_intro":"","float_intro":"","image_intro_alt":"","image_intro_caption":"","image_fulltext":"","float_fulltext":"","image_fulltext_alt":"","image_fulltext_caption":""}';
    $article->urls = '{"urla":"","urlatext":"","targeta":"","urlb":"","urlbtext":"","targetb":"","urlc":"","urlctext":"","targetc":""}';
    $article->attribs = '{"article_layout":"","show_title":"","link_titles":"","show_tags":"","show_intro":"","info_block_position":"","info_block_show_title":"","show_category":"","link_category":"","show_parent_category":"","link_parent_category":"","show_author":"","link_author":"","show_create_date":"","show_modify_date":"","show_publish_date":"","show_item_navigation":"","show_hits":"","show_noauth":"","urls_position":"","alternative_readmore":"","article_page_title":"","show_publishing_options":"","show_article_options":"","show_urls_images_backend":"","show_urls_images_frontend":""}';
    $article->version = 1;
    // !empty() keeps the original "use the value only when truthy" rule without reading
    // properties of a missing (null) old article row.
    $article->ordering = !empty($oldDefaultArticleData->ordering) ? $oldDefaultArticleData->ordering : '';
    $article->metakey = !empty($oldDefaultArticleData->metakey) ? $oldDefaultArticleData->metakey : '';
    $article->metadesc = !empty($oldDefaultArticleData->metadesc) ? $oldDefaultArticleData->metadesc : '';
    $article->access = 1;
    $article->hits = !empty($oldDefaultArticleData->hits) ? $oldDefaultArticleData->hits : 0;
    $article->metadata = '{"robots":"","author":"","rights":""}';
    $article->featured = 0;
    $article->language = '*';
    $article->xreference = '';
    $article->note = '';

    return $article;
}
/**
 * Ensures an #__assets row named "com_content.article.<id>" exists for the article.
 *
 * When the asset is missing it is inserted as a child of the article's category asset
 * (or of asset id 1 when the category has no asset) and the article's asset_id is updated.
 *
 * @param   object  $article  Article row; reads id, catid and title.
 *
 * @return  void
 */
private function adicionarReferenciasAsset($article)
{
    $db = Factory::getContainer()->get('DatabaseDriver');
    $assetName = 'com_content.article.' . $article->id;

    $queryAsset = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__assets'))
        ->where($db->quoteName('name') . ' = ' . $db->quote($assetName));
    $db->setQuery($queryAsset);
    $asset = $db->loadObject();

    if ($asset) {
        $exists = "The asset already exists: " . $asset->id . ": " . $asset->name . " :::>> ";
        echo $exists;
        $this->logAction($exists);

        return;
    }

    // The parent is the asset of the article's category; it may not exist.
    $queryCategoryAsset = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__assets'))
        ->where($db->quoteName('name') . ' = ' . $db->quote('com_content.category.' . $article->catid));
    $db->setQuery($queryCategoryAsset);
    $categoryAsset = $db->loadObject();
    $hasCategoryAsset = !empty($categoryAsset->id);

    // calculate the asset lft, rgt: lft is max rgt + 1, rgt is lft + 1
    // NOTE(review): this appends the node after the current maximum rgt instead of opening a
    // gap inside the parent's range — consider rebuilding the assets tree after the import.
    $queryMaxRgt = $db->getQuery(true)
        ->select('MAX(rgt)')
        ->from($db->quoteName('#__assets'));
    $db->setQuery($queryMaxRgt);
    $maxRgt = (int) $db->loadResult();

    $asset = new stdClass();
    $asset->parent_id = $hasCategoryAsset ? $categoryAsset->id : 1;
    $asset->lft = $maxRgt + 1;
    $asset->rgt = $maxRgt + 2;
    // A child sits one level below its parent; with the fallback parent (id 1) that is level 1.
    $asset->level = $hasCategoryAsset ? (int) $categoryAsset->level + 1 : 1;
    $asset->name = $assetName;
    $asset->title = $article->title;
    $asset->rules = '{}';
    $db->insertObject('#__assets', $asset, 'id');

    $created = "Asset created: " . $asset->id . ": " . $asset->name . " :::>> ";
    echo $created;
    $this->logAction($created);

    // Point the article's asset_id at the asset that was just created.
    $this->updateAssetId($article->id);
}
/**
 * Ensures a #__workflow_associations row exists for the article (extension "com_content.article").
 *
 * A missing association is created with stage_id 1.
 *
 * @param   object  $article  Article row; reads id.
 *
 * @return  void
 */
private function adicionarReferenciasWorkflow($article)
{
    $db = Factory::getContainer()->get('DatabaseDriver');
    $extension = "com_content.article";

    // Match on item_id AND extension: an association that another extension holds for the
    // same numeric item id must not be mistaken for this article's association.
    $queryWorkflow = $db->getQuery(true)
        ->select('*')
        ->from($db->quoteName('#__workflow_associations'))
        ->where($db->quoteName('item_id') . ' = ' . $db->quote($article->id))
        ->where($db->quoteName('extension') . ' = ' . $db->quote($extension));
    $db->setQuery($queryWorkflow);
    $workflow = $db->loadObject();

    if ($workflow) {
        $exists = "The workflow already exists: " . $workflow->item_id . ": " . $workflow->extension . "\n";
        echo $exists;
        $this->logAction($exists);

        return;
    }

    $workflow = new stdClass();
    $workflow->item_id = $article->id;
    $workflow->stage_id = 1;
    $workflow->extension = $extension;
    // No key argument: the association object has no "id" property to receive a generated key.
    $db->insertObject('#__workflow_associations', $workflow);

    $created = "The workflow was created: " . $workflow->item_id . ": " . $workflow->extension . "\n";
    echo $created;
    $this->logAction($created);
}
/**
 * Copies the id of the asset named "com_content.article.<articleId>" into the asset_id
 * column of the matching #__content row. Echoes and logs the outcome either way.
 *
 * @param   int|string  $articleId  Id of the article whose asset_id should be refreshed.
 *
 * @return  void
 */
private function updateAssetId($articleId)
{
    $database = Factory::getContainer()->get('DatabaseDriver');

    $lookup = $database->getQuery(true)
        ->select('*')
        ->from($database->quoteName('#__assets'))
        ->where($database->quoteName('name') . ' = ' . $database->quote('com_content.article.' . $articleId));
    $database->setQuery($lookup);
    $asset = $database->loadObject();

    // Nothing to link when the asset row does not exist.
    if (!$asset) {
        echo "Asset not found: " . $articleId . "\n";
        $this->logAction("Asset not found: " . $articleId . "\n");

        return;
    }

    $update = $database->getQuery(true)
        ->update($database->quoteName('#__content'))
        ->set($database->quoteName('asset_id') . ' = ' . $database->quote($asset->id))
        ->where($database->quoteName('id') . ' = ' . $database->quote($articleId));
    $database->setQuery($update);
    $database->execute();

    $report = "Asset updated: " . $asset->id . ": " . $asset->name . " :::>> ";
    echo $report;
    $this->logAction($report);
}
/**
 * Appends one timestamped line to import_log.txt in the configured "log_path" directory.
 *
 * @param   string  $message  Text to record.
 *
 * @return  void
 */
private function logAction($message)
{
    $target = Factory::getConfig()->get('log_path') . '/import_log.txt';

    // Line layout: "<Y-m-d H:i:s> - <message>" followed by the platform line ending.
    $line = sprintf('%s - %s%s', date('Y-m-d H:i:s'), $message, PHP_EOL);

    file_put_contents($target, $line, FILE_APPEND);
}
/**
 * Cleans imported HTML: removes class/style attributes, normalises bullets and strips
 * every tag that is not on the allow-list (or all tags when $limpaTudo is true).
 *
 * Attribute removal is limited to the inside of opening tags, so the words "class" or
 * "style" in the visible text (and attributes that follow them, such as an image's src)
 * are left untouched.
 *
 * @param   string|null  $string     HTML to clean; null is treated as an empty string.
 * @param   bool         $limpaTudo  True to strip all tags, false to keep the allowed ones.
 *
 * @return  string  The cleaned text.
 */
private function cleanTheHtml($string, $limpaTudo = false)
{
    $string = (string) $string;

    // Drop class="..." / style="..." (double-quoted, single-quoted or unquoted) from each tag.
    $withoutAttributes = preg_replace_callback(
        '/<[a-z][^<>]*>/i',
        static function (array $tag) {
            return preg_replace('/\s+(?:class|style)\s*=\s*(?:"[^"]*"|\'[^\']*\'|[^\s>]+)/i', '', $tag[0]);
        },
        $string
    );
    // preg_* returns null on failure; keep the previous text rather than losing the content.
    if ($withoutAttributes !== null) {
        $string = $withoutAttributes;
    }

    // replace <p > with <p>, and likewise for any other tag left with trailing whitespace
    $tidied = preg_replace('/<([a-z][a-z0-9]*)\s+>/i', '<$1>', $string);
    if ($tidied !== null) {
        $string = $tidied;
    }

    // replace • with -
    $string = str_replace('•', '-', $string);

    if ($limpaTudo) {
        return strip_tags($string);
    }

    // <br> in the allow-list also keeps the self-closing forms <br/> and <br />.
    return strip_tags($string, '<iframe><img><p><span><a><b><div><strong><em><br>');
}
/**
 * Empties import_log.txt in the configured "log_path" directory when the file exists,
 * and reports on screen which of the two cases applied.
 *
 * @return  void
 */
private function clearLog()
{
    $path = Factory::getConfig()->get('log_path') . '/import_log.txt';

    if (!file_exists($path)) {
        echo "Log file does not exist.\n";

        return;
    }

    // Overwrite with an empty string to truncate the file.
    file_put_contents($path, '');
    echo "Log file cleared.\n";
}
/**
 * Writes the ANSI escape sequences that clear the terminal and move the cursor home.
 *
 * @return  void
 */
private function clearScreen()
{
    $sequence = "\033[2J\033[;H"; // ESC[2J = erase display, ESC[;H = cursor to top-left
    echo $sequence;
}
/**
 * Parses the command-line options and runs the import with them.
 *
 * Options: --skip, --limit, --start_date, --end_date, --source_category,
 * --destination_category (all take a value), --standard_articles (flag, optionally followed
 * by a boolean such as "True") and --no_clean_html (flag). Missing options fall back to the
 * constants defined at the top of the file.
 *
 * @return  void
 */
protected function configure()
{
    $valueRequired = Symfony\Component\Console\Input\InputOption::VALUE_REQUIRED;

    // Options that must be followed by a value: name => description.
    $valueOptions = [
        'skip' => 'Skip the first X articles',
        'limit' => 'Limit the number of articles to be imported',
        'start_date' => 'Start date of import',
        'end_date' => 'End date of import',
        'source_category' => 'Source category ID',
        'destination_category' => 'Destination category ID',
    ];

    $options = [];
    foreach ($valueOptions as $name => $description) {
        $options[] = new Symfony\Component\Console\Input\InputOption($name, null, $valueRequired, $description);
    }

    // VALUE_OPTIONAL so that both "--standard_articles" and the documented
    // "--standard_articles True" are accepted. Default false = option not given.
    $options[] = new Symfony\Component\Console\Input\InputOption('standard_articles', null, Symfony\Component\Console\Input\InputOption::VALUE_OPTIONAL, 'Import Joomla standard articles', false);
    $options[] = new Symfony\Component\Console\Input\InputOption('no_clean_html', null, Symfony\Component\Console\Input\InputOption::VALUE_NONE, 'Do not clean HTML in imported articles');

    $input = new Symfony\Component\Console\Input\ArgvInput();
    $input->bind(new Symfony\Component\Console\Input\InputDefinition($options));

    $skipItems = max(0, (int) ($input->getOption('skip') ?: 0));
    $limit = (int) ($input->getOption('limit') ?: DEFAULT_LIMIT);
    $startDate = $input->getOption('start_date') ?: START_DATE;
    $endDate = $input->getOption('end_date') ?: END_DATE;
    $sourceCategory = $input->getOption('source_category') ?: SOURCE_CATEGORY;
    $destinationCategory = $input->getOption('destination_category') ?: TARGET_CATEGORY;

    // The import compares "created < end date". A date-only end such as 2023-12-31 would
    // therefore exclude that whole day, so extend it to the last second of the day.
    if (preg_match('/^\d{4}-\d{2}-\d{2}$/', (string) $endDate) === 1) {
        $endDate .= ' 23:59:59';
    }

    // false: option absent; null: given without a value; string: explicit value.
    $rawStandard = $input->getOption('standard_articles');
    $standardArticles = $rawStandard === null
        || ($rawStandard !== false && filter_var($rawStandard, FILTER_VALIDATE_BOOLEAN));

    $noCleanHtml = $input->getOption('no_clean_html') ? true : false;

    $this->checkOldIds($skipItems, $limit, $startDate, $endDate, $sourceCategory, $destinationCategory, $standardArticles, $noCleanHtml);
}
/**
 * Entry point of the application: delegates to configure(), which parses the
 * command-line options and starts the import.
 */
public function execute()
{
    $this->configure();
}
/**
 * Returns the name of this application.
 *
 * @return  string  Always "ImportArticles".
 */
public function getName(): string
{
    $name = 'ImportArticles';

    return $name;
}
} | |
// Create the importer application and run it right away.
$application = new ImportArticles();
$application->execute();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment