Last active
December 20, 2015 04:19
-
-
Save fsuter/6070209 to your computer and use it in GitHub Desktop.
Example files for importing WordPress entries into TYPO3 (in a custom table) using extension external_import.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/***************************************************************
* Copyright notice
*
* (c) 2010 Francois Suter (Cobweb) <[email protected]>
* All rights reserved
*
* This script is part of the TYPO3 project. The TYPO3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
 * Hooks into the external_import process to adapt raw WordPress data
 * before it is mapped to TYPO3 records.
 *
 * Each import configuration (identified by table name and index) gets its own
 * treatment:
 *  - tx_dam / 0: image URLs are reduced to bare file names
 *  - tx_journal_entries / 1: attached images are split into thumbnail and gallery
 *  - tx_journal_entries / 2: links inside the post body are rewritten, relations
 *    between posts are extracted and paragraphs are rebuilt
 *  - tx_journal_entries / 3: WordPress category ids are translated
 *
 * @author Francois Suter (Cobweb) <[email protected]>
 * @package TYPO3
 * @subpackage tx_journal
 */
class tx_journal_externalimport_hooks {
	/**
	 * @var array Translation table: WordPress category id (key) to the category id stored in TYPO3 (value)
	 */
	static protected $wpCategoriesToDamCategories = array(
		1 => 19,
		2 => 6,
		3 => 7,
		4 => 5,
		5 => 11,
		6 => 14,
		7 => 18,
		8 => 16,
		9 => 12,
		10 => 29,
		11 => 19,
		12 => 21,
	);

	/**
	 * @var array Extra category ids appended to the translated one, keyed by WordPress category id
	 */
	static protected $additionalDamCategories = array(
		3 => array(8),
		4 => array(15),
		8 => array(5)
	);

	/**
	 * This method processes an array of data coming from external_import, just after it was read from the external source
	 *
	 * Records belonging to an import configuration that is not handled here are returned untouched.
	 *
	 * @param array $records The data to process
	 * @param tx_externalimport_importer $importerObject Back-reference to the external_import object
	 * @return array The modified data array
	 */
	public function preprocessRawRecordset($records, tx_externalimport_importer $importerObject) {
		if (!is_array($records)) {
			return $records;
		}
		// Dispatch on a "table:index" string, so that the index is compared as a string
		// and a non-numeric index can never be mistaken for index 0
		switch ($importerObject->getTableName() . ':' . $importerObject->getIndex()) {
			case 'tx_dam:0':
				$records = $this->reduceFileNames($records);
				break;
			case 'tx_journal_entries:1':
				$records = $this->assignThumbnailsAndGalleries($records);
				break;
			case 'tx_journal_entries:2':
				$records = $this->rewriteLinksAndParagraphs($records);
				break;
			case 'tx_journal_entries:3':
				$records = $this->translateCategories($records);
				break;
		}
		return $records;
	}

	/**
	 * Returns the file name found at the end of the path of a URL
	 *
	 * @param mixed $url The URL to analyse (may be NULL)
	 * @return string The file name, or an empty string if the URL has no usable path
	 */
	protected function extractFileName($url) {
		$path = parse_url((string)$url, PHP_URL_PATH);
		// parse_url() returns NULL when there is no path and FALSE for a malformed URL
		return is_string($path) ? basename($path) : '';
	}

	/**
	 * For imports referring to file paths, keeps only the file name
	 *
	 * @param array $records The data to process
	 * @return array The records, with "file_name" reduced to a bare file name
	 */
	protected function reduceFileNames($records) {
		foreach ($records as $key => $record) {
			$records[$key]['file_name'] = $this->extractFileName($record['file_name']);
		}
		return $records;
	}

	/**
	 * Decides, for each post/image row, whether the image is the thumbnail or a gallery image
	 *
	 * The first image met for a given post is the thumbnail, but only if the post starts with an img tag.
	 * Every other image goes to the gallery.
	 *
	 * @param array $records The data to process (one row per post and attached image)
	 * @return array The records, each with either a "thumbnail" or a "gallery" file name
	 */
	protected function assignThumbnailsAndGalleries($records) {
		$postsAlreadyMet = array();
		foreach ($records as $key => $record) {
			$externalId = $record['wp_id'];
			$isFirstImage = !isset($postsAlreadyMet[$externalId]);
			$postsAlreadyMet[$externalId] = TRUE;
			$fileName = $this->extractFileName($record['gallery']);
			if ($isFirstImage && strpos((string)$record['bodytext'], '<img') === 0) {
				$records[$key]['thumbnail'] = $fileName;
				unset($records[$key]['gallery']);
			} else {
				unset($records[$key]['thumbnail']);
				$records[$key]['gallery'] = $fileName;
			}
		}
		return $records;
	}

	/**
	 * Cleans up the body of each post and extracts the relations to other posts
	 *
	 * - a leading img tag is removed
	 * - links to local images are removed entirely
	 * - links to other posts that are already imported are turned into record links
	 *   and registered as "related" entries
	 * - paragraphs are rebuilt from line breaks
	 *
	 * A record can carry a single relation. When a post links to several other posts,
	 * it is duplicated at the end of the list, once per additional relation.
	 *
	 * @param array $records The data to process
	 * @return array The cleaned up records, plus the copies created for additional relations
	 */
	protected function rewriteLinksAndParagraphs($records) {
		// Get the existing entries, indexed by WordPress id
		$journalEntries = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows('uid, wp_id', 'tx_journal_entries', 'wp_id > 0 AND pid = 69', '', '', '', 'wp_id');
		if (!is_array($journalEntries)) {
			$journalEntries = array();
		}
		// All WordPress ids each record relates to, indexed like $records
		$relatedEntries = array();
		foreach ($records as $key => $record) {
			// Find if there's an opening image tag and remove it
			$bodytext = preg_replace('/^<img\s(.+?)\s\/>/', '', $record['bodytext']);
			if ($bodytext === NULL) {
				// The regular expression engine failed, keep the text rather than lose it
				$bodytext = $record['bodytext'];
			}
			// Remove the "default" related entry
			unset($records[$key]['related']);
			$relatedEntries[$key] = array();

			$matches = array();
			$numberOfLinks = preg_match_all('/<a\s(.*?)href="([^"]+?)"[^>]*?>(.+?)<\/a>/', $bodytext, $matches);
			if ($numberOfLinks) {
				// Make sure to replace at least all "target blank" attributes with nothing
				$searchMap = array(' target="_blank"');
				$replaceMap = array('');
				for ($j = 0; $j < $numberOfLinks; $j++) {
					$url = $matches[2][$j];
					$urlParts = parse_url($url);
					if (!is_array($urlParts)) {
						$urlParts = array();
					}
					// Relative links have no host and not every link has a path or a query string
					$host = isset($urlParts['host']) ? $urlParts['host'] : '';
					$path = isset($urlParts['path']) ? $urlParts['path'] : '';
					if ($host != 'www.monpetitcoin.com') {
						// It's an external link, do nothing but record it in the devLog
						t3lib_div::devLog('Not a local link: ' . $url, 'external_import', 1);
					} elseif (strpos($path, 'wp-content') !== FALSE) {
						// It's an image, we want to remove the whole link and its content
						$searchMap[] = $matches[0][$j];
						$replaceMap[] = '';
					} elseif (strpos($path, 'francois/blog') !== FALSE) {
						// It's a journal entry: extract the WordPress id and try to match it to an imported entry
						$queryParameters = isset($urlParts['query']) ? t3lib_div::explodeUrl2Array($urlParts['query']) : array();
						if (isset($queryParameters['p']) && isset($journalEntries[$queryParameters['p']])) {
							$entryId = $journalEntries[$queryParameters['p']]['uid'];
							$searchMap[] = 'href="' . $url . '"';
							$replaceMap[] = 'href="record:tx_journal_entries:' . $entryId . '" external="1"';
							// The first relation is stored in the record itself
							if (count($relatedEntries[$key]) == 0) {
								$records[$key]['related'] = $queryParameters['p'];
							}
							// Keep record of all related entries
							$relatedEntries[$key][] = $queryParameters['p'];
						}
					} else {
						// It's some other link, do nothing but record it in the devLog
						t3lib_div::devLog('Not a content link: ' . $url, 'external_import', 1);
					}
				}
				// Removing image links may leave empty centered paragraphs behind.
				// This entry must stay last, as str_replace() applies the pairs in order.
				$searchMap[] = '<p align="center"></p>';
				$replaceMap[] = '';
				$bodytext = str_replace($searchMap, $replaceMap, $bodytext);
			}
			$records[$key]['bodytext'] = $this->wrapParagraphs($bodytext);
		}

		// Loop again on all records, to duplicate those that have more than 1 related entry
		foreach ($relatedEntries as $key => $wordpressIds) {
			if (count($wordpressIds) > 1) {
				// Drop the first element, it is already stored in the original record
				array_shift($wordpressIds);
				// Loop on the others and create copies of the original record with the new relation
				foreach ($wordpressIds as $wordpressId) {
					$copy = $records[$key];
					$copy['related'] = $wordpressId;
					$records[] = $copy;
				}
			}
		}
		return $records;
	}

	/**
	 * Rebuilds the paragraphs of a post
	 *
	 * WordPress does not store <p> tags unless they have attributes. It uses line breaks instead.
	 * The text is thus split on line breaks and every line which does not contain a <p> tag
	 * (with or without attributes) is wrapped in one. Empty lines are dropped.
	 *
	 * @param string $bodytext The text to process
	 * @return string The text with explicit paragraphs
	 */
	protected function wrapParagraphs($bodytext) {
		$html = '';
		foreach (t3lib_div::trimExplode("\n", $bodytext, TRUE) as $paragraph) {
			$paragraph = trim($paragraph);
			// Strict comparison rather than empty(), so that a line reading "0" is not lost
			if ($paragraph === '') {
				continue;
			}
			// Match "<p>" as well as "<p attribute...>", but not other tags like "<pre>"
			if (preg_match('/<p[\s>]/i', $paragraph)) {
				$html .= $paragraph;
			} else {
				$html .= '<p>' . $paragraph . '</p>';
			}
		}
		return $html;
	}

	/**
	 * Replaces the WordPress category id of each record with its TYPO3 counterpart(s)
	 *
	 * When additional categories are defined, the result is a comma-separated list.
	 * A WordPress category without translation results in NULL.
	 *
	 * @param array $records The data to process
	 * @return array The records with translated "categories"
	 */
	protected function translateCategories($records) {
		foreach ($records as $key => $record) {
			$wpCategoryId = $record['categories'];
			$categories = NULL;
			if (isset(self::$wpCategoriesToDamCategories[$wpCategoryId])) {
				$categories = self::$wpCategoriesToDamCategories[$wpCategoryId];
				if (isset(self::$additionalDamCategories[$wpCategoryId])) {
					$categories .= ',' . implode(',', self::$additionalDamCategories[$wpCategoryId]);
				}
			}
			$records[$key]['categories'] = $categories;
		}
		return $records;
	}
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
// Settings shared by all the external_import configurations reading from the WordPress database
$commonImportConfiguration = array(
	'connector' => 'sql',
	'data' => 'array',
	'parameters' => array(
		'driver' => 'mysql',
		'server' => '127.0.0.1',
		'user' => 'foo',
		'password' => 'bar',
		'database' => 'wordpress',
		'init' => 'SET NAMES utf8'
	),
	'minimumRecords' => 1,
	'enforcePid' => 1
);

// Configuration 0 for the DAM: update the existing image records (matched on their file name)
// with the title and dates of the WordPress attachments. No record is created nor deleted.
$table = 'tx_dam';
$index = 0;
t3lib_div::loadTCA($table);
$TCA[$table]['ctrl']['external'][$index] = array_merge(
	$commonImportConfiguration,
	array(
		// The shared connection parameters, completed with the query specific to this import
		'parameters' => array_merge(
			$commonImportConfiguration['parameters'],
			array('query' => "SELECT * FROM wp_posts WHERE post_status = 'attachment' AND post_mime_type LIKE 'image%'")
		),
		'description' => 'Import image titles into the DAM (WP)',
		'priority' => 10,
		'pid' => tx_dam_db::getPid(),
		'disabledOperations' => 'insert,delete',
		'reference_uid' => 'file_name'
	)
);
// Simple field-to-column mappings
$TCA[$table]['columns']['file_name']['external'][$index]['field'] = 'guid';
$TCA[$table]['columns']['title']['external'][$index]['field'] = 'post_title';
// Creation and modification dates both come from the post date, converted by the sample
// "parseDate" transformation (date_cr is assigned first, date_mod receives the same value)
$TCA[$table]['columns']['date_mod']['external'][$index] = $TCA[$table]['columns']['date_cr']['external'][$index] = array(
	'field' => 'post_date',
	'userFunc' => array(
		'class' => 'EXT:external_import/samples/class.tx_externalimport_transformations.php:&tx_externalimport_transformations',
		'method' => 'parseDate',
		'params' => array(
			'enforceTimeZone' => TRUE
		)
	)
);
// Configuration 1 for the journal entries: import the published posts, one row per attached image
// (the LEFT JOIN also returns the posts which have no child post at all)
$table = 'tx_journal_entries';
$index = 1;
t3lib_div::loadTCA($table);
$TCA[$table]['ctrl']['external'][$index] = array_merge(
	$commonImportConfiguration,
	array(
		// The shared connection parameters, completed with the query specific to this import
		'parameters' => array_merge(
			$commonImportConfiguration['parameters'],
			array('query' => "
SELECT wp_posts.ID, wp_posts.post_title, wp_posts.post_date, wp_posts.post_content, images.guid
FROM wp_posts
LEFT JOIN wp_posts AS images ON images.post_parent = wp_posts.ID
WHERE wp_posts.post_status = 'publish' AND wp_posts.post_mime_type = '' ORDER BY post_date
")
		),
		'description' => 'Import posts (WP)',
		'priority' => 20,
		'pid' => 69,
		'reference_uid' => 'wp_id'
	)
);
// Simple field-to-column mappings
$TCA[$table]['columns']['wp_id']['external'][$index]['field'] = 'ID';
$TCA[$table]['columns']['title']['external'][$index]['field'] = 'post_title';
// The post date is converted by the sample "parseDate" transformation
$TCA[$table]['columns']['official_date']['external'][$index] = array(
	'field' => 'post_date',
	'userFunc' => array(
		'class' => 'EXT:external_import/samples/class.tx_externalimport_transformations.php:&tx_externalimport_transformations',
		'method' => 'parseDate',
		'params' => array(
			'enforceTimeZone' => TRUE
		)
	)
);
$TCA[$table]['columns']['bodytext']['external'][$index] = array(
	'field' => 'post_content',
	'rteEnabled' => TRUE
);
// Thumbnail and gallery share the very same mapping: the image URL is matched against the
// file name of the DAM records (thumbnail is assigned first, gallery receives the same value)
$TCA[$table]['columns']['gallery']['external'][$index] = $TCA[$table]['columns']['thumbnail']['external'][$index] = array(
	'field' => 'guid',
	'MM' => array(
		'mapping' => array(
			'table' => 'tx_dam',
			'reference_field' => 'file_name'
		)
	)
);
// Configuration 2 for the journal entries ($table is still "tx_journal_entries"):
// read the posts a second time to update their body text and their relations to other entries.
// No record is created nor deleted.
$index = 2;
$TCA[$table]['ctrl']['external'][$index] = array_merge(
	$commonImportConfiguration,
	array(
		// The shared connection parameters, completed with the query specific to this import
		'parameters' => array_merge(
			$commonImportConfiguration['parameters'],
			array('query' => "
SELECT wp_posts.ID, wp_posts.post_content FROM wp_posts
WHERE wp_posts.post_status = 'publish' AND wp_posts.post_mime_type = '' ORDER BY post_date
")
		),
		'description' => 'Import posts again for link checking (WP)',
		'priority' => 25,
		'pid' => 69,
		'disabledOperations' => 'insert,delete',
		'reference_uid' => 'wp_id'
	)
);
$TCA[$table]['columns']['wp_id']['external'][$index]['field'] = 'ID';
$TCA[$table]['columns']['bodytext']['external'][$index] = array(
	'field' => 'post_content',
	'rteEnabled' => TRUE
);
// Related entries are matched on the WordPress id of the journal entries themselves
$TCA[$table]['columns']['related']['external'][$index] = array(
	'field' => 'ID',
	'MM' => array(
		'mapping' => array(
			'table' => 'tx_journal_entries',
			'reference_field' => 'wp_id'
		)
	)
);
// Configuration 3 for the journal entries ($table is still "tx_journal_entries"):
// read one row per post and category to update the categories of the existing entries.
// No record is created nor deleted.
$index = 3;
$TCA[$table]['ctrl']['external'][$index] = array_merge(
	$commonImportConfiguration,
	array(
		// The shared connection parameters, completed with the query specific to this import
		'parameters' => array_merge(
			$commonImportConfiguration['parameters'],
			array('query' => "
SELECT wp_posts.ID, wp_post2cat.category_id FROM wp_posts
INNER JOIN wp_post2cat ON post_id = ID
WHERE wp_posts.post_status = 'publish' AND wp_posts.post_mime_type = ''
")
		),
		'description' => 'Import posts again for rebuilding categories (WP)',
		'priority' => 30,
		'pid' => 69,
		'disabledOperations' => 'insert,delete',
		'reference_uid' => 'wp_id'
	)
);
// Simple field-to-column mappings
$TCA[$table]['columns']['wp_id']['external'][$index]['field'] = 'ID';
$TCA[$table]['columns']['categories']['external'][$index]['field'] = 'category_id';
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment