Last active
October 15, 2024 10:25
-
-
Save Pelirrojo/8554604710ad155cd5ae8d833dcf28ce to your computer and use it in GitHub Desktop.
PHP script to export all published WordPress pages as plain text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Export every published WordPress page as plain text on standard output.
 *
 * For each page the script prints a separator line carrying the post ID,
 * then the slug, the title, the parent page ID (or "none") and the language
 * slug (falling back to "en"), followed by the page body with HTML tags and
 * shortcodes removed, HTML entities decoded and whitespace collapsed.
 *
 * Errors are written to STDERR and the process exits with status 1, so the
 * exported text on STDOUT can be redirected to a file safely.
 */

// Ensure this script is run from the console; otherwise stop immediately,
// because it prints raw database content.
if (PHP_SAPI !== 'cli') {
    die('This script can only be run from the command line.');
}

// Database settings (copy all of these values from wp-config.php).
$db_host = '<db_ip>:3306';
$db_name = '<db_name>';
$db_user = '<db_user>';
$db_pass = '<db_password>';
$db_charset = 'utf8mb4'; // DB_CHARSET in wp-config.php
$table_prefix = 'wp_';   // $table_prefix in wp-config.php

// The prefix is interpolated into the SQL text below, so only accept
// characters that are valid in an unquoted table name.
if (!preg_match('/^[A-Za-z0-9_]+$/', $table_prefix)) {
    fwrite(STDERR, "Invalid table prefix: " . $table_prefix . "\n");
    exit(1);
}

// Make mysqli throw exceptions on every PHP version, so connection and query
// failures are handled in one consistent way instead of returning false.
mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT);

try {
    $conn = new mysqli($db_host, $db_user, $db_pass, $db_name);
    // Without an explicit charset, non-ASCII content may arrive mis-encoded.
    $conn->set_charset($db_charset);
} catch (mysqli_sql_exception $e) {
    fwrite(STDERR, "Connection failed: " . $e->getMessage() . "\n");
    exit(1);
}

// Query all published pages together with their page template and language.
// LIMIT 1 keeps each scalar subquery valid even when a page has duplicate
// meta rows or more than one term in the 'language' taxonomy.
$sql = "SELECT p.ID, p.post_title, p.post_content, p.post_name, p.post_parent,
               (SELECT meta_value FROM {$table_prefix}postmeta
                 WHERE post_id = p.ID AND meta_key = '_wp_page_template'
                 LIMIT 1) AS template,
               (SELECT t.slug FROM {$table_prefix}terms t
                  JOIN {$table_prefix}term_taxonomy tt ON t.term_id = tt.term_id
                  JOIN {$table_prefix}term_relationships tr ON tt.term_taxonomy_id = tr.term_taxonomy_id
                 WHERE tr.object_id = p.ID AND tt.taxonomy = 'language'
                 LIMIT 1) AS language
          FROM {$table_prefix}posts p
         WHERE p.post_type = 'page' AND p.post_status = 'publish'
         ORDER BY p.ID";

try {
    $result = $conn->query($sql);
} catch (mysqli_sql_exception $e) {
    fwrite(STDERR, "Query failed: " . $e->getMessage() . "\n");
    $conn->close();
    exit(1);
}

$page_count = $result->num_rows;

if ($page_count === 0) {
    echo "No pages found.\n";
}

while ($row = $result->fetch_assoc()) {
    // mysqli may return numeric columns as strings, so normalise the parent ID.
    $parent_id = (int) $row["post_parent"];
    // Pages without a 'language' term are reported as English.
    $language = (isset($row["language"]) && $row["language"] !== '') ? $row["language"] : "en";

    echo "id=" . $row["ID"] . " -----------------------------------------------\n";
    echo "/" . $row["post_name"] . "\n";
    echo $row["post_title"] . "\n";
    echo ($parent_id !== 0 ? $parent_id : "none") . "\n";
    echo $language . "\n\n";

    // Clean all the HTML tags and formatting.
    $content = strip_tags($row["post_content"]);

    // Delete shortcodes. preg_replace() returns null on a PCRE error; in that
    // case keep the text as it was rather than losing the whole page body.
    $without_shortcodes = preg_replace("/\[.*?\]/", "", $content);
    if ($without_shortcodes !== null) {
        $content = $without_shortcodes;
    }

    // Decode entities such as &amp; and &nbsp; so the output is real plain
    // text. This runs after tag stripping so escaped markup is not removed.
    $content = html_entity_decode($content, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Collapse runs of whitespace into single spaces.
    $collapsed = preg_replace('/\s+/', ' ', $content);
    if ($collapsed !== null) {
        $content = $collapsed;
    }

    echo trim($content) . "\n\n";
}

echo ">>>>>>>>>>>>>>\n(" . $page_count . ") pages \n<<<<<<<<<<<<<<";

$result->free();
$conn->close();
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment