Last active
April 26, 2016 14:56
-
-
Save geerteltink/7d9a52e6921874ebde8db5eb08e4f7b9 to your computer and use it in GitHub Desktop.
PHP 7 DOMDocument: utf-8 encoding not working as expected
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php

declare(strict_types=1);

// Demonstrates that DOMDocument::loadHTML() does not treat an HTML fragment as
// UTF-8 just because 'utf-8' was passed to the DOMDocument constructor, and
// shows two workarounds for getting the original characters back.

// Valid UTF-8 string
$htmlForm = <<<'HTML'
<form action="/" method="post">
<input type="text" id="title" name="title" required value="Îñţérñåţîöñåļîžåţîöñ" />
<button type="submit" class="btn btn-primary">Submit</button>
</form>
HTML;

// Parse the fragment as-is: do not wrap it in <html><body> and do not add a doctype.
$loadFlags = LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD;

// Check encoding
// Output: UTF-8
var_dump(mb_detect_encoding($htmlForm));

// NOT WORKING !!!!
// The fragment carries no charset declaration of its own, so the parser does
// not know the bytes are UTF-8.
$dom = new DOMDocument('1.0', 'utf-8');
$dom->loadHTML($htmlForm, $loadFlags);
// Output: the value attribute does NOT come back as "Îñţérñåţîöñåļîžåţîöñ";
// the multi-byte characters are garbled (exact form depends on the libxml build).
$garbledHtml = $dom->saveHTML();
var_dump($garbledHtml);

// Working option 1
// Prefix the markup with an XML declaration that names the encoding, so the
// parser decodes the input as UTF-8. Serialising only the <form> node keeps
// the injected prefix out of the result.
$dom = new DOMDocument('1.0', 'utf-8');
$dom->loadHTML('<?xml encoding="UTF-8">' . $htmlForm, $loadFlags);
// Output: <input value="Îñţérñåţîöñåļîžåţîöñ">
$htmlViaXmlPrefix = $dom->saveHTML($dom->getElementsByTagName('form')->item(0));
var_dump($htmlViaXmlPrefix);

// Working option 2
// Load without any encoding hint and undo the mis-decoding afterwards by
// converting the serialised node from UTF-8 back to ISO-8859-1.
// mb_convert_encoding() replaces utf8_decode(), which is deprecated as of
// PHP 8.2 and performed this same UTF-8 -> ISO-8859-1 conversion.
// NOTE(review): this relies on the parser having treated the undeclared input
// as ISO-8859-1 — confirm against the libxml version in use; option 1 does not
// depend on that assumption.
$dom = new DOMDocument('1.0', 'utf-8');
$dom->loadHTML($htmlForm, $loadFlags);
// Output: <input value="Îñţérñåţîöñåļîžåţîöñ">
$htmlViaLatin1RoundTrip = mb_convert_encoding(
    $dom->saveHTML($dom->getElementsByTagName('form')->item(0)),
    'ISO-8859-1',
    'UTF-8'
);
var_dump($htmlViaLatin1RoundTrip);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment