Created
May 19, 2025 17:13
-
-
Save umer936/08218786126905ba2d1e25726562cf31 to your computer and use it in GitHub Desktop.
HTML to PHPWord
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
declare(strict_types=1); | |
/**
 * Writes a Word document of abstracts, grouped by track.
 *
 * The document starts with a centred conference title. Every track that has
 * at least one abstract then gets its own section, titled with the track
 * description; each abstract is rendered as the registrant's name in bold
 * followed by the sanitized abstract body.
 *
 * @param string $path The path to the Word document
 * @param int|null $conference_id Currently not used by this method.
 *   NOTE(review): presumably meant to restrict the output to one conference;
 *   the column to filter on is not visible here, so no filter is applied.
 * @return bool Whether the write was successful
 * @throws \PhpOffice\PhpWord\Exception\Exception
 */
public function writeDocument(string $path, int|null $conference_id = null): bool
{
    $word = new PhpWord();
    $word->addTitleStyle(1, ['bold' => true, 'size' => 16]);
    $word->addParagraphStyle('abstract', ['indentation' => ['left' => 240]]);

    // Add Conference Title as a header
    $section = $word->addSection();
    $headerText = $section->addTextRun(['alignment' => 'center']);
    $headerText->addText(CONFERENCE_TITLE, ['bold' => true, 'size' => 18]);
    // A "\n" passed to addText() is written as plain whitespace, not as a
    // paragraph break, so vertical spacing has to come from text breaks.
    $section->addTextBreak(2);

    // Maybe have a logo and the text: "Book of Abstracts"
    // Can be added with: $section->addImage('path_to_logo');

    // Loop through each track and add its abstracts
    $tracks = $this->Tracks->find();
    foreach ($tracks as $track) {
        // The original code read $abstracts without ever assigning it, which
        // is a fatal error on the first track.
        // NOTE(review): this lookup assumes Tracks hasMany Abstracts (foreign
        // key `track_id`, primary key `id`) and Abstracts belongsTo
        // Registrants -- confirm against the table classes.
        $abstracts = $this->Tracks->Abstracts
            ->find()
            ->contain(['Registrants'])
            ->where(['Abstracts.track_id' => $track->id])
            ->all();

        // Tracks without abstracts get no section at all.
        if ($abstracts->count() === 0) {
            continue;
        }

        $section = $word->addSection();
        $section->addTitle(h($track->description));

        foreach ($abstracts as $abstract) {
            $section->addTextRun()->addText(h($abstract->registrant->full_name), ['bold' => true]);

            // Clean and process the abstract HTML
            $cleanedAbstract = $this->sanitizeHtml($abstract->abstract);

            // Add the processed HTML content to the section
            $this->addHtmlContentToSection($section, $cleanedAbstract);
        }
    }

    // Save the document
    $writer = IOFactory::createWriter($word);
    $writer->save($path);

    // Return whether the file exists after saving
    return file_exists($path);
}
/**
 * Sanitize the HTML content before adding it to the Word document.
 *
 * Steps, in order: decode HTML entities, drop every tag that is not on the
 * allow-list, then strip all attributes from opening <a> tags.
 *
 * @param string $html The HTML content to sanitize.
 * @return string The sanitized HTML.
 */
private function sanitizeHtml(string $html): string
{
    // Decode HTML entities
    $decoded = html_entity_decode($html, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Allow only safe HTML tags (this list can be adjusted as needed)
    $allowedTags = '<p><br><b><i><u><em><strong><ul><ol><li><a>';

    // Remove any tags not in the allowed list
    $stripped = strip_tags($decoded, $allowedTags);

    // Strip attributes (href, onclick, ...) from <a> tags. The match is
    // case-insensitive because strip_tags() also lets <A HREF="..."> through.
    $withoutAttributes = preg_replace('/<a\s+[^>]+>/i', '<a>', $stripped);

    // preg_replace() returns null on a PCRE failure; fall back to the
    // tag-stripped text rather than violating the string return type.
    return $withoutAttributes ?? $stripped;
}
/**
 * Adds the sanitized HTML content to the PhpWord section.
 *
 * A lone <p> containing no nested markup is added directly as one text
 * element. Anything else is parsed with DOMDocument and each top-level node
 * of the body is added in document order: <br> becomes a text break, <p> is
 * preceded by a text break, and every other node is handed to
 * addTextNodeToSection().
 *
 * @param \PhpOffice\PhpWord\Element\Section $section The PhpWord section where the content will be added.
 * @param string $html The HTML content to add.
 * @return void
 */
private function addHtmlContentToSection(\PhpOffice\PhpWord\Element\Section $section, string $html): void
{
    // Fast path: exactly one <p> with plain text inside. `[^<]*` (instead of
    // a greedy `.*`) keeps inputs such as "<p>a</p><p>b</p>" out of this
    // branch, where strip_tags() would fuse the paragraphs together.
    if (preg_match('/^<p>[^<]*<\/p>$/', $html) === 1) {
        $section->addText(strip_tags($html), null, 'abstract');

        return;
    }

    // Collect libxml warnings for malformed HTML internally, and restore the
    // caller's setting afterwards so the global flag does not leak.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $dom = new \DOMDocument();
        // The fragment is wrapped to form a full document. The meta charset
        // makes libxml read the input as UTF-8; without it loadHTML() falls
        // back to ISO-8859-1 and garbles non-ASCII characters.
        $dom->loadHTML(
            '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
            . $html
            . '</body></html>'
        );
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    $body = $dom->getElementsByTagName('body')->item(0);
    if ($body === null) {
        return;
    }

    // Process each child element of the body
    foreach ($body->childNodes as $node) {
        // Handle line breaks (<br>)
        if ($node->nodeName === 'br') {
            $section->addTextBreak(1);
            continue;
        }

        // Whitespace between block elements shows up as text nodes; skipping
        // them avoids emitting empty paragraphs.
        if ($node instanceof \DOMText && trim($node->textContent) === '') {
            continue;
        }

        // Handle paragraphs (<p>): separate them from what came before
        if ($node->nodeName === 'p') {
            $section->addTextBreak(1);
        }

        $this->addTextNodeToSection($section, $node);
    }
}
/**
 * Adds a text node to the PhpWord section, handling basic formatting.
 *
 * The node's text content is added as a single text element using the
 * 'abstract' paragraph style. The font style is chosen from the node name:
 * bold for <b>/<strong>, italic for <i>/<em>, single underline for <u>, and
 * no font style for anything else (including plain text nodes).
 *
 * @param \PhpOffice\PhpWord\Element\Section $section The PhpWord section.
 * @param \DOMNode $node The DOMNode containing the text.
 * @return void
 */
private function addTextNodeToSection(\PhpOffice\PhpWord\Element\Section $section, \DOMNode $node): void
{
    $fontStyle = match ($node->nodeName) {
        'b', 'strong' => ['bold' => true],
        'i', 'em' => ['italic' => true],
        // PhpWord expects an underline style name here; a boolean is not
        // one of the accepted values.
        'u' => ['underline' => 'single'],
        default => null,
    };

    // textContent equals nodeValue for element and text nodes, but is
    // always a string.
    $section->addText($node->textContent, $fontStyle, 'abstract');
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment