Skip to content

Instantly share code, notes, and snippets.

@umer936
Created May 19, 2025 17:13
Show Gist options
  • Save umer936/08218786126905ba2d1e25726562cf31 to your computer and use it in GitHub Desktop.
Save umer936/08218786126905ba2d1e25726562cf31 to your computer and use it in GitHub Desktop.
HTML to PHPWord
<?php
declare(strict_types=1);
/**
* Writes a Word document of abstracts
*
* @param string $path The path to the Word document
* @param int|null $conference_id Conference identifier (accepted, but not currently read by the method body)
* @return bool Whether the write was successful
* @throws \PhpOffice\PhpWord\Exception\Exception
*/
public function writeDocument(string $path, int|null $conference_id = null): bool
{
// Builds the whole document in memory (a title section followed by one
// section per track) and then writes it to $path in a single save() call.
// NOTE(review): $conference_id is never read below — presumably it was meant
// to restrict the tracks/abstracts to one conference; confirm with the caller.
$word = new PhpWord();
// Register the heading style used by addTitle() and the named paragraph
// style 'abstract' that the HTML helpers apply to every abstract paragraph.
$word->addTitleStyle(1, ['bold' => true, 'size' => 16]);
$word->addParagraphStyle('abstract', ['indentation' => ['left' => 240]]);
// Add Conference Title as a header
$section = $word->addSection();
$headerText = $section->addTextRun(['alignment' => 'center']);
$headerText->addText(CONFERENCE_TITLE, ['bold' => true, 'size' => 18]);
// NOTE(review): presumably intended as vertical spacing after the title;
// verify that raw "\n" characters render as breaks (other code in this file
// uses addTextBreak() for that purpose).
$section->addText("\n\n");
// Maybe have a logo and the text: "Book of Abstracts"
// Can be added with: $section->addImage('path_to_logo');
// Loop through each track and add its abstracts
$tracks = $this->Tracks->find();
foreach ($tracks as $track) {
// NOTE(review): $abstracts is never assigned anywhere in this method, so
// this call is made on an undefined variable. Presumably a per-track query
// for the abstracts was dropped here — restore it before relying on this loop.
if ($abstracts->count()) {
// Each track with at least one abstract gets its own section, headed by
// the track description.
$section = $word->addSection();
$section->addTitle(h($track->description));
foreach ($abstracts as $abstract) {
// Author name in bold, on its own text run, above the abstract body.
$section->addTextRun()->addText(h($abstract->registrant->full_name), ['bold' => true]);
// Clean and process the abstract HTML
$cleanedAbstract = $this->sanitizeHtml($abstract->abstract);
// Add the processed HTML content to the section
$this->addHtmlContentToSection($section, $cleanedAbstract);
}
}
}
// Save the document
$writer = IOFactory::createWriter($word);
$writer->save($path);
// Return whether the file exists after saving
// (this only checks presence of $path; a file that existed before the call
// also yields true).
return file_exists($path);
}
/**
* Sanitize the HTML content before adding it to the Word document.
*
* @param string $html The HTML content to sanitize.
* @return string The sanitized HTML.
*/
private function sanitizeHtml(string $html): string
{
    // Decode entities first, so entity-encoded markup such as "&lt;script&gt;"
    // becomes a real tag and is therefore subject to the tag filter below.
    $decoded = html_entity_decode($html, ENT_QUOTES | ENT_HTML5, 'UTF-8');

    // Allow-list of tags that survive. strip_tags() removes every other tag
    // but keeps the text that was between the removed tags.
    $allowedTags = '<p><br><b><i><u><em><strong><ul><ol><li><a>';
    $stripped = strip_tags($decoded, $allowedTags);

    // Remove every attribute (href, onclick, ...) from anchor tags.
    // The "i" modifier is required: strip_tags() keeps allowed tags regardless
    // of case, so "<A HREF=...>" reaches this point and must be handled too.
    // "\s" after the tag name keeps the pattern from touching a bare "<a>".
    $withoutAttributes = preg_replace('/<a\s[^>]*>/i', '<a>', $stripped);

    // preg_replace() returns null when PCRE fails; fall back to the tag-filtered
    // markup instead of violating the declared string return type.
    return $withoutAttributes ?? $stripped;
}
/**
* Adds the sanitized HTML content to the PhpWord section.
*
* @param \PhpOffice\PhpWord\Element\Section $section The PhpWord section where the content will be added.
* @param string $html The HTML content to add.
*/
private function addHtmlContentToSection(\PhpOffice\PhpWord\Element\Section $section, string $html): void
{
    $html = trim($html);
    if ($html === '') {
        // Nothing to render; avoids building a DOM for an empty body.
        return;
    }

    // Fast path: the content is exactly one <p>...</p> wrapper. The "s" modifier
    // lets the paragraph span several lines, and the inner check rejects input
    // such as "<p>a</p><p>b</p>", which would otherwise be glued together into
    // one run of text by strip_tags().
    if (
        preg_match('/^<p>(.*)<\/p>$/is', $html, $matches) === 1
        && preg_match('/<\/?p\b/i', $matches[1]) !== 1
    ) {
        $section->addText(strip_tags($html), null, 'abstract');

        return;
    }

    // Collect libxml warnings for malformed HTML instead of emitting them, and
    // restore the caller's setting afterwards because the flag is process-wide.
    $previousErrorMode = libxml_use_internal_errors(true);
    $dom = new \DOMDocument();
    try {
        // The charset declaration is required: without it loadHTML() treats the
        // input as ISO-8859-1 and mangles multi-byte UTF-8 characters.
        $dom->loadHTML(
            '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head><body>'
            . $html
            . '</body></html>'
        );
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    $body = $dom->getElementsByTagName('body')->item(0);
    if ($body === null) {
        // Parsing produced no body; keep the content as plain text rather than
        // dropping it.
        $section->addText(strip_tags($html), null, 'abstract');

        return;
    }

    // Walk the direct children of <body> only; nested markup inside a child is
    // flattened to its text by addTextNodeToSection().
    foreach ($body->childNodes as $node) {
        if ($node->nodeType === XML_TEXT_NODE && trim($node->nodeValue ?? '') === '') {
            // Whitespace between block elements would otherwise become an
            // empty paragraph in the document.
            continue;
        }

        if ($node->nodeName === 'br') {
            // A top-level <br> becomes a single line break.
            $section->addTextBreak(1);
            continue;
        }

        if ($node->nodeName === 'p') {
            // Separate each paragraph from whatever precedes it.
            $section->addTextBreak(1);
        }

        $this->addTextNodeToSection($section, $node);
    }
}
/**
* Adds a text node to the PhpWord section, handling basic formatting.
*
* @param \PhpOffice\PhpWord\Element\Section $section The PhpWord section.
* @param \DOMNode $node The DOMNode containing the text.
*/
private function addTextNodeToSection(\PhpOffice\PhpWord\Element\Section $section, \DOMNode $node): void
{
    // Map the tag name to the font style it implies; any other node (plain
    // text, <p>, lists, ...) is written without extra font styling.
    $fontStyle = match ($node->nodeName) {
        'b', 'strong' => ['bold' => true],
        'i', 'em' => ['italic' => true],
        // PhpWord's underline option takes an underline type name, not a
        // boolean; 'single' is the plain single-line underline.
        'u' => ['underline' => 'single'],
        default => null,
    };

    // nodeValue is the node's text with any nested markup flattened; every
    // abstract paragraph uses the named 'abstract' paragraph style.
    $section->addText($node->nodeValue, $fontStyle, 'abstract');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment