Created
January 10, 2018 13:05
-
-
Save AliceWonderMiscreations/7b0efa45bb67196456aa2f5e924dd60e to your computer and use it in GitHub Desktop.
php class for generating html5 content as XML with DOMDocument
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
 +-----------------------------------------------------------------------+
 |                                                                       |
 | Copyright (c) 2012-2018 Alice Wonder Miscreations                     |
 |   May be used under terms of MIT license                              |
 |                                                                       |
 +-----------------------------------------------------------------------+
 | Purpose: HTML5 as XML page generation                                 |
 +-----------------------------------------------------------------------+
*/
/**
 * Generates an HTML5 page as XML using a caller-supplied DOMDocument.
 *
 * The constructor loads an empty <html><head/><body/></html> skeleton into
 * the document. Callers then populate $xmlHead / $xmlBody (directly or via
 * the add* helpers) and finish with sendPage(), which sanitizes the body,
 * adds ARIA landmark roles, rebuilds the head in a fixed order, sends the
 * HTTP headers and prints the serialized document.
 */
class html5domdoc {
    /** DOMDocument the page is built in (supplied to the constructor). */
    protected $dom;
    /** The <html> element; refreshed by domNodes(). */
    public $xmlHtml;
    /** The <head> element; refreshed by domNodes(). */
    public $xmlHead;
    /** The <body> element; refreshed by domNodes(). */
    public $xmlBody;
    /** When true, an RTA rating label is emitted as a meta tag and an HTTP header. */
    protected $rtalabel = false;
    /** Keywords collected through addKeywords(); de-duplicated when the head is built. */
    protected $keywords = array();
    /** Content of the description meta tag; empty string means "do not emit". */
    protected $description = '';
    /** Value of the Content-Security-Policy header; empty string means "do not send". */
    protected $cspstring = '';
    /** MIME types for which <object> elements are allowed to stay in the body. */
    protected $objectwhitelist = array('text/plain', 'text/html', 'image/webp', 'application/pdf', 'application/xhtml+xml');
    /** Namespace written to the xmlns attribute of <html> when serving XML. */
    protected $xmlns = 'http://www.w3.org/1999/xhtml';
    /** Language code written to lang (HTML) or xml:lang (XML) on <html>. */
    protected $xmlLang = 'en';
    /** Seconds added to the current time for the Expires header; 0 sends no-cache headers. */
    public $expires = 0;

    /**
     * Loads the empty page skeleton into the supplied document.
     *
     * @param DOMDocument $dom     document to build the page in; its current content is replaced
     * @param string      $xmlLang language code for the <html> element
     */
    public function __construct($dom, $xmlLang="en") {
        $this->xmlLang = $xmlLang;
        $this->dom = $dom;
        $docstring = '<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE html><html><head /><body /></html>';
        $this->dom->loadXML($docstring);
        $this->domNodes();
    }

    /**
     * Legacy PHP4-style constructor name, kept so existing explicit calls
     * (e.g. parent::html5domdoc($dom) in a subclass) keep working. PHP 8 no
     * longer treats a method named after the class as a constructor, so the
     * real initialisation lives in __construct().
     *
     * @param DOMDocument $dom     document to build the page in
     * @param string      $xmlLang language code for the <html> element
     */
    public function html5domdoc($dom, $xmlLang="en") {
        $this->__construct($dom, $xmlLang);
    }

    /* any function that uses head / body should call this */
    protected function domNodes() {
        $this->xmlHtml = $this->dom->getElementsByTagName('html')->item(0);
        $this->xmlHead = $this->dom->getElementsByTagName('head')->item(0);
        $this->xmlBody = $this->dom->getElementsByTagName('body')->item(0);
    }

    /**
     * Appends every node of $nodes to $parent, in array order.
     *
     * @param DOMNode $parent receiving node
     * @param array   $nodes  list of DOMNode
     */
    private function appendAll($parent, array $nodes) {
        foreach ($nodes as $node) {
            $parent->appendChild($node);
        }
    }

    /**
     * Creates a <meta name="..." content="..."> element in the document.
     *
     * @param string $name    value of the name attribute
     * @param string $content value of the content attribute
     * @return DOMElement
     */
    private function createNamedMeta($name, $content) {
        $meta = $this->dom->createElement('meta');
        $meta->setAttribute('name', $name);
        $meta->setAttribute('content', $content);
        return $meta;
    }

    /**
     * Puts head elements in logical order, called by sendPage.
     *
     * Resulting order: CSP http-equiv metas, other http-equiv metas, charset,
     * rating / keywords / description / generator metas, metas with a name,
     * remaining metas, links, scripts, everything else, and finally the
     * title (a placeholder title is created when the head has none).
     */
    protected function adjustHead() {
        $metaEquiv = array();
        $metaName = array();
        $metaProperty = array();
        $links = array();
        $scripts = array();
        $misc = array();
        $newTitle = null;
        $newHead = $this->dom->createElement('head');

        // Children are cloned rather than moved: moving them would mutate the
        // live childNodes list while it is being iterated.
        foreach ($this->xmlHead->childNodes as $child) {
            $copy = $child->cloneNode(true);
            // Text and comment nodes have no tagName; they fall through to $misc.
            $tag = ($copy instanceof DOMElement) ? $copy->tagName : '';
            switch ($tag) {
                case 'meta':
                    if ($copy->hasAttribute('http-equiv')) {
                        if (strcmp($copy->getAttribute('http-equiv'), 'X-Content-Security-Policy') === 0) {
                            // CSP goes first so it precedes everything else in the head
                            $newHead->appendChild($copy);
                        } else {
                            $metaEquiv[] = $copy;
                        }
                    } elseif ($copy->hasAttribute('name')) {
                        $metaName[] = $copy;
                    } else {
                        $metaProperty[] = $copy;
                    }
                    break;
                case 'link':
                    $links[] = $copy;
                    break;
                case 'script':
                    $scripts[] = $copy;
                    break;
                case 'title':
                    // if several titles exist the last one wins
                    $newTitle = $copy;
                    break;
                default:
                    $misc[] = $copy;
                    break;
            }
        }

        $this->appendAll($newHead, $metaEquiv);

        $charset = $this->dom->createElement('meta');
        $charset->setAttribute('charset', 'UTF-8');
        $newHead->appendChild($charset);

        if ($this->rtalabel) {
            $newHead->appendChild($this->createNamedMeta('RATING', 'RTA-5042-1996-1400-1577-RTA'));
        }
        if (count($this->keywords) > 0) {
            $newHead->appendChild($this->createNamedMeta('keywords', implode(',', array_unique($this->keywords))));
        }
        if (strlen($this->description) > 0) {
            $newHead->appendChild($this->createNamedMeta('description', $this->description));
        }
        $genstring = 'PHP ' . phpversion() . ' DOMDocument/libxml2 ' . LIBXML_DOTTED_VERSION;
        $newHead->appendChild($this->createNamedMeta('generator', $genstring));

        $this->appendAll($newHead, $metaName);
        $this->appendAll($newHead, $metaProperty);

        // Links: known rel values are emitted in a fixed order. Links with any
        // other rel (alternate, preconnect, ...) used to be dropped silently;
        // they are now kept, in document order, ahead of prefetch/canonical so
        // that canonical remains the last link.
        $leading = array('preload', 'stylesheet', 'icon', 'shortcut icon', 'manifest');
        $trailing = array('prefetch', 'canonical');
        $byRel = array();
        $otherLinks = array();
        foreach ($links as $link) {
            $rel = $link->getAttribute('rel');
            if (in_array($rel, $leading, true) || in_array($rel, $trailing, true)) {
                $byRel[$rel][] = $link;
            } else {
                $otherLinks[] = $link;
            }
        }
        foreach ($leading as $rel) {
            if (isset($byRel[$rel])) {
                $this->appendAll($newHead, $byRel[$rel]);
            }
        }
        $this->appendAll($newHead, $otherLinks);
        foreach ($trailing as $rel) {
            if (isset($byRel[$rel])) {
                $this->appendAll($newHead, $byRel[$rel]);
            }
        }

        $this->appendAll($newHead, $scripts);
        $this->appendAll($newHead, $misc);

        if ($newTitle === null) {
            $newTitle = $this->dom->createElement('title', 'Page Title');
        }
        $newHead->appendChild($newTitle);

        $this->xmlHead->parentNode->replaceChild($newHead, $this->xmlHead);
        // keep the public handle pointing at the head that is actually in the document
        $this->xmlHead = $newHead;
    }

    /**
     * Removes every descendant of <body> with the given tag name.
     *
     * @param string $tagName element name to strip
     */
    private function removeBodyElements($tagName) {
        $found = $this->xmlBody->getElementsByTagName($tagName);
        // The node list is live: walk it backwards so a removal never shifts
        // the index of a node that has not been visited yet.
        for ($i = $found->length - 1; $i >= 0; $i--) {
            $node = $found->item($i);
            $node->parentNode->removeChild($node);
        }
    }

    /**
     * Strips script, embed and applet elements from the body, and removes
     * every object element whose type attribute is missing or not in the
     * object whitelist. Whitelisted objects get a typemustmatch attribute.
     */
    protected function sanitizeBody() {
        foreach (array('script', 'embed', 'applet') as $tagName) {
            $this->removeBodyElements($tagName);
        }
        $objects = $this->xmlBody->getElementsByTagName('object');
        for ($i = $objects->length - 1; $i >= 0; $i--) {
            $node = $objects->item($i);
            // 'null' is a sentinel for "no type attribute"
            $type = 'null';
            if ($node->hasAttribute('type')) {
                $type = strtolower(trim($node->getAttribute('type')));
            }
            if (in_array($type, $this->objectwhitelist, true)) {
                $node->setAttribute('typemustmatch', 'typemustmatch');
            } else {
                $node->parentNode->removeChild($node);
            }
        }
    }

    /**
     * Hardens anchors that open a new window (target="_blank").
     *
     * Adds rel="noopener" when absent, adds rel="noreferrer" when the href
     * contains "dreamstime", and supplies a title announcing the new window
     * when the anchor has an href but no title.
     */
    protected function sanitizeTargetLinks() {
        $anchors = $this->xmlBody->getElementsByTagName('a');
        for ($i = $anchors->length - 1; $i >= 0; $i--) {
            $anchor = $anchors->item($i);
            if (! $anchor->hasAttribute('target')) {
                continue;
            }
            if (trim(strtolower($anchor->getAttribute('target'))) !== '_blank') {
                continue;
            }
            // Splitting with NO_EMPTY means a missing or blank rel yields an
            // empty list rather than a list holding one empty token, which
            // previously produced rel=" noopener" with a leading space.
            $relTags = preg_split('/\s+/', strtolower($anchor->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
            if (! in_array('noopener', $relTags, true)) {
                $relTags[] = 'noopener';
            }
            if (! in_array('noreferrer', $relTags, true)) {
                $href = strtolower($anchor->getAttribute('href'));
                if (strpos($href, 'dreamstime') !== false) {
                    $relTags[] = 'noreferrer';
                }
            }
            $anchor->setAttribute('rel', implode(' ', $relTags));

            if (! $anchor->hasAttribute('title') && $anchor->hasAttribute('href')) {
                $href = trim($anchor->getAttribute('href'));
                // an href containing "://" is treated as an external link
                if (strpos($href, '://') !== false) {
                    $anchor->setAttribute('title', '[Opens new window, external link]');
                } else {
                    $anchor->setAttribute('title', '[Opens new window]');
                }
            }
        }
    }

    /**
     * Returns the first child of $node that is an element, or null when the
     * node has no element children (empty, text-only, comment-only, ...).
     *
     * @param DOMNode $node
     * @return DOMElement|null
     */
    private function firstChildElement($node) {
        foreach ($node->childNodes as $child) {
            if ($child instanceof DOMElement) {
                return $child;
            }
        }
        return null;
    }

    /**
     * Tells whether the element already carries aria-labelledby or aria-label.
     *
     * @param DOMElement $node
     * @return bool
     */
    private function hasAriaLabel($node) {
        return $node->hasAttribute('aria-labelledby') || $node->hasAttribute('aria-label');
    }

    /**
     * Sets the role attribute on $node unless the node is null or already has one.
     *
     * @param DOMElement|null $node
     * @param string          $role
     */
    private function setDefaultRole($node, $role) {
        if ($node !== null && ! $node->hasAttribute('role')) {
            $node->setAttribute('role', $role);
        }
    }

    /**
     * Adds ARIA roles and labels to the sectioning content of the page.
     *
     * Within each article in the body:
     *  - an aside without a role becomes role="region"; if it has no label
     *    and its first child element is an h2-h6, it is labelled
     *    "aside: <heading text>";
     *  - details whose class attribute is exactly "toc" get
     *    role="navigation" and, when unlabelled, the label "Table of Contents";
     *  - a section without a role becomes role="region" (only when the
     *    article has no toc) and, when unlabelled, is labelled from a leading
     *    h2 ("section: ...") or h3-h6 ("subsection: ...").
     * Document-wide, the first header, main and footer and every nav element
     * receive their landmark role when they have none.
     */
    protected function fixArticleLandmarks() {
        $asideHeadings = array('h2', 'h3', 'h4', 'h5', 'h6');
        $subHeadings = array('h3', 'h4', 'h5', 'h6');

        // Only attributes are changed below, so iterating the live node lists
        // directly is safe.
        foreach ($this->xmlBody->getElementsByTagName('article') as $article) {
            foreach ($article->getElementsByTagName('aside') as $aside) {
                if ($aside->hasAttribute('role')) {
                    continue;
                }
                $aside->setAttribute('role', 'region');
                if ($this->hasAriaLabel($aside)) {
                    continue;
                }
                $heading = $this->firstChildElement($aside);
                if ($heading !== null && in_array($heading->tagName, $asideHeadings, true)) {
                    $aside->setAttribute('aria-label', "aside: " . trim($heading->textContent));
                }
            }

            $hastoc = false;
            foreach ($article->getElementsByTagName('details') as $details) {
                if (! $details->hasAttribute('class')) {
                    continue;
                }
                if (strcmp($details->getAttribute('class'), 'toc') !== 0) {
                    continue;
                }
                $details->setAttribute('role', 'navigation');
                $hastoc = true;
                if (! $this->hasAriaLabel($details)) {
                    $details->setAttribute('aria-label', 'Table of Contents');
                }
            }

            foreach ($article->getElementsByTagName('section') as $section) {
                if ($section->hasAttribute('role')) {
                    continue;
                }
                if (! $hastoc) {
                    $section->setAttribute('role', 'region');
                }
                if ($this->hasAriaLabel($section)) {
                    continue;
                }
                // A section holding only text has no heading to label it
                // from; the null check replaces a child count that tested the
                // wrong variable and let such sections through.
                $heading = $this->firstChildElement($section);
                if ($heading === null) {
                    continue;
                }
                if ($heading->tagName === 'h2') {
                    $section->setAttribute('aria-label', "section: " . trim($heading->textContent));
                } elseif (in_array($heading->tagName, $subHeadings, true)) {
                    $section->setAttribute('aria-label', "subsection: " . trim($heading->textContent));
                }
            }
        }

        // fix header, main, footer: only the first of each in the document
        $this->setDefaultRole($this->dom->getElementsByTagName('header')->item(0), 'banner');
        $this->setDefaultRole($this->dom->getElementsByTagName('main')->item(0), 'main');
        $this->setDefaultRole($this->dom->getElementsByTagName('footer')->item(0), 'contentinfo');
        foreach ($this->dom->getElementsByTagName('nav') as $nav) {
            $this->setDefaultRole($nav, 'navigation');
        }
    }

    /**
     * Sends the headers, called by sendPage.
     *
     * @param bool   $HTML   true sends text/html, false application/xhtml+xml
     * @param string $status "200" (loosely compared) sends an explicit 200 status line
     */
    protected function sendHeader($HTML, $status="") {
        $now = time();
        if ($status == "200") {
            header("HTTP/1.1 200 OK");
        }
        if (strlen($this->cspstring) > 0) {
            header('Content-Security-Policy: ' . $this->cspstring);
        }
        if ($this->rtalabel) {
            header('Rating: RTA-5042-1996-1400-1577-RTA');
        }
        if ($HTML) {
            header('Content-Type: text/html; charset=utf-8');
        } else {
            header('Content-Type: application/xhtml+xml; charset=utf-8');
        }
        // gmdate() formats in GMT without touching the process-wide default
        // timezone, which the previous date_default_timezone_set('UTC') did.
        header('Last-Modified: ' . gmdate('D, d M Y H:i:s', $now) . ' GMT');
        header('Expires: ' . gmdate('D, d M Y H:i:s', $now + $this->expires) . ' GMT');
        if ($this->expires == 0) {
            header('Cache-Control: private, no-cache, must-revalidate');
            header('Pragma: no-cache');
        }
        header_remove('X-Powered-By');
    }

    /** Turns on the RTA rating meta tag and HTTP header for this page. */
    public function rtalabel() {
        $this->rtalabel = true;
    }

    /**
     * Allows <object> elements of the given MIME type to survive sanitizeBody().
     *
     * @param string $type MIME type; trimmed and lower-cased before being stored
     */
    public function whiteListObject($type) {
        $type = strtolower(trim($type));
        if (! in_array($type, $this->objectwhitelist, true)) {
            $this->objectwhitelist[] = $type;
        }
    }

    /**
     * Adds one keyword or an array of keywords for the keywords meta tag.
     *
     * @param array|string $arg keyword(s) to add
     */
    public function addKeywords($arg=array()) {
        if (is_array($arg)) {
            $this->keywords = array_merge($this->keywords, $arg);
        } else {
            $this->keywords[] = $arg;
        }
    }

    /**
     * Shared implementation of addOpenGraph() and addTwitterCard().
     *
     * @param string $attribute meta attribute that carries the property ('property' or 'name')
     * @param string $prefix    prefix the property must start with; prepended when missing
     * @param string $property  property name
     * @param string $content   value of the content attribute
     * @param string $set       written to data-2257 (only together with $model)
     * @param string $model     written to data-model (only together with $set)
     * @param string $orig      written to data-orig when not empty
     * @return false|null false when $property or $content is empty, otherwise nothing
     */
    private function addPrefixedMeta($attribute, $prefix, $property, $content, $set, $model, $orig) {
        if (strlen($property) === 0 || strlen($content) === 0) {
            return false;
        }
        if (strncmp($property, $prefix, strlen($prefix)) !== 0) {
            $property = $prefix . $property;
        }
        $set = trim($set);
        $model = trim($model);
        $orig = trim($orig);
        $meta = $this->dom->createElement('meta');
        $meta->setAttribute($attribute, $property);
        $meta->setAttribute('content', $content);
        if (strlen($set) !== 0 && strlen($model) !== 0) {
            $meta->setAttribute('data-2257', $set);
            $meta->setAttribute('data-model', $model);
        }
        if (strlen($orig) > 0) {
            $meta->setAttribute('data-orig', $orig);
        }
        // refresh the head handle so the tag lands in the head that is
        // currently part of the document
        $this->domNodes();
        $this->xmlHead->appendChild($meta);
    }

    /**
     * Appends an Open Graph <meta property="og:..."> tag to the head.
     *
     * @param string $property property name; "og:" is prepended when missing
     * @param string $content  value of the content attribute
     * @param string $set      optional data-2257 value (needs $model as well)
     * @param string $model    optional data-model value (needs $set as well)
     * @param string $orig     optional data-orig value
     * @return false|null false when $property or $content is empty, otherwise nothing
     */
    public function addOpenGraph($property, $content, $set='', $model='', $orig='') {
        return $this->addPrefixedMeta('property', 'og:', $property, $content, $set, $model, $orig);
    }

    /**
     * Appends a Twitter Card <meta name="twitter:..."> tag to the head.
     *
     * @param string $property property name; "twitter:" is prepended when missing
     * @param string $content  value of the content attribute
     * @param string $set      optional data-2257 value (needs $model as well)
     * @param string $model    optional data-model value (needs $set as well)
     * @param string $orig     optional data-orig value
     * @return false|null false when $property or $content is empty, otherwise nothing
     */
    public function addTwitterCard($property, $content, $set='', $model='', $orig='') {
        return $this->addPrefixedMeta('name', 'twitter:', $property, $content, $set, $model, $orig);
    }

    /**
     * Sets the text of the description meta tag.
     *
     * @param string $desc description text
     */
    public function addDescription($desc) {
        $this->description = $desc;
    }

    /**
     * Returns $filename with "-<mtime>" inserted before its extension when
     * $fspath is given and the file exists there; otherwise returns
     * $filename unchanged.
     *
     * @param string $filename  file name as referenced by the page
     * @param string $fspath    filesystem directory prefix, or '' to skip the lookup
     * @param string $extension extension without the dot ('css', 'js')
     * @return string
     */
    private function stampedName($filename, $fspath, $extension) {
        if (strlen($fspath) === 0) {
            return $filename;
        }
        $fullpath = $fspath . $filename;
        if (! file_exists($fullpath)) {
            return $filename;
        }
        $modtime = filemtime($fullpath);
        if ($modtime === false) {
            // unreadable mtime: fall back to the plain name rather than emit "name-.ext"
            return $filename;
        }
        return preg_replace('/\.' . preg_quote($extension, '/') . '$/', '-' . $modtime . '.' . $extension, $filename);
    }

    /**
     * Appends a stylesheet link to the head.
     *
     * @param string $stylename  stylesheet file name
     * @param string $serverpath URL prefix placed in front of the file name
     * @param string $fspath     optional filesystem prefix; when the file exists
     *                           there, its mtime is inserted before ".css"
     */
    public function addStyleSheet($stylename, $serverpath, $fspath="") {
        $stylename = trim($stylename);
        $serverpath = trim($serverpath);
        $fspath = trim($fspath);
        $this->domNodes();
        $stylename = $this->stampedName($stylename, $fspath, 'css');
        $style = $this->dom->createElement('link');
        $style->setAttribute('rel', 'stylesheet');
        $style->setAttribute('type', 'text/css');
        $style->setAttribute('href', $serverpath . $stylename);
        $this->xmlHead->appendChild($style);
    }

    /**
     * Appends an external script element to the head.
     *
     * @param string $scriptname script file name
     * @param string $serverpath URL prefix placed in front of the file name
     * @param string $fspath     optional filesystem prefix; when the file exists
     *                           there, its mtime is inserted before ".js"
     */
    public function addJavaScript($scriptname, $serverpath, $fspath="") {
        $scriptname = trim($scriptname);
        $serverpath = trim($serverpath);
        $fspath = trim($fspath);
        $this->domNodes();
        $scriptname = $this->stampedName($scriptname, $fspath, 'js');
        $script = $this->dom->createElement('script');
        $script->setAttribute('type', 'application/javascript');
        $script->setAttribute('src', $serverpath . $scriptname);
        $this->xmlHead->appendChild($script);
    }

    /**
     * Finalizes the document, sends the HTTP headers and prints the page.
     *
     * @param bool   $HTML   true serializes with saveHTML() as text/html,
     *                       false serializes with saveXML() as application/xhtml+xml
     * @param string $status pass "200" to send an explicit "200 OK" status line
     */
    public function sendPage($HTML=FALSE, $status="") {
        $this->domNodes();
        $this->sanitizeBody();
        $this->sanitizeTargetLinks();
        $this->fixArticleLandmarks();
        $this->adjustHead();
        if ($HTML) {
            $this->xmlHtml->setAttribute('lang', $this->xmlLang);
        } else {
            $this->xmlHtml->setAttribute('xmlns', $this->xmlns);
            $this->xmlHtml->setAttributeNS('http://www.w3.org/XML/1998/namespace', 'xml:lang', $this->xmlLang);
        }
        // only "200" is ever forwarded; anything else falls back to the default
        $this->sendHeader($HTML, ($status == "200") ? "200" : "");

        $search = array('/audio\/x-matroska;codecs=matroska/');
        $replace = array('audio/x-matroska;codecs=aac');
        if ($HTML) {
            // drop the closing tags saveHTML() writes for source/track and
            // put each head script on its own line
            $search[] = '/<\/source>/'; $replace[] = '';
            $search[] = '/<\/track>/'; $replace[] = '';
            $search[] = '/<\/script></'; $replace[] = "</script>\n<";
            print preg_replace($search, $replace, $this->dom->saveHTML());
        } else {
            // strip the "default:" prefix from element tags in the XML output
            $search[] = '/<default:/'; $replace[] = '<';
            $search[] = '/<\/default:/'; $replace[] = '</';
            print preg_replace($search, $replace, $this->dom->saveXML());
        }
    }
}
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment