Created
March 20, 2013 01:50
-
-
Save hakre/5201711 to your computer and use it in GitHub Desktop.
schema.org microdata reading PHP class for use with DOMDocument or SimpleXML.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/*
 * Micro Class For Micro Data
 * Powered by libxml
 *
 * @author hakre <http://hakre.wordpress.com/>
 * @version 0.7.1
 */
/**
 * Micro Class For Micro Data
 *
 * Wraps a single SimpleXMLElement and reads microdata (itemscope / itemtype /
 * itemprop attributes) from it with XPath queries that are evaluated relative
 * to the wrapped element. Every query result is wrapped in a new Micro again,
 * so the same API is available on the document, on items and on properties.
 */
class Micro implements JsonSerializable, IteratorAggregate
{
    /**
     * @var SimpleXMLElement the wrapped element, context node of all queries
     */
    private $xml;

    /**
     * @param string|DOMNode|SimpleXMLElement $xml markup (a string whose first
     *        non-whitespace character is "<"), a path/URL to load, a DOM node
     *        or a SimpleXMLElement
     * @throws InvalidArgumentException if no SimpleXMLElement could be obtained
     */
    public function __construct($xml)
    {
        if (is_string($xml)) {
            // leading whitespace is dropped so that it neither hides the "<"
            // nor ends up in front of an XML declaration
            $markup = ltrim($xml);
            if ($markup === '') {
                throw new InvalidArgumentException('Empty string given, expected XML markup or a file name.');
            }
            $xml = strncmp($markup, '<', 1) === 0
                ? simplexml_load_string($markup)
                : simplexml_load_file($xml);
        } elseif ($xml instanceof DOMNode) {
            $xml = simplexml_import_dom($xml);
        }

        // the loaders signal failure with false/null; fail here instead of
        // with "call to a member function on bool" at the first query
        if (!($xml instanceof SimpleXMLElement)) {
            throw new InvalidArgumentException('Unable to obtain a SimpleXMLElement from the given input.');
        }

        $this->xml = $xml;
    }

    /**
     * Item scopes exactly one itemscope level below this element.
     *
     * @return Micro[]
     */
    public function scopes()
    {
        $depth = count($this->x('ancestor-or-self::*[@itemscope]'));
        $query = sprintf('.//*[@itemscope and %d = count(ancestor-or-self::*[@itemscope])]', $depth + 1);

        return $this->x($query);
    }

    /**
     * Properties that belong to the itemscope level of this element.
     *
     * A property that opens a scope of its own counts one level deeper for
     * itself, which is what the boolean(@itemscope) term compensates for.
     *
     * @return Micro[]
     */
    public function props()
    {
        $depth = count($this->x('ancestor-or-self::*[@itemscope]'));
        $query = sprintf('.//*[@itemprop and %d + boolean(@itemscope) = count(ancestor-or-self::*[@itemscope])]', $depth);

        return $this->x($query);
    }

    /**
     * @return Micro|null nearest itemprop element (this element or an ancestor)
     */
    public function prop()
    {
        return $this->y('(ancestor-or-self::*[@itemprop])[last()]');
    }

    /**
     * @return Micro|null nearest itemscope element (this element or an ancestor)
     */
    public function scope()
    {
        return $this->y('(ancestor-or-self::*[@itemscope])[last()]');
    }

    /**
     * @return string|null name of the nearest property, null if there is none
     */
    public function name()
    {
        $prop = $this->prop();

        return $prop ? $prop->getName() : null;
    }

    /**
     * Value of this element when it is a property.
     *
     * @return string|array|null null if this element is not a property, an
     *         array of its properties if it is an embedded item, otherwise
     *         the "content" attribute or, without one, the text content
     */
    public function content()
    {
        if (!$this->isProp()) {
            return null;
        }

        if ($this->isEmbed()) {
            return $this->arrayCompactSingle($this->toArrayNative());
        }

        if (isset($this->xml['content'])) {
            return (string) $this->xml['content'];
        }

        return $this->text();
    }

    /**
     * @return string text content of this element including all descendants
     */
    public function text()
    {
        return dom_import_simplexml($this->xml)->textContent;
    }

    /**
     * @return string[] urls used as types in this document, first entry is NULL if there are untyped scopes
     */
    public function types()
    {
        $seen    = [];
        $untyped = false;

        foreach ($this->xpath('//*[@itemscope]') as $scoped) {
            $type = $scoped['itemtype'];
            if (null === $type) {
                $untyped = true;
            } else {
                // used as key to collect every type only once
                $seen[(string) $type] = 1;
            }
        }

        $types = array_keys($seen);
        if ($untyped) {
            array_unshift($types, null);
        }

        return $types;
    }

    /**
     * @return string url of the type of the nearest scope, empty string if none
     */
    public function type()
    {
        return (string) $this->f($this->xpath('(ancestor-or-self::*[@itemscope])[last()]/@itemtype'));
    }

    /**
     * @return bool true if this element is both a property and a scope
     */
    public function isEmbed()
    {
        return $this->isProp() && $this->isScope();
    }

    /**
     * @return bool true if this element itself carries itemprop
     */
    public function isProp()
    {
        return $this->xml[0] == (($prop = $this->prop()) ? $prop->simplexml() : $prop);
    }

    /**
     * @return bool true if this element itself carries itemscope
     */
    public function isScope()
    {
        return $this->xml[0] == (($scope = $this->scope()) ? $scope->simplexml() : $scope);
    }

    /**
     * @return string value of the itemprop attribute of this element
     */
    public function getName()
    {
        return (string) $this->xml['itemprop'];
    }

    /**
     * @return string value of the itemtype attribute of this element
     */
    public function getType()
    {
        return (string) $this->xml['itemtype'];
    }

    /**
     * @return bool true if at least one property of this element is an
     *         embedded item (a property that is a scope as well)
     */
    public function hasEmbeddedItems()
    {
        foreach ($this->props() as $prop) {
            if ($prop->isEmbed()) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return SimpleXMLElement the wrapped element
     */
    public function simplexml()
    {
        return $this->xml;
    }

    /**
     * Magic lookup: first a child scope by type, then a property by name,
     * finally the content of this element if it is a property itself.
     *
     * @param string $name type (a name without URL scheme is prefixed with
     *        http://schema.org/) or property name
     * @return Micro|string|array|null
     * @throws Exception if nothing matches
     */
    public function __get($name)
    {
        $name = (string) $name;

        ### say this is a root node of an XML/HTML element so we look for types first ###
        $url = $name;
        if ($url && !parse_url($url, PHP_URL_SCHEME)) {
            $url = 'http://schema.org/' . $url;
        }
        foreach ($this->scopes() as $scope) {
            if ($scope->type() === $url) {
                return $scope;
            }
        }

        ### on a type element we look for properties first, right? ###
        foreach ($this->props() as $prop) {
            if ($prop->name() === $name) {
                return $prop->content();
            }
        }

        if ($this->isProp()) {
            return $this->content();
        }

        ### final fallback to exception
        throw new Exception(sprintf('Wrong name "%s". Please check if it is properly written.', $name));
    }

    /**
     * @param string|DOMNode|SimpleXMLElement|Micro|null $xml
     * @return Micro|SimpleXMLElement the wrapped element of this object when
     *         called without argument, the wrapped element of a given Micro,
     *         otherwise a new Micro created from the argument
     */
    public function __invoke($xml = null)
    {
        if (null === $xml) {
            return $this->xml;
        }

        if ($xml instanceof Micro) {
            return $xml->simplexml();
        }

        return new Micro($xml);
    }

    /**
     * @return string content() if that is a string, otherwise the text content
     */
    public function __toString()
    {
        $content = $this->content();

        return is_string($content) ? $content : $this->text();
    }

    /**
     * @param int|bool $i truthy for the full XML, falsy for its first line only
     * @return string
     */
    public function debug($i = 0)
    {
        $xml = $this->xml->asXML();

        return $i ? $xml : explode("\n", $xml, 2)[0];
    }

    /**
     * Run a query and wrap every result.
     *
     * @param string $query
     * @return Micro[]
     */
    private function x($query)
    {
        $wrapped = [];
        foreach ($this->xpath($query) as $result) {
            $wrapped[] = new self($result);
        }

        return $wrapped;
    }

    /**
     * Run a query relative to the wrapped element.
     *
     * @param string $query
     * @return SimpleXMLElement[] always an array, also if the query fails
     */
    private function xpath($query)
    {
        $result = $this->xml->xpath($query);

        return is_array($result) ? $result : [];
    }

    /**
     * return the first result
     *
     * @param string $query
     * @return Micro|null if there is no first result
     */
    private function y($query)
    {
        return $this->f($this->x($query));
    }

    /**
     * @param array $result
     * @return mixed|null first entry, null for an empty result
     */
    private function f($result)
    {
        return $result ? $result[0] : null;
    }

    /**
     * @return array|string|null compacted properties/scopes (still Micro
     *         objects, json_encode() serializes them in turn) or, if there
     *         are none, the content of this element
     */
    #[\ReturnTypeWillChange]
    public function jsonSerialize()
    {
        $array = $this->toArrayNative();

        return $array ? $this->arrayCompactSingle($array) : $this->content();
    }

    /**
     * Export this element as nested plain arrays and strings.
     *
     * Scopes become arrays of their properties, other properties become
     * their content; a name that occurs more than once maps to a list.
     *
     * @return array
     */
    public function toArray()
    {
        $array = [];
        foreach ($this->arrayCompactSingle($this->toArrayNative()) as $key => $value) {
            $array[$key] = $this->exportValue($value);
        }

        return $array;
    }

    /**
     * Turn one entry of the native array into plain data.
     *
     * @param Micro|array|string $value
     * @return array|string|null
     */
    private function exportValue($value)
    {
        if ($value instanceof self) {
            return $value->isScope() ? $value->toArray() : $value->content();
        }

        if (is_array($value)) {
            // several values under the same key
            return array_map([$this, 'exportValue'], $value);
        }

        // plain text of a scope that has no properties
        return $value;
    }

    /**
     * @return array outside of any scope: child scopes grouped by itemtype;
     *         on a scope: its properties grouped by name or, without any
     *         property, a list with the text content; otherwise empty
     */
    private function toArrayNative()
    {
        $array = [];

        if (!$this->scope()) {
            foreach ($this->scopes() as $scope) {
                $array[$scope->getType()][] = $scope;
            }

            return $array;
        }

        if ($this->isScope()) {
            foreach ($this->props() as $prop) {
                $array[$prop->getName()][] = $prop;
            }

            return $array ? $array : [$this->text()];
        }

        return $array;
    }

    /**
     * Replace every entry that is a list with exactly one element at index 0
     * by that element.
     *
     * @param array $array
     * @return array
     */
    private function arrayCompactSingle(array $array)
    {
        foreach ($array as $key => $value) {
            if (is_array($value) && count($value) === 1 && array_key_exists(0, $value)) {
                $array[$key] = $value[0];
            }
        }

        return $array;
    }

    /**
     * @return ArrayIterator over the compacted properties/scopes
     */
    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        return new ArrayIterator($this->arrayCompactSingle($this->toArrayNative()));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment