Skip to content

Instantly share code, notes, and snippets.

@hakre
Created March 20, 2013 01:50
Show Gist options
  • Save hakre/5201711 to your computer and use it in GitHub Desktop.
Save hakre/5201711 to your computer and use it in GitHub Desktop.
schema.org microdata reading PHP class for use with DOMDocument or SimpleXML.
<?php
/*
* Micro Class For Micro Data
* Powered by libxml
*
* @author hakre <http://hakre.wordpress.com/>
* @version 0.7.1
*/
/**
 * Micro Class For Micro Data
 *
 * Read-only view onto microdata (the itemscope / itemprop / itemtype
 * attributes) of an XML or XHTML document, backed by SimpleXML.
 *
 * Every instance is anchored on one element. An instance anchored on an
 * element outside of any itemscope (normally the document root) exposes the
 * top-level items; an instance anchored on an item exposes its properties.
 */
class Micro implements JsonSerializable, IteratorAggregate
{
    /**
     * @var SimpleXMLElement element this instance is anchored on
     */
    private $xml;

    /**
     * @param string|DOMNode|SimpleXMLElement|Micro $xml markup (first non-whitespace
     *        character is "<"), a file name / URL to load, or an already parsed node
     * @throws InvalidArgumentException if the input can not be turned into a SimpleXMLElement
     */
    public function __construct($xml)
    {
        if ($xml instanceof self) {
            $xml = $xml->simplexml();
        }

        if (is_string($xml)) {
            $trimmed = ltrim($xml);
            if ('' === $trimmed) {
                throw new InvalidArgumentException('Empty string given, expected XML markup or a file name.');
            }
            $element = '<' === $trimmed[0]
                ? simplexml_load_string($trimmed)
                : simplexml_load_file($xml);
        } elseif ($xml instanceof DOMNode) {
            $element = simplexml_import_dom($xml);
        } else {
            $element = $xml;
        }

        // instanceof instead of a truthiness check: the loaders signal failure
        // with false/null, and a SimpleXMLElement itself may evaluate to false.
        if (!$element instanceof SimpleXMLElement) {
            throw new InvalidArgumentException(
                'Unable to obtain a SimpleXMLElement from the given XML string, file name or node.'
            );
        }

        $this->xml = $element;
    }

    /**
     * Items (itemscope elements) nested exactly one scope level below this element.
     *
     * @return Micro[]
     */
    public function scopes()
    {
        $query = sprintf(
            './/*[@itemscope and %d = count(ancestor-or-self::*[@itemscope])]',
            $this->scopeDepth() + 1
        );

        return $this->x($query);
    }

    /**
     * Properties (itemprop elements) on the scope level of this element.
     *
     * A property that opens a scope of its own (an embedded item) is one level
     * deeper than a plain property, which "+ boolean(@itemscope)" compensates.
     *
     * @return Micro[]
     */
    public function props()
    {
        $query = sprintf(
            './/*[@itemprop and %d + boolean(@itemscope) = count(ancestor-or-self::*[@itemscope])]',
            $this->scopeDepth()
        );

        return $this->x($query);
    }

    /**
     * @return Micro|null nearest itemprop element (this element or an ancestor)
     */
    public function prop()
    {
        return $this->y('(ancestor-or-self::*[@itemprop])[last()]');
    }

    /**
     * @return Micro|null nearest itemscope element (this element or an ancestor)
     */
    public function scope()
    {
        return $this->y('(ancestor-or-self::*[@itemscope])[last()]');
    }

    /**
     * @return string|null name of the nearest property, null if there is none
     */
    public function name()
    {
        $prop = $this->prop();

        return null === $prop ? null : $prop->getName();
    }

    /**
     * Value of this property.
     *
     * @return string|array|null null if this element is not a property, an array
     *         of Micro objects (see getIterator()) if the property is an embedded
     *         item, otherwise the "content" attribute or, without it, the text
     */
    public function content()
    {
        if (!$this->isProp()) {
            return null;
        }

        if ($this->isEmbed()) {
            return $this->arrayCompactSingle($this->toArrayNative());
        }

        if (isset($this->xml['content'])) {
            return (string) $this->xml['content'];
        }

        return $this->text();
    }

    /**
     * @return string text content of this element including all descendants
     */
    public function text()
    {
        return dom_import_simplexml($this->xml)->textContent;
    }

    /**
     * @return string[] urls used as types in this document, first entry is NULL if there are untyped scopes
     */
    public function types()
    {
        $seen       = array();
        $hasUntyped = false;

        foreach ($this->xpath('//*[@itemscope]') as $scoped) {
            $type = $scoped['itemtype'];
            if (null === $type) {
                $hasUntyped = true;
                continue;
            }
            // array keys de-duplicate while keeping document order
            $seen[(string) $type] = true;
        }

        // array_keys() turns integer-like keys into integers, cast them back
        $types = array_map('strval', array_keys($seen));
        if ($hasUntyped) {
            array_unshift($types, null);
        }

        return $types;
    }

    /**
     * @return string url of the type of the nearest scope, empty string if there is none
     */
    public function type()
    {
        return (string) $this->f($this->xpath('(ancestor-or-self::*[@itemscope])[last()]/@itemtype'));
    }

    /**
     * @return bool true if this element is a property and an item at once
     */
    public function isEmbed()
    {
        return $this->isProp() && $this->isScope();
    }

    /**
     * @return bool true if this element itself carries the itemprop attribute
     */
    public function isProp()
    {
        $prop = $this->prop();

        // "==" is intended here; it is relied on to tell whether both
        // SimpleXMLElement objects represent the same node. The explicit null
        // check avoids loosely comparing an element with null.
        return null !== $prop && $this->xml[0] == $prop->simplexml();
    }

    /**
     * @return bool true if this element itself carries the itemscope attribute
     */
    public function isScope()
    {
        $scope = $this->scope();

        // see isProp() for the reason of the loose object comparison
        return null !== $scope && $this->xml[0] == $scope->simplexml();
    }

    /**
     * @return string value of the itemprop attribute of this element
     */
    public function getName()
    {
        return (string) $this->xml['itemprop'];
    }

    /**
     * @return string value of the itemtype attribute of this element
     */
    public function getType()
    {
        return (string) $this->xml['itemtype'];
    }

    /**
     * @return bool true if at least one property of this element is an embedded item
     */
    public function hasEmbeddedItems()
    {
        foreach ($this->props() as $prop) {
            if ($prop->isEmbed()) {
                return true;
            }
        }

        return false;
    }

    /**
     * @return SimpleXMLElement
     */
    public function simplexml()
    {
        return $this->xml;
    }

    /**
     * Magic lookup, in this order: a nested item by type (names without an URL
     * scheme are prefixed with "http://schema.org/"), the value of a property
     * by name, and last the value of this element if it is a property itself.
     *
     * @param string $name
     * @return Micro|string|array|null
     * @throws Exception if nothing matches
     */
    public function __get($name)
    {
        ### say this is a root node of an XML/HTML element so we look for types first ###
        $url = $name;
        if ($url && !parse_url($url, PHP_URL_SCHEME)) {
            $url = 'http://schema.org/' . $url;
        }

        foreach ($this->scopes() as $scope) {
            if ($scope->type() === (string) $url) {
                return $scope;
            }
        }

        ### on a type element we look for properties first, right? ###
        foreach ($this->props() as $prop) {
            if ($prop->name() === (string) $name) {
                return $prop->content();
            }
        }

        if ($this->isProp()) {
            return $this->content();
        }

        ### final fallback to exception
        throw new Exception(sprintf('Wrong name "%s". Please check if it is properly written.', $name));
    }

    /**
     * Conversion helper between Micro and SimpleXML.
     *
     * @param string|DOMNode|SimpleXMLElement|Micro|null $xml
     * @return SimpleXMLElement|Micro the own element without an argument, the
     *         element of the given Micro, otherwise a new Micro for the argument
     */
    public function __invoke($xml = null)
    {
        if (null === $xml) {
            return $this->xml;
        }

        if ($xml instanceof Micro) {
            return $xml->simplexml();
        }

        return new Micro($xml);
    }

    /**
     * @return string the content if it is a string, the text otherwise
     */
    public function __toString()
    {
        $content = $this->content();

        return is_string($content) ? $content : $this->text();
    }

    /**
     * @param int|bool $i truthy for the complete markup, otherwise only its first line
     * @return string markup of this element
     */
    public function debug($i = 0)
    {
        $xml = $this->xml->asXML();

        return $i ? $xml : explode("\n", $xml, 2)[0];
    }

    /**
     * Number of itemscope elements among this element and its ancestors.
     *
     * @return int
     */
    private function scopeDepth()
    {
        return count($this->xpath('ancestor-or-self::*[@itemscope]'));
    }

    /**
     * Run an xpath query and wrap every result node.
     *
     * @param string $query
     * @return Micro[]
     */
    private function x($query)
    {
        $array = array();
        foreach ($this->xpath($query) as $result) {
            $array[] = new self($result);
        }

        return $array;
    }

    /**
     * Run an xpath query relative to this element.
     *
     * @param string $query
     * @return SimpleXMLElement[] always an array, also if the query failed
     */
    private function xpath($query)
    {
        $result = $this->xml->xpath($query);

        return is_array($result) ? $result : array();
    }

    /**
     * return the first result
     *
     * @param string $query
     * @return Micro|null if there is no first result
     */
    private function y($query)
    {
        return $this->f($this->x($query));
    }

    /**
     * @param array|bool|null $result
     * @return mixed|null first entry of the list, null for an empty / missing list
     */
    private function f($result)
    {
        return $result ? $result[0] : null;
    }

    /**
     * @return array|string|null nested Micro objects (serialized recursively by
     *         json_encode()) or the content of a plain property
     */
    #[\ReturnTypeWillChange]
    public function jsonSerialize()
    {
        $array = $this->toArrayNative();

        return $array ? $this->arrayCompactSingle($array) : $this->content();
    }

    /**
     * Plain array representation: items become nested arrays keyed by property
     * name (top-level items are keyed by type), properties become their content.
     * A name that occurs more than once holds a list of values.
     *
     * @return array
     */
    public function toArray()
    {
        return $this->export($this->arrayCompactSingle($this->toArrayNative()));
    }

    /**
     * Recursively replace Micro objects by arrays (items) or content (properties).
     *
     * @param mixed $value Micro, (nested) array of Micro or an already plain value
     * @return mixed
     */
    private function export($value)
    {
        if ($value instanceof self) {
            // isScope() and not isEmbed(): top-level items are no properties
            // but have to be expanded as well
            return $value->isScope() ? $value->toArray() : $value->content();
        }

        if (is_array($value)) {
            // repeated names are lists of Micro objects; keys are preserved
            return array_map(array($this, 'export'), $value);
        }

        // e.g. the text of an item that has no properties
        return $value;
    }

    /**
     * Group the children by key, every key holding a list.
     *
     * @return array outside of any scope the nested items by type; on an item
     *         its properties by name (or its text as only entry if it has no
     *         properties); empty for everything else
     */
    private function toArrayNative()
    {
        $array = array();

        if (!$this->scope()) {
            foreach ($this->scopes() as $scope) {
                $array[$scope->getType()][] = $scope;
            }

            return $array;
        }

        if ($this->isScope()) {
            foreach ($this->props() as $prop) {
                $array[$prop->getName()][] = $prop;
            }

            if (!$array) {
                $array = array($this->text());
            }
        }

        return $array;
    }

    /**
     * Replace every list that has exactly one entry (at index 0) by that entry.
     *
     * @param array $array
     * @return array
     */
    private function arrayCompactSingle(array $array)
    {
        foreach ($array as $key => $value) {
            if (is_array($value) && 1 === count($value) && array_key_exists(0, $value)) {
                $array[$key] = $value[0];
            }
        }

        return $array;
    }

    /**
     * @return ArrayIterator over the nested items / properties as Micro objects
     */
    #[\ReturnTypeWillChange]
    public function getIterator()
    {
        return new ArrayIterator($this->arrayCompactSingle($this->toArrayNative()));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment