Instantly share code, notes, and snippets.
Created
December 30, 2012 20:52
-
Star
0
(0)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
Save hakre/4415105 to your computer and use it in GitHub Desktop.
Some very rudimentary Tagsoup Example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Tagsoup
 *
 * Very small regex-based tokenizer for tag-soup markup. It recognises
 * comments, start tags and end tags; everything else is reported as text.
 */
class Tagsoup
{
    const NODETYPE_COMMENT = 1;
    const NODETYPE_TEXT = 2;
    const NODETYPE_STARTTAG = 3;
    const NODETYPE_ENDTAG = 4;

    /**
     * Returns the token that begins at $offset in $string.
     *
     * If a comment/start tag/end tag begins exactly at $offset it is returned;
     * otherwise the text run from $offset up to the next such token (or the
     * end of the string) is returned as a text token.
     *
     * @param string $string subject to tokenize
     * @param int $offset byte offset to start at
     * @return array|bool array(type, offset, length, text), or FALSE when
     *                    $offset is at or beyond the end of $string
     */
    public static function parseAt($string, $offset) {
        $mode = 'is';
        $tokens = [
            // Lazy quantifier: with the "s" modifier a greedy ".*" would run
            // up to the LAST "-->" in the subject and swallow everything
            // between the first and the last comment.
            self::NODETYPE_COMMENT => '<!--.*?-->',
            self::NODETYPE_STARTTAG => '<(?:[a-z]+:)?[a-z]+(\s+(?:[a-z]+:)?[a-z]+="[^"]+")*\s*>',
            self::NODETYPE_ENDTAG => '<\/(?:[a-z]+:)?[a-z]+\s*>',
        ];
        $rest = self::NODETYPE_TEXT;

        // $found tracks the position of the nearest token; it starts at the
        // end of the string so that "no token" means "text until the end".
        $found = strlen($string);
        $foundToken = FALSE;

        if ($offset >= $found) {
            return $foundToken;
        }

        foreach ($tokens as $name => $token) {
            $r = preg_match("/$token/$mode", $string, $matches, PREG_OFFSET_CAPTURE, $offset);
            if (!$r) {
                continue;
            }
            $position = $matches[0][1];
            if ($position < $found) {
                $found = $position;
                $foundToken = [$name, $position, strlen($matches[0][0]), $matches[0][0]];
            }
        }

        // The nearest token does not start at $offset: hand out the text in
        // front of it first. The token itself is found again by the next call.
        if ($found > $offset) {
            $buffer = substr($string, $offset, $found - $offset);
            $foundToken = [$rest, $offset, strlen($buffer), $buffer];
        }

        return $foundToken;
    }
}
/**
 * One token of the parsed input: its type, where it sits in the subject
 * string, and its raw text.
 */
class TagsoupNode extends Tagsoup
{
    protected $type;
    protected $string;
    protected $length;
    protected $offset;

    /**
     * Protected: instances are built through create() / createFromArray().
     *
     * @param int $type one of the NODETYPE_* constants
     * @param int $offset start offset of the token
     * @param int $length length of the token
     * @param string $string raw token text
     */
    protected function __construct($type, $offset, $length, $string) {
        $this->type = $type;
        $this->offset = $offset;
        $this->length = $length;
        $this->string = $string;
    }

    /**
     * Builds a node from a array(type, offset, length, string) tuple.
     *
     * @param array $array
     * @return TagsoupNode
     * @throws InvalidArgumentException
     */
    public static function createFromArray(array $array) {
        list($type, $offset, $length, $string) = $array;
        return self::create($type, $offset, $length, $string);
    }

    /**
     * Builds the node object matching $type: text and comment tokens become
     * TagsoupNode, start and end tags become TagsoupTag.
     *
     * @param int $type
     * @param int $offset
     * @param int $length
     * @param string $string
     * @return TagsoupNode
     * @throws InvalidArgumentException when $type is not a known NODETYPE_* value
     */
    public static function create($type, $offset, $length, $string) {
        switch ($type) {
            case self::NODETYPE_TEXT:
            case self::NODETYPE_COMMENT:
                $typeName = 'Node';
                break;
            case self::NODETYPE_STARTTAG:
            case self::NODETYPE_ENDTAG:
                $typeName = 'Tag';
                break;
            default:
                throw new InvalidArgumentException(sprintf("Unknown Type '%s'.", $type));
        }

        /* @var $node TagsoupNode */
        $class = 'Tagsoup' . $typeName;
        $node = new $class($type, $offset, $length, $string);
        return $node;
    }

    /** @return int one of the NODETYPE_* constants */
    public function getType() {
        return $this->type;
    }

    /** @return int offset of the first character of the token */
    public function getStart() {
        return $this->offset;
    }

    /** @return int offset directly behind the token (start + length) */
    public function getEnd() {
        return $this->offset + $this->length;
    }

    /** @return int */
    public function getLength() {
        return $this->length;
    }

    /** @return string the raw token text */
    public function __toString() {
        return $this->string;
    }
}
/**
 * Node for start and end tags; adds access to the tag name.
 */
class TagsoupTag extends TagsoupNode
{
    /**
     * Extracts the tag name, including an optional "prefix:" part, from the
     * raw tag text.
     *
     * @return string
     */
    public function getTagName() {
        $pattern = '/^<\/?((?:[a-z]+:)?[a-z]+)/is';
        preg_match($pattern, $this->string, $parts);
        return $parts[1];
    }

    /**
     * Returns the part of the tag name in front of the first colon.
     *
     * @return string|bool the prefix, or FALSE when the name has no colon
     */
    public function getTagNsPrefix() {
        // strstr() with $before_needle = true yields everything before the
        // first ":" and FALSE when there is none.
        return strstr($this->getTagName(), ':', true);
    }
}
/**
 * Iterates over the tokens of a string, one TagsoupNode per step, by
 * repeatedly calling Tagsoup::parseAt() at the current offset.
 */
class TagsoupIterator implements Iterator
{
    private $string;
    private $startOffset;
    private $offset;
    private $index;

    /**
     * Last parse result as array(offset, token), or NULL when nothing has
     * been parsed yet. Lets valid(), current() and next() share one parse
     * per position instead of re-running all patterns each time.
     */
    private $parsed = NULL;

    /**
     * @param string $string subject to tokenize
     * @param int $startOffset offset the iteration (and every rewind) starts at
     */
    public function __construct($string, $startOffset = 0) {
        $this->string = $string;
        $this->offset = $this->startOffset = $startOffset;
        $this->index = 0;
    }

    /** @return int offset of the current token */
    public function getOffset() {
        return $this->offset;
    }

    /**
     * @return TagsoupNode|null node at the current offset, NULL when the
     *                          iterator is exhausted
     */
    #[\ReturnTypeWillChange]
    public function current() {
        $token = $this->tokenAtOffset();
        if (!$token) {
            return NULL;
        }
        return TagsoupNode::createFromArray($token);
    }

    /**
     * Moves to the offset directly behind the current token. Does nothing
     * when there is no current token.
     */
    #[\ReturnTypeWillChange]
    public function next() {
        $node = $this->current();
        if ($node === NULL) {
            return;
        }
        $this->offset = $node->getEnd();
        $this->index++;
    }

    /** @return int zero-based number of the current token */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->index;
    }

    /** @return bool whether a token exists at the current offset */
    #[\ReturnTypeWillChange]
    public function valid() {
        return (bool)$this->tokenAtOffset();
    }

    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->offset = $this->startOffset;
        $this->index = 0;
    }

    /**
     * Parses the token at the current offset, reusing the previous result
     * when the offset has not changed since (the subject string never
     * changes after construction).
     *
     * @return array|bool token tuple from Tagsoup::parseAt(), or FALSE
     */
    private function tokenAtOffset() {
        if ($this->parsed === NULL || $this->parsed[0] !== $this->offset) {
            $this->parsed = array(
                $this->offset,
                Tagsoup::parseAt($this->string, $this->offset),
            );
        }
        return $this->parsed[1];
    }
}
// $test = Tagsoup::parseAt($string, 0); | |
// var_dump($test, tagsoupNode::createFromArray($test)); | |
/**
 * Forward-only search helpers on top of a TagsoupIterator.
 *
 * NOTE(review): the inner iterator is wrapped in NoRewindIterator, so each
 * foreach over $this presumably resumes at the node where the previous loop
 * stopped (i.e. the node that was just returned) - confirm before relying
 * on it.
 */
class TagsoupForwardNavigator extends IteratorIterator
{
    private $it;

    /**
     * Rewinds $it once and then iterates it without further rewinds.
     *
     * @param TagsoupIterator $it
     */
    public function __construct(TagsoupIterator $it) {
        $it->rewind();
        $this->it = $it;
        $forwardOnly = new NoRewindIterator($it);
        parent::__construct($forwardOnly);
    }

    /**
     * Walks forward to the first start tag whose name is $name.
     *
     * @param $name
     * @return TagsoupNode|null the matching tag, NULL if the input ends first
     */
    public function nextStartTag($name) {
        foreach ($this as $candidate) {
            if ($candidate->getType() !== Tagsoup::NODETYPE_STARTTAG) {
                continue;
            }
            if ($candidate->getTagName() === $name) {
                return $candidate;
            }
        }
        return NULL;
    }

    /**
     * Collects nodes until an end tag named $name (not included) or the end
     * of the input is reached.
     *
     * @param $name
     * @return TagsoupNode[]
     */
    public function getUntilEndTag($name) {
        $collected = array();
        foreach ($this as $candidate) {
            $isEndTag = $candidate->getType() === Tagsoup::NODETYPE_ENDTAG;
            if ($isEndTag && $candidate->getTagName() === $name) {
                return $collected;
            }
            $collected[] = $candidate;
        }
        return $collected;
    }

    /**
     * Walks forward to the first node for which $condition($node, $this)
     * returns a truthy value.
     *
     * @param callable $condition
     * @return TagsoupNode|null the accepted node, NULL if none is accepted
     */
    public function nextCondition(callable $condition) {
        foreach ($this as $candidate) {
            if (!$condition($candidate, $this)) {
                continue;
            }
            return $candidate;
        }
        return NULL;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment