dantman · October 24, 2012 03:14
diff --git a/TemplateParser.php b/TemplateParser.php
 <?php

 /**
  * Common base type for every node in the parsed template tree.
  *
  * It declares no members of its own; it exists so that containers such as
  * TemplateBlock::append() can type-hint against a single node type.
  */
 class TemplateNode
 {
 }

 /**
  * Tree node appended by TemplateParser::emit() for a 'text' token.
  *
  * Currently an empty marker type: it declares no members.
  */
 class TemplateText extends TemplateNode
 {
 }

 /**
  * Tree node appended by TemplateParser::emit() for a 'cond' token.
  * Currently an empty marker type.
  */
 class TemplateCondition extends TemplateNode
 {
 }
 /**
  * Tree node appended by TemplateParser::emit() for a 'func' token.
  * Currently an empty marker type.
  */
 class TemplateFunction extends TemplateNode
 {
 }
 /**
  * Tree node appended by TemplateParser::emit() for a 'subst' token.
  * Currently an empty marker type.
  */
 class TemplateSubstitution extends TemplateNode
 {
 }

 /**
  * A node that owns an ordered list of child nodes.
  *
  * Children are kept in an SplDoublyLinkedList in insertion order. The list
  * is private and this class exposes no accessor for it.
  */
 class TemplateBlock extends TemplateNode {

 	/** @var SplDoublyLinkedList Child nodes, oldest first. */
 	private $children;

 	/**
 	 * Start with an empty child list.
 	 */
 	public function __construct() {
 		$list = new SplDoublyLinkedList();
 		$this->children = $list;
 	}

 	/**
 	 * Add a node after all existing children.
 	 *
 	 * @param TemplateNode $node Node to add as the last child.
 	 */
 	public function append( TemplateNode $node ) {
 		$list = $this->children;
 		$list->push( $node );
 	}

 }

 /**
  * A block node created by TemplateParser::emit() for an opening tag.
  *
  * The parser assigns $name and $nameHint right after construction. They are
  * declared here (instead of being created dynamically) so that reading them
  * never hits an undefined property and so the class does not rely on dynamic
  * properties, which are deprecated as of PHP 8.2.
  */
 class TemplateElement extends TemplateBlock {

 	/** @var string|null Tag name, or null for a "null" mw: tag that has no tag name. */
 	public $name = null;

 	/** @var string|null Name hint copied from the tag token; null when the token carried none. */
 	public $nameHint = null;

 }

 /**
  * Root block of a parsed template.
  *
  * TemplateParser::parseDocument() creates one instance, pushes it as the
  * first entry of the tag stack and returns it. Declares no members itself.
  */
 class TemplateDocument extends TemplateBlock
 {
 }


 /**
  * Result of a successful regular-expression match made by a parser.
  *
  * Holds the capture groups, the offset the match started at, and the parser
  * that produced it so the match can later be undone via rollback().
  *
  * NOTE(review): `match` is a reserved keyword as of PHP 8.0, so this class
  * name only parses on older PHP versions. Renaming it requires updating every
  * `new Match(...)` call site at the same time.
  */
 class Match {

 	/** @var object Parser that produced this match; must expose rollback( $chars, $to ). */
 	private $owner;

 	/** @var int Parser position at which the match began. */
 	private $offset;

 	/** @var array Capture groups as filled in by preg_match(); index 0 is the whole match. */
 	private $groups;

 	/**
 	 * @param object $parser  Parser that produced the match.
 	 * @param int    $start   Parser position at which the match began.
 	 * @param array  $matches Capture groups from preg_match().
 	 */
 	public function __construct( $parser, $start, $matches ) {
 		$this->owner  = $parser;
 		$this->offset = $start;
 		$this->groups = $matches;
 	}

 	/**
 	 * Fetch one capture group.
 	 *
 	 * @param int|string $id Group index or name.
 	 * @return string Text captured by that group.
 	 */
 	public function group( $id ) {
 		$groups = $this->groups;
 		return $groups[$id];
 	}

 	/**
 	 * @return string The entire matched text (group 0).
 	 */
 	public function text() {
 		return $this->group( 0 );
 	}

 	/**
 	 * @return int Length of the entire match in bytes.
 	 */
 	public function length() {
 		$whole = $this->text();
 		return strlen( $whole );
 	}

 	/**
 	 * Ask the owning parser to move back to where this match started.
 	 * The parser validates that nothing else was consumed in between.
 	 */
 	public function rollback() {
 		$size = $this->length();
 		$this->owner->rollback( $size, $this->offset );
 	}
 }

 /**
  * Thin wrapper marking a string as a ready-made regular expression.
  *
  * AParser::makeRegexp() uses the wrapper to tell a pattern (used as-is) apart
  * from a plain string (which it quotes literally).
  */
 class Regexp {

 	/** @var string Delimited pattern without trailing modifiers added by the parser. */
 	private $pattern;

 	/**
 	 * @param string $source Delimited regular expression, e.g. '/[a-z]+/i'.
 	 */
 	public function __construct( $source ) {
 		$this->pattern = $source;
 	}

 	/**
 	 * @return string The pattern exactly as passed to the constructor.
 	 */
 	public function source() {
 		$pattern = $this->pattern;
 		return $pattern;
 	}

 }

 /**
  * Minimal base class for hand-written, regexp-driven parsers.
  *
  * The parser keeps the whole input string plus a cursor ($pos). Every match
  * is anchored at the cursor; helpers either peek (nextIs), advance the cursor
  * (consume, capture, reap, ws) or undo a single consume (rollback).
  */
 class AParser {

 	/** @var string Full input being parsed. */
 	protected $string;

 	/** @var int Offset of the next unparsed byte (used as the preg_match() offset). */
 	protected $pos;

 	/**
 	 * @param string $string Input to parse.
 	 */
 	public function __construct( $string ) {
 		$this->string = $string;
 		$this->pos = 0;
 	}

 	/**
 	 * Undo the most recent consume.
 	 *
 	 * Public only because Match::rollback() has to call it; parser code should
 	 * go through the Match object instead of calling this directly.
 	 *
 	 * @protected
 	 * @param int $chars Number of bytes to move the cursor back.
 	 * @param int $to    Position the cursor is expected to end up at.
 	 * @throws Exception If moving back $chars bytes would not land on $to,
 	 *   i.e. more input was consumed after the match being rolled back.
 	 */
 	public function rollback( $chars, $to ) {
 		if ( $this->pos - $chars !== $to ) {
 			throw new Exception( __METHOD__ . ': Parser implementation error. A rollback was atempted after more data had been parsed.' );
 		}
 		$this->pos -= $chars;
 	}

 	/**
 	 * Turn a test value into a delimited pattern string.
 	 *
 	 * @param string|Regexp $test Plain string (matched literally) or Regexp (used as-is).
 	 * @return string Pattern delimited with "/" and without trailing modifiers.
 	 * @throws Exception For any other type of $test.
 	 */
 	protected function makeRegexp( $test ) {
 		if ( is_string( $test ) ) {
 			return '/' . preg_quote( $test, '/' ) . '/';
 		}
 		if ( $test instanceof Regexp ) {
 			return $test->source();
 		}
 		throw new Exception( __METHOD__ . ': Parser implementation error. Unknown type of data was patched to a parser match function.' );
 	}

 	/**
 	 * Try to match a pattern exactly at the cursor without moving it.
 	 *
 	 * @param string $re Delimited pattern; the modifiers "ADsu" are appended
 	 *   (anchored, dollar-end-only, dot-all, UTF-8).
 	 * @return Match|false Match object on success, false when nothing matches.
 	 * @throws Exception When preg_match() itself fails (for example on input
 	 *   that is not valid UTF-8). Previously such failures were silently
 	 *   reported as "no match".
 	 */
 	protected function reMatch( $re ) {
 		$re .= 'ADsu';
 		$m = null;
 		$found = preg_match( $re, $this->string, $m, 0, $this->pos );
 		if ( $found === false ) {
 			throw new Exception( __METHOD__ . ': preg_match() failed with PCRE error code ' . preg_last_error() . ' for pattern ' . $re . '.' );
 		}
 		if ( $found ) {
 			return new Match( $this, $this->pos, $m );
 		}
 		return false;
 	}

 	/**
 	 * Peek: does the input at the cursor match $test?
 	 *
 	 * @param string|Regexp $test
 	 * @return bool
 	 */
 	protected function nextIs( $test ) {
 		return (bool)$this->reMatch( $this->makeRegexp( $test ) );
 	}

 	/**
 	 * Match $test at the cursor and, on success, advance past it.
 	 *
 	 * @param string|Regexp $test
 	 * @return Match|false
 	 */
 	protected function consume( $test ) {
 		$m = $this->reMatch( $this->makeRegexp( $test ) );
 		if ( !$m ) {
 			return false;
 		}
 		$this->pos += $m->length();
 		return $m;
 	}

 	/**
 	 * Like consume(), but return only the matched text.
 	 *
 	 * @param string|Regexp $test
 	 * @return string|false
 	 */
 	protected function capture( $test ) {
 		$m = $this->consume( $test );
 		return $m ? $m->text() : false;
 	}

 	/**
 	 * Like consume(), but a failed match is a parse error.
 	 *
 	 * @param string|Regexp $test
 	 * @param string        $msg Error message used when $test does not match.
 	 * @return Match
 	 */
 	protected function reap( $test, $msg ) {
 		$m = $this->consume( $test );
 		$this->assert( $m, $msg );
 		return $m;
 	}

 	// protected function parseOne( $list ) {
 	// 	foreach ( $list as $token ) {
 	// 		$tokenMethod = 'token' . ucfirst( $token );
 	// 		$res = $this->{$tokenMethod}();
 	// 		if ( $res === false ) {
 	// 			continue;
 	// 		}
 	// 		return $res;
 	// 	}
 	// 	// error?
 	// 	return false;
 	// }

 	/**
 	 * Skip a run of whitespace at the cursor.
 	 *
 	 * @return bool True when at least one whitespace character was skipped.
 	 */
 	protected function ws() {
 		return (bool)$this->consume( new Regexp( '/\s+/' ) );
 	}

 	/**
 	 * @return string Up to 64 bytes of input starting at the cursor, for error messages.
 	 */
 	protected function following() {
 		return substr( $this->string, $this->pos, 64 );
 	}

 	/**
 	 * Raise a parse error unless $test is truthy.
 	 *
 	 * @param mixed  $test
 	 * @param string $msg
 	 */
 	protected function assert( $test, $msg ) {
 		if ( !$test ) {
 			$this->error( $msg );
 		}
 	}

 	/**
 	 * Abort parsing with a message that includes the upcoming input.
 	 *
 	 * @param string $msg
 	 * @throws Exception Always.
 	 */
 	protected function error( $msg ) {
 		// @todo line and char numbers
 		// @fixme Use a real ParseError class
 		throw new Exception( "ParseError: Parser encountered an error while parsing. Error message: \"$msg\"; Parsing stopped at \"{$this->following()}\"." );
 	}
 }

 /**
  * Lightweight token record passed from the token*() methods to emit().
  *
  * Besides $tokenType, callers attach whatever fields the token kind needs
  * (open, name, nameHint, attributes, text, ...) as ad-hoc properties. The
  * attribute line below keeps that property-bag usage free of deprecation
  * notices on PHP 8.2+; older PHP versions read the line as a comment.
  */
 #[AllowDynamicProperties]
 class Token {

 	/** @var string Kind of token, e.g. 'tag', 'text', 'cond', 'func' or 'subst'. */
 	public $tokenType;

 	/**
 	 * @param string $tokenType Kind of token being created.
 	 */
 	public function __construct( $tokenType ) {
 		$this->tokenType = $tokenType;
 	}

 }

 /**
  * Parser for an HTML-like template syntax with curly-brace expressions.
  *
  * parseDocument() alternates between runs of text/curly expressions
  * (tokenText) and tags (tokenTag). Tokens are turned into tree nodes by
  * emit(), which also maintains the stack of currently open elements.
  */
 class TemplateParser extends AParser {

 	/** @var TemplateDocument|null Root node of the current/last parseDocument() run. */
 	public $doc = null;

 	/** @var SplStack|null Open blocks, innermost on top; the document is the bottom entry. */
 	public $tagStack = null;

 	/** @var object|null Named Regexp rules (tagname, attrName, funcName, conditional). */
 	public $rule = null;

 	/**
 	 * Build the set of named patterns shared by the token methods.
 	 *
 	 * @return object Object with Regexp properties tagname, attrName, funcName and conditional.
 	 */
 	protected function makeRules() {
 		return (object)array(
 			'tagname' => new Regexp( '/[a-z][_a-z0-9]*/i' ),
 			'attrName' => new Regexp( '/(mw:)?[a-z][-_a-z0-9]*/i' ),
 			'funcName' => new Regexp( '/([a-z][-_a-z0-9]*):/i' ),
 			'conditional' => new Regexp( '/(if|unless)\s+/' ),
 		);
 	}

 	/**
 	 * Parse the whole input into a document tree.
 	 *
 	 * @return TemplateDocument
 	 * @throws Exception On any syntax error, or when input remains that is
 	 *   neither text nor a tag.
 	 */
 	public function parseDocument() {
 		$this->doc = new TemplateDocument();
 		$this->tagStack = new SplStack();
 		$this->tagStack->push( $this->doc );
 		$this->rule = $this->makeRules();
 		while ( true ) {
 			if ( $this->tokenText( array( 'mode' => 'html', 'emit' => true ) ) ) {
 				continue;
 			}
 			if ( !$this->tokenTag() ) {
 				break;
 			}
 		}
 		if ( $this->pos !== strlen( $this->string ) ) {
 			throw new Exception( "Parser was unable to finish parsing. Syntax continuation could not be found around: \"{$this->following()}\"" );
 		}
 		return $this->doc;
 	}

 	/**
 	 * Turn a token into tree structure.
 	 *
 	 * Opening tags create an element and push it on the stack, closing tags
 	 * pop the matching element, and every other known token appends a node of
 	 * the corresponding type to the innermost open block.
 	 *
 	 * @param Token $token
 	 * @throws Exception For an unknown token type or a mismatched closing tag.
 	 */
 	protected function emit( $token ) {
 		switch ( $token->tokenType ) {
 		case 'tag':
 			if ( $token->open ) {
 				$parent = $this->tagStack->top();
 				$element = new TemplateElement();
 				$element->name = $token->name;
 				$element->nameHint = $token->nameHint;
 				$parent->append( $element );
 				$this->tagStack->push( $element );
 			} else {
 				$this->closeElement( $token );
 			}
 			return;
 		// @todo The nodes below are created empty; the token's data is not copied onto them yet.
 		case 'text':
 			$node = new TemplateText();
 			break;
 		case 'cond':
 			$node = new TemplateCondition();
 			break;
 		case 'func':
 			$node = new TemplateFunction();
 			break;
 		case 'subst':
 			$node = new TemplateSubstitution();
 			break;
 		default:
 			throw new Exception( __METHOD__ . ': Tried to emit an unknown token type.' );
 		}
 		$this->tagStack->top()->append( $node );
 	}

 	/**
 	 * Handle a closing tag token: verify it matches the innermost open element and pop it.
 	 *
 	 * @param Token $token Tag token with open === false.
 	 * @throws Exception When nothing is open or the closing tag does not match.
 	 */
 	protected function closeElement( $token ) {
 		$element = $this->tagStack->top();
 		if ( $element instanceof TemplateDocument ) {
 			// Only the document root is left: popping it would leave the stack empty
 			// and make every later top() call fail.
 			$label = $token->name ? $token->name : $token->nameHint;
 			$this->error( "Closing tag </{$label}> was found but no element is currently open." );
 			return;
 		}
 		if ( $token->name ) {
 			$this->assert( $element->name, "Cannot close a <{$token->name}> element. A null <{$element->nameHint}> has not yet been closed." );
 			// @fixme Instead we should be doing some html-like implicit close handling
 			// Tag names are parsed case-insensitively, so compare them the same way.
 			$this->assert( strcasecmp( (string)$token->name, (string)$element->name ) === 0,
 				"Closing tag <{$token->name}> did not match the currently opened <{$element->name}>." );
 			$this->tagStack->pop();
 		} else {
 			if ( !$element->name ) {
 				$hintmatch = $token->nameHint === ""
 					|| $token->nameHint === "mw"
 					|| $token->nameHint === "mw:"
 					|| $token->nameHint === $element->nameHint;
 				$this->assert( $hintmatch, "Null element closing tag </{$token->nameHint}> did not match the currently opened <{$element->nameHint}>." );
 				$this->tagStack->pop();
 			} else {
 				$this->error( "Cannot close a null <{$token->nameHint}> element. A standard <{$element->name}> has not yet been closed." );
 			}
 		}
 	}

 	/**
 	 * Parse one tag (opening, closing or self-closing) at the cursor and emit it.
 	 *
 	 * @return bool False when the input at the cursor does not start with "<";
 	 *   true once a complete tag has been parsed and emitted.
 	 * @throws Exception On malformed tag syntax.
 	 */
 	protected function tokenTag() {

 		if ( !$this->consume( "<" ) ) {
 			// No tag to parse, move on to another type of token
 			return false;
 		}

 		$closed = false; // Have we found the ending > yet?
 		$selfClosing = false; // Is this a self closing <foo />?
 		// State indicating that whitespace was consumed before the current token.
 		// Used by attribute parsing to make sure there is whitespace preceding an attribute
 		$wsConsumed = false;
 		// Is this a </foo> end tag?
 		$endTag = (bool)$this->consume( '/' );

 		$tag = new Token( 'tag' );
 		$tag->open = !$endTag;
 		$tag->nameHint = null;

 		if ( $this->nextIs( 'mw:' ) ) {
 			// A "null" tag: it has no tag name and starts directly with an mw: attribute.
 			$tag->name = null;
 			if ( $endTag ) {
 				// Try the full "mw:name" hint first. The generic attrName rule cannot be
 				// used here: for a bare "</mw:>" it would stop after "mw" and strand the ":".
 				$m = $this->consume( new Regexp( '/mw:[a-z][-_a-z0-9]*/i' ) );
 				if ( !$m ) {
 					$m = $this->consume( 'mw:' );
 				}
 				$tag->nameHint = $m ? $m->text() : '';
 			} else {
 				$m = $this->consume( $this->rule->attrName );
 				$this->assert( $m, "Could not parse a required attribute name out of a null tag." );
 				// Rollback the argument name so we can parse an actual argument.
 				$m->rollback();
 				$tag->nameHint = $m->text();
 				// There is no tag name so pretend that whitespace has been consumed so we can parse attributes.
 				$wsConsumed = true;
 			}
 		} else {
 			$tagname = $this->capture( $this->rule->tagname );
 			// @fixme Message does not handle the case where a / was found and now we want the tag name.
 			$this->assert( $tagname, "A < indicating the start of a tag was found but none of \"/\", a tag name, or the start of a null mw: tag could be found." );
 			$tag->name = $tagname;
 		}
 		$tag->attributes = array();

 		while ( !$closed ) {
 			if ( $this->consume( '>' ) ) {
 				$closed = true;
 			} elseif ( $this->consume( '/' ) ) {
 				if ( $endTag ) {
 					// @todo Differentiate between "Last consumed token is an error" and "Could not find anything following this matching expectations"
 					$this->error( "An end tag may not have a self closing /." );
 				}
 				$selfClosing = true;
 				$this->reap( '>', "A closing > must directly follow a self closing tag's /." );
 				$closed = true;
 			} elseif ( $this->ws() ) {
 				// Mark whitespace as consumed so we know we can consume an attribute
 				$wsConsumed = true;
 			} elseif ( $wsConsumed && $attrName = $this->consume( $this->rule->attrName ) ) {
 				$wsConsumed = false;
 				$this->assert( !$endTag, "End tags may not have attributes." );
 				if ( $this->consume( '=' ) ) {
 					$q = $this->consume( new Regexp( '/[\'"]/' ) );
 					if ( $q ) {
 						$text = $this->tokenText( array( 'mode' => 'attr-quoted', 'forquote' => $q->text(), 'empty' => true ) );
 						if ( !$this->consume( $q->text() ) ) {
 							$this->error( "Unexpected character found while trying to find an attribute's closing quote." );
 						}
 					} else {
 						$text = $this->tokenText( array( 'mode' => 'attr-unquoted' ) );
 						$this->assert( $text, "An unqouted attribute was found missing it's text." );
 					}
 				} else {
 					$text = true;// Just a truthy attribute
 				}
 				$tag->attributes[] = array(
 					'name' => $attrName->text(),
 					'value' => $text
 				);
 			} else {
 				$this->error( "Unexpected characters found while parsing a tag." );
 			}
 		}

 		// @todo Special handling for script and style tags that use a different parse model

 		$this->emit( $tag );

 		if ( $selfClosing ) {
 			// A self closing tag is emitted as an open immediately followed by a close.
 			$close = new Token( 'tag' );
 			$close->open = false;
 			$close->name = $tag->name;
 			$close->nameHint = $tag->nameHint;
 			$this->emit( $close );
 		}

 		return true;
 	}

 	/**
 	 * Parse a run of plain text and curly-brace expressions at the cursor.
 	 *
 	 * @param array $options Either a 'mode' preset ('html', 'attr-unquoted',
 	 *   'attr-quoted' (requires 'forquote'), 'expr', 'block-expr') and/or the
 	 *   individual flags 'quotes', '|', '=', '/', 'whitespace', 'func',
 	 *   'subst', 'conditional', 'empty' (return an empty array instead of
 	 *   false when nothing was parsed) and 'emit' (emit the parsed tokens).
 	 * @return Token[]|false Parsed tokens, or false when none were found and
 	 *   'empty' is not set.
 	 * @throws Exception On an unknown mode or malformed curly expression.
 	 */
 	public function tokenText( $options = array( 'mode' => 'html' ) ) {
 		if ( $this->rule === null ) {
 			// Allow this public method to be used before parseDocument() has run.
 			$this->rule = $this->makeRules();
 		}
 		$allquotes = "'" . '"' . '`';
 		// Fill in every option the caller did not specify
 		$options += array(
 			'quotes' => true,
 			'|' => true,
 			'=' => true,
 			'/' => true,
 			'whitespace' => true,
 			'func' => true,
 			'subst' => true,
 			'conditional' => false,
 			'empty' => false,
 			'emit' => false,
 		);
 		if ( isset( $options['mode'] ) ) {
 			switch ( $options['mode'] ) {
 			case 'html':
 				break;
 			case 'attr-unquoted':
 				$options['func'] = false;
 				$options['subst'] = false;
 				$options['quotes'] = false;
 				$options['whitespace'] = false;
 				$options['='] = false;
 				$options['/'] = false;
 				break;
 			case 'attr-quoted':
 				$options['conditional'] = true;
 				// Use a string replace trick to list the quotes that aren't the same as the one matched
 				$options['quotes'] = str_replace( $options['forquote'], '', $allquotes );
 				break;
 			case 'expr':
 				$options['|'] = false;
 				break;
 			case 'block-expr':
 				$options['conditional'] = true;
 				break;
 			default:
 				throw new Exception( __METHOD__ . ': Unknown mode.' );
 			}
 			unset( $options['mode'] );
 		}
 		$re = $this->textRegexp( $options, $allquotes );

 		// Start parsing plaintext and curly expressions
 		$nodes = array();
 		while ( true ) {
 			if ( $m = $this->consume( $re ) ) {
 				$text = new Token( 'text' );
 				$text->text = $m->text();
 				$nodes[] = $text;
 			} elseif ( $curly = $this->consume( '{' ) ) {
 				// @fixme This code doesn't consult the options to test if something is allowed
 				if ( $m = $this->consume( '/' ) ) {
 					// Looks like an end tag. Exit this text handling so that the parent condition (if any) can handle it.
 					$m->rollback();
 					$curly->rollback();
 					break;
 				} elseif ( $m = $this->consume( 'else}' ) ) {
 					// Looks like an {else} tag. Exit this text handling so that the parent condition (if any) can handle it.
 					$m->rollback();
 					$curly->rollback();
 					break;
 				} elseif ( $m = $this->consume( $this->rule->conditional ) ) {
 					// Conditional
 					$cond = new Token( 'cond' );
 					$cond->condition = $m->group( 1 );
 					$cond->test = $this->tokenText( array( 'mode' => 'expr' ) );
 					$this->assert( $cond->test, "Unexpected characters found while parsing a condition expression." );
 					$this->reap( '}', "Unexpected characters found while parsing a condition." );
 					$cond->then = $this->tokenText( array( 'mode' => 'block-expr', 'empty' => true ) );
 					// Always define the else branch so consumers never read an undefined property
 					$cond->else = null;
 					if ( $this->consume( '{else}' ) ) {
 						$cond->else = $this->tokenText( array( 'mode' => 'block-expr', 'empty' => true ) );
 					}
 					$this->reap( '{/', "Unexpected characters found while parsing conditional text." );
 					$m = $this->consume( new Regexp( '/([a-z][-_a-z0-9]*)\}/i' ) );
 					$this->assert( $m, "Unexpected characters found while parsing a condition end." );
 					$this->assert( $m->group( 1 ) === $cond->condition, "Condition end name did not match the name of the opened condition." );
 					$nodes[] = $cond;
 				} elseif ( $m = $this->consume( $this->rule->funcName ) ) {
 					// func
 					$func = new Token( 'func' );
 					$func->name = $m->group( 1 );
 					$func->text = $this->tokenText( array( 'mode' => 'expr' ) );
 					$this->assert( $func->text, "Unexpected characters found while parsing a function expression." );
 					$this->reap( '}', "Unexpected characters found while parsing a function." );
 					$nodes[] = $func;
 				} else {
 					// subst
 					$subst = new Token( 'subst' );
 					$subst->text = $this->tokenText( array( 'mode' => 'expr' ) );
 					$this->assert( $subst->text, "Unexpected characters found while parsing a substitution expression." );
 					$this->reap( '}', "Unexpected characters found while parsing a substitution." );
 					$nodes[] = $subst;
 				}
 			} else {
 				break;
 			}
 		}
 		if ( $options['emit'] ) {
 			foreach ( $nodes as $node ) {
 				$this->emit( $node );
 			}
 		}
 		if ( !$options['empty'] && count( $nodes ) <= 0 ) {
 			return false;
 		}
 		return $nodes;
 	}

 	/**
 	 * Build the pattern that matches one run of plain text for the given options.
 	 *
 	 * @param array  $options   Normalised tokenText() options (all flags present).
 	 * @param string $allquotes Every character treated as a quote.
 	 * @return Regexp Negated character class repeated one or more times.
 	 */
 	protected function textRegexp( array $options, $allquotes ) {
 		// Never plain text: < and > (tags), & (reserved for entity handling),
 		// { and } (func, subst and conditional syntax).
 		$re = '/[^<>&{}';
 		// If pipe is not permitted (eg: in a curly expr) don't allow it
 		if ( !$options['|'] ) {
 			$re .= '|';
 		}
 		// If = is not permitted (eg: in an unquoted attribute) don't allow it
 		if ( !$options['='] ) {
 			$re .= '=';
 		}
 		// If / is not permitted (eg: in an unquoted attribute) don't allow it
 		if ( !$options['/'] ) {
 			$re .= '\/';
 		}
 		// If whitespace is not permitted don't allow it
 		if ( !$options['whitespace'] ) {
 			$re .= '\s';
 		}
 		// 'quotes' is true (all allowed), false (none allowed) or a string of allowed quote characters
 		$quotes = $options['quotes'];
 		if ( $quotes === true ) {
 			$allowed = str_split( $allquotes );
 		} elseif ( $quotes === false ) {
 			$allowed = array();
 		} else {
 			$allowed = str_split( $quotes );
 		}
 		// Every quote that is not allowed terminates the text run
 		foreach ( array_diff( str_split( $allquotes ), $allowed ) as $quote ) {
 			$re .= $quote;
 		}
 		$re .= ']+/';
 		return new Regexp( $re );
 	}

 }
	<?php

	/**
	 * Common base type for every node in the parsed template tree.
	 *
	 * It declares no members of its own; it exists so that containers such as
	 * TemplateBlock::append() can type-hint against a single node type.
	 */
	class TemplateNode
	{
	}

	/**
	 * Tree node appended by TemplateParser::emit() for a 'text' token.
	 *
	 * Currently an empty marker type: it declares no members.
	 */
	class TemplateText extends TemplateNode
	{
	}

	/**
	 * Tree node appended by TemplateParser::emit() for a 'cond' token.
	 * Currently an empty marker type.
	 */
	class TemplateCondition extends TemplateNode
	{
	}
	/**
	 * Tree node appended by TemplateParser::emit() for a 'func' token.
	 * Currently an empty marker type.
	 */
	class TemplateFunction extends TemplateNode
	{
	}
	/**
	 * Tree node appended by TemplateParser::emit() for a 'subst' token.
	 * Currently an empty marker type.
	 */
	class TemplateSubstitution extends TemplateNode
	{
	}

	/**
	 * A node that owns an ordered list of child nodes.
	 *
	 * Children are kept in an SplDoublyLinkedList in insertion order. The list
	 * is private and this class exposes no accessor for it.
	 */
	class TemplateBlock extends TemplateNode {

		/** @var SplDoublyLinkedList Child nodes, oldest first. */
		private $children;

		/**
		 * Start with an empty child list.
		 */
		public function __construct() {
			$list = new SplDoublyLinkedList();
			$this->children = $list;
		}

		/**
		 * Add a node after all existing children.
		 *
		 * @param TemplateNode $node Node to add as the last child.
		 */
		public function append( TemplateNode $node ) {
			$list = $this->children;
			$list->push( $node );
		}

	}

	/**
	 * A block node created by TemplateParser::emit() for an opening tag.
	 *
	 * The parser assigns $name and $nameHint right after construction. They are
	 * declared here (instead of being created dynamically) so that reading them
	 * never hits an undefined property and so the class does not rely on dynamic
	 * properties, which are deprecated as of PHP 8.2.
	 */
	class TemplateElement extends TemplateBlock {

		/** @var string|null Tag name, or null for a "null" mw: tag that has no tag name. */
		public $name = null;

		/** @var string|null Name hint copied from the tag token; null when the token carried none. */
		public $nameHint = null;

	}

	/**
	 * Root block of a parsed template.
	 *
	 * TemplateParser::parseDocument() creates one instance, pushes it as the
	 * first entry of the tag stack and returns it. Declares no members itself.
	 */
	class TemplateDocument extends TemplateBlock
	{
	}


	/**
	 * Result of a successful regular-expression match made by a parser.
	 *
	 * Holds the capture groups, the offset the match started at, and the parser
	 * that produced it so the match can later be undone via rollback().
	 *
	 * NOTE(review): `match` is a reserved keyword as of PHP 8.0, so this class
	 * name only parses on older PHP versions. Renaming it requires updating every
	 * `new Match(...)` call site at the same time.
	 */
	class Match {

		/** @var object Parser that produced this match; must expose rollback( $chars, $to ). */
		private $owner;

		/** @var int Parser position at which the match began. */
		private $offset;

		/** @var array Capture groups as filled in by preg_match(); index 0 is the whole match. */
		private $groups;

		/**
		 * @param object $parser  Parser that produced the match.
		 * @param int    $start   Parser position at which the match began.
		 * @param array  $matches Capture groups from preg_match().
		 */
		public function __construct( $parser, $start, $matches ) {
			$this->owner  = $parser;
			$this->offset = $start;
			$this->groups = $matches;
		}

		/**
		 * Fetch one capture group.
		 *
		 * @param int|string $id Group index or name.
		 * @return string Text captured by that group.
		 */
		public function group( $id ) {
			$groups = $this->groups;
			return $groups[$id];
		}

		/**
		 * @return string The entire matched text (group 0).
		 */
		public function text() {
			return $this->group( 0 );
		}

		/**
		 * @return int Length of the entire match in bytes.
		 */
		public function length() {
			$whole = $this->text();
			return strlen( $whole );
		}

		/**
		 * Ask the owning parser to move back to where this match started.
		 * The parser validates that nothing else was consumed in between.
		 */
		public function rollback() {
			$size = $this->length();
			$this->owner->rollback( $size, $this->offset );
		}
	}

	/**
	 * Thin wrapper marking a string as a ready-made regular expression.
	 *
	 * AParser::makeRegexp() uses the wrapper to tell a pattern (used as-is) apart
	 * from a plain string (which it quotes literally).
	 */
	class Regexp {

		/** @var string Delimited pattern without trailing modifiers added by the parser. */
		private $pattern;

		/**
		 * @param string $source Delimited regular expression, e.g. '/[a-z]+/i'.
		 */
		public function __construct( $source ) {
			$this->pattern = $source;
		}

		/**
		 * @return string The pattern exactly as passed to the constructor.
		 */
		public function source() {
			$pattern = $this->pattern;
			return $pattern;
		}

	}

	/**
	 * Minimal base class for hand-written, regexp-driven parsers.
	 *
	 * The parser keeps the whole input string plus a cursor ($pos). Every match
	 * is anchored at the cursor; helpers either peek (nextIs), advance the cursor
	 * (consume, capture, reap, ws) or undo a single consume (rollback).
	 */
	class AParser {

		/** @var string Full input being parsed. */
		protected $string;

		/** @var int Offset of the next unparsed byte (used as the preg_match() offset). */
		protected $pos;

		/**
		 * @param string $string Input to parse.
		 */
		public function __construct( $string ) {
			$this->string = $string;
			$this->pos = 0;
		}

		/**
		 * Undo the most recent consume.
		 *
		 * Public only because Match::rollback() has to call it; parser code should
		 * go through the Match object instead of calling this directly.
		 *
		 * @protected
		 * @param int $chars Number of bytes to move the cursor back.
		 * @param int $to    Position the cursor is expected to end up at.
		 * @throws Exception If moving back $chars bytes would not land on $to,
		 *   i.e. more input was consumed after the match being rolled back.
		 */
		public function rollback( $chars, $to ) {
			if ( $this->pos - $chars !== $to ) {
				throw new Exception( __METHOD__ . ': Parser implementation error. A rollback was atempted after more data had been parsed.' );
			}
			$this->pos -= $chars;
		}

		/**
		 * Turn a test value into a delimited pattern string.
		 *
		 * @param string|Regexp $test Plain string (matched literally) or Regexp (used as-is).
		 * @return string Pattern delimited with "/" and without trailing modifiers.
		 * @throws Exception For any other type of $test.
		 */
		protected function makeRegexp( $test ) {
			if ( is_string( $test ) ) {
				return '/' . preg_quote( $test, '/' ) . '/';
			}
			if ( $test instanceof Regexp ) {
				return $test->source();
			}
			throw new Exception( __METHOD__ . ': Parser implementation error. Unknown type of data was patched to a parser match function.' );
		}

		/**
		 * Try to match a pattern exactly at the cursor without moving it.
		 *
		 * @param string $re Delimited pattern; the modifiers "ADsu" are appended
		 *   (anchored, dollar-end-only, dot-all, UTF-8).
		 * @return Match|false Match object on success, false when nothing matches.
		 * @throws Exception When preg_match() itself fails (for example on input
		 *   that is not valid UTF-8). Previously such failures were silently
		 *   reported as "no match".
		 */
		protected function reMatch( $re ) {
			$re .= 'ADsu';
			$m = null;
			$found = preg_match( $re, $this->string, $m, 0, $this->pos );
			if ( $found === false ) {
				throw new Exception( __METHOD__ . ': preg_match() failed with PCRE error code ' . preg_last_error() . ' for pattern ' . $re . '.' );
			}
			if ( $found ) {
				return new Match( $this, $this->pos, $m );
			}
			return false;
		}

		/**
		 * Peek: does the input at the cursor match $test?
		 *
		 * @param string|Regexp $test
		 * @return bool
		 */
		protected function nextIs( $test ) {
			return (bool)$this->reMatch( $this->makeRegexp( $test ) );
		}

		/**
		 * Match $test at the cursor and, on success, advance past it.
		 *
		 * @param string|Regexp $test
		 * @return Match|false
		 */
		protected function consume( $test ) {
			$m = $this->reMatch( $this->makeRegexp( $test ) );
			if ( !$m ) {
				return false;
			}
			$this->pos += $m->length();
			return $m;
		}

		/**
		 * Like consume(), but return only the matched text.
		 *
		 * @param string|Regexp $test
		 * @return string|false
		 */
		protected function capture( $test ) {
			$m = $this->consume( $test );
			return $m ? $m->text() : false;
		}

		/**
		 * Like consume(), but a failed match is a parse error.
		 *
		 * @param string|Regexp $test
		 * @param string        $msg Error message used when $test does not match.
		 * @return Match
		 */
		protected function reap( $test, $msg ) {
			$m = $this->consume( $test );
			$this->assert( $m, $msg );
			return $m;
		}

		// protected function parseOne( $list ) {
		// 	foreach ( $list as $token ) {
		// 		$tokenMethod = 'token' . ucfirst( $token );
		// 		$res = $this->{$tokenMethod}();
		// 		if ( $res === false ) {
		// 			continue;
		// 		}
		// 		return $res;
		// 	}
		// 	// error?
		// 	return false;
		// }

		/**
		 * Skip a run of whitespace at the cursor.
		 *
		 * @return bool True when at least one whitespace character was skipped.
		 */
		protected function ws() {
			return (bool)$this->consume( new Regexp( '/\s+/' ) );
		}

		/**
		 * @return string Up to 64 bytes of input starting at the cursor, for error messages.
		 */
		protected function following() {
			return substr( $this->string, $this->pos, 64 );
		}

		/**
		 * Raise a parse error unless $test is truthy.
		 *
		 * @param mixed  $test
		 * @param string $msg
		 */
		protected function assert( $test, $msg ) {
			if ( !$test ) {
				$this->error( $msg );
			}
		}

		/**
		 * Abort parsing with a message that includes the upcoming input.
		 *
		 * @param string $msg
		 * @throws Exception Always.
		 */
		protected function error( $msg ) {
			// @todo line and char numbers
			// @fixme Use a real ParseError class
			throw new Exception( "ParseError: Parser encountered an error while parsing. Error message: \"$msg\"; Parsing stopped at \"{$this->following()}\"." );
		}
	}

	/**
	 * Lightweight token record passed from the token*() methods to emit().
	 *
	 * Besides $tokenType, callers attach whatever fields the token kind needs
	 * (open, name, nameHint, attributes, text, ...) as ad-hoc properties. The
	 * attribute line below keeps that property-bag usage free of deprecation
	 * notices on PHP 8.2+; older PHP versions read the line as a comment.
	 */
	#[AllowDynamicProperties]
	class Token {

		/** @var string Kind of token, e.g. 'tag', 'text', 'cond', 'func' or 'subst'. */
		public $tokenType;

		/**
		 * @param string $tokenType Kind of token being created.
		 */
		public function __construct( $tokenType ) {
			$this->tokenType = $tokenType;
		}

	}

	/**
	 * Parser for an HTML-like template syntax with curly-brace expressions.
	 *
	 * parseDocument() alternates between runs of text/curly expressions
	 * (tokenText) and tags (tokenTag). Tokens are turned into tree nodes by
	 * emit(), which also maintains the stack of currently open elements.
	 */
	class TemplateParser extends AParser {

		/** @var TemplateDocument|null Root node of the current/last parseDocument() run. */
		public $doc = null;

		/** @var SplStack|null Open blocks, innermost on top; the document is the bottom entry. */
		public $tagStack = null;

		/** @var object|null Named Regexp rules (tagname, attrName, funcName, conditional). */
		public $rule = null;

		/**
		 * Build the set of named patterns shared by the token methods.
		 *
		 * @return object Object with Regexp properties tagname, attrName, funcName and conditional.
		 */
		protected function makeRules() {
			return (object)array(
				'tagname' => new Regexp( '/[a-z][_a-z0-9]*/i' ),
				'attrName' => new Regexp( '/(mw:)?[a-z][-_a-z0-9]*/i' ),
				'funcName' => new Regexp( '/([a-z][-_a-z0-9]*):/i' ),
				'conditional' => new Regexp( '/(if|unless)\s+/' ),
			);
		}

		/**
		 * Parse the whole input into a document tree.
		 *
		 * @return TemplateDocument
		 * @throws Exception On any syntax error, or when input remains that is
		 *   neither text nor a tag.
		 */
		public function parseDocument() {
			$this->doc = new TemplateDocument();
			$this->tagStack = new SplStack();
			$this->tagStack->push( $this->doc );
			$this->rule = $this->makeRules();
			while ( true ) {
				if ( $this->tokenText( array( 'mode' => 'html', 'emit' => true ) ) ) {
					continue;
				}
				if ( !$this->tokenTag() ) {
					break;
				}
			}
			if ( $this->pos !== strlen( $this->string ) ) {
				throw new Exception( "Parser was unable to finish parsing. Syntax continuation could not be found around: \"{$this->following()}\"" );
			}
			return $this->doc;
		}

		/**
		 * Turn a token into tree structure.
		 *
		 * Opening tags create an element and push it on the stack, closing tags
		 * pop the matching element, and every other known token appends a node of
		 * the corresponding type to the innermost open block.
		 *
		 * @param Token $token
		 * @throws Exception For an unknown token type or a mismatched closing tag.
		 */
		protected function emit( $token ) {
			switch ( $token->tokenType ) {
			case 'tag':
				if ( $token->open ) {
					$parent = $this->tagStack->top();
					$element = new TemplateElement();
					$element->name = $token->name;
					$element->nameHint = $token->nameHint;
					$parent->append( $element );
					$this->tagStack->push( $element );
				} else {
					$this->closeElement( $token );
				}
				return;
			// @todo The nodes below are created empty; the token's data is not copied onto them yet.
			case 'text':
				$node = new TemplateText();
				break;
			case 'cond':
				$node = new TemplateCondition();
				break;
			case 'func':
				$node = new TemplateFunction();
				break;
			case 'subst':
				$node = new TemplateSubstitution();
				break;
			default:
				throw new Exception( __METHOD__ . ': Tried to emit an unknown token type.' );
			}
			$this->tagStack->top()->append( $node );
		}

		/**
		 * Handle a closing tag token: verify it matches the innermost open element and pop it.
		 *
		 * @param Token $token Tag token with open === false.
		 * @throws Exception When nothing is open or the closing tag does not match.
		 */
		protected function closeElement( $token ) {
			$element = $this->tagStack->top();
			if ( $element instanceof TemplateDocument ) {
				// Only the document root is left: popping it would leave the stack empty
				// and make every later top() call fail.
				$label = $token->name ? $token->name : $token->nameHint;
				$this->error( "Closing tag </{$label}> was found but no element is currently open." );
				return;
			}
			if ( $token->name ) {
				$this->assert( $element->name, "Cannot close a <{$token->name}> element. A null <{$element->nameHint}> has not yet been closed." );
				// @fixme Instead we should be doing some html-like implicit close handling
				// Tag names are parsed case-insensitively, so compare them the same way.
				$this->assert( strcasecmp( (string)$token->name, (string)$element->name ) === 0,
					"Closing tag <{$token->name}> did not match the currently opened <{$element->name}>." );
				$this->tagStack->pop();
			} else {
				if ( !$element->name ) {
					$hintmatch = $token->nameHint === ""
						|| $token->nameHint === "mw"
						|| $token->nameHint === "mw:"
						|| $token->nameHint === $element->nameHint;
					$this->assert( $hintmatch, "Null element closing tag </{$token->nameHint}> did not match the currently opened <{$element->nameHint}>." );
					$this->tagStack->pop();
				} else {
					$this->error( "Cannot close a null <{$token->nameHint}> element. A standard <{$element->name}> has not yet been closed." );
				}
			}
		}

		/**
		 * Parse one tag (opening, closing or self-closing) at the cursor and emit it.
		 *
		 * @return bool False when the input at the cursor does not start with "<";
		 *   true once a complete tag has been parsed and emitted.
		 * @throws Exception On malformed tag syntax.
		 */
		protected function tokenTag() {

			if ( !$this->consume( "<" ) ) {
				// No tag to parse, move on to another type of token
				return false;
			}

			$closed = false; // Have we found the ending > yet?
			$selfClosing = false; // Is this a self closing <foo />?
			// State indicating that whitespace was consumed before the current token.
			// Used by attribute parsing to make sure there is whitespace preceding an attribute
			$wsConsumed = false;
			// Is this a </foo> end tag?
			$endTag = (bool)$this->consume( '/' );

			$tag = new Token( 'tag' );
			$tag->open = !$endTag;
			$tag->nameHint = null;

			if ( $this->nextIs( 'mw:' ) ) {
				// A "null" tag: it has no tag name and starts directly with an mw: attribute.
				$tag->name = null;
				if ( $endTag ) {
					// Try the full "mw:name" hint first. The generic attrName rule cannot be
					// used here: for a bare "</mw:>" it would stop after "mw" and strand the ":".
					$m = $this->consume( new Regexp( '/mw:[a-z][-_a-z0-9]*/i' ) );
					if ( !$m ) {
						$m = $this->consume( 'mw:' );
					}
					$tag->nameHint = $m ? $m->text() : '';
				} else {
					$m = $this->consume( $this->rule->attrName );
					$this->assert( $m, "Could not parse a required attribute name out of a null tag." );
					// Rollback the argument name so we can parse an actual argument.
					$m->rollback();
					$tag->nameHint = $m->text();
					// There is no tag name so pretend that whitespace has been consumed so we can parse attributes.
					$wsConsumed = true;
				}
			} else {
				$tagname = $this->capture( $this->rule->tagname );
				// @fixme Message does not handle the case where a / was found and now we want the tag name.
				$this->assert( $tagname, "A < indicating the start of a tag was found but none of \"/\", a tag name, or the start of a null mw: tag could be found." );
				$tag->name = $tagname;
			}
			$tag->attributes = array();

			while ( !$closed ) {
				if ( $this->consume( '>' ) ) {
					$closed = true;
				} elseif ( $this->consume( '/' ) ) {
					if ( $endTag ) {
						// @todo Differentiate between "Last consumed token is an error" and "Could not find anything following this matching expectations"
						$this->error( "An end tag may not have a self closing /." );
					}
					$selfClosing = true;
					$this->reap( '>', "A closing > must directly follow a self closing tag's /." );
					$closed = true;
				} elseif ( $this->ws() ) {
					// Mark whitespace as consumed so we know we can consume an attribute
					$wsConsumed = true;
				} elseif ( $wsConsumed && $attrName = $this->consume( $this->rule->attrName ) ) {
					$wsConsumed = false;
					$this->assert( !$endTag, "End tags may not have attributes." );
					if ( $this->consume( '=' ) ) {
						$q = $this->consume( new Regexp( '/[\'"]/' ) );
						if ( $q ) {
							$text = $this->tokenText( array( 'mode' => 'attr-quoted', 'forquote' => $q->text(), 'empty' => true ) );
							if ( !$this->consume( $q->text() ) ) {
								$this->error( "Unexpected character found while trying to find an attribute's closing quote." );
							}
						} else {
							$text = $this->tokenText( array( 'mode' => 'attr-unquoted' ) );
							$this->assert( $text, "An unqouted attribute was found missing it's text." );
						}
					} else {
						$text = true;// Just a truthy attribute
					}
					$tag->attributes[] = array(
						'name' => $attrName->text(),
						'value' => $text
					);
				} else {
					$this->error( "Unexpected characters found while parsing a tag." );
				}
			}

			// @todo Special handling for script and style tags that use a different parse model

			$this->emit( $tag );

			if ( $selfClosing ) {
				// A self closing tag is emitted as an open immediately followed by a close.
				$close = new Token( 'tag' );
				$close->open = false;
				$close->name = $tag->name;
				$close->nameHint = $tag->nameHint;
				$this->emit( $close );
			}

			return true;
		}

		/**
		 * Parse a run of plain text and curly-brace expressions at the cursor.
		 *
		 * @param array $options Either a 'mode' preset ('html', 'attr-unquoted',
		 *   'attr-quoted' (requires 'forquote'), 'expr', 'block-expr') and/or the
		 *   individual flags 'quotes', '|', '=', '/', 'whitespace', 'func',
		 *   'subst', 'conditional', 'empty' (return an empty array instead of
		 *   false when nothing was parsed) and 'emit' (emit the parsed tokens).
		 * @return Token[]|false Parsed tokens, or false when none were found and
		 *   'empty' is not set.
		 * @throws Exception On an unknown mode or malformed curly expression.
		 */
		public function tokenText( $options = array( 'mode' => 'html' ) ) {
			if ( $this->rule === null ) {
				// Allow this public method to be used before parseDocument() has run.
				$this->rule = $this->makeRules();
			}
			$allquotes = "'" . '"' . '`';
			// Fill in every option the caller did not specify
			$options += array(
				'quotes' => true,
				'|' => true,
				'=' => true,
				'/' => true,
				'whitespace' => true,
				'func' => true,
				'subst' => true,
				'conditional' => false,
				'empty' => false,
				'emit' => false,
			);
			if ( isset( $options['mode'] ) ) {
				switch ( $options['mode'] ) {
				case 'html':
					break;
				case 'attr-unquoted':
					$options['func'] = false;
					$options['subst'] = false;
					$options['quotes'] = false;
					$options['whitespace'] = false;
					$options['='] = false;
					$options['/'] = false;
					break;
				case 'attr-quoted':
					$options['conditional'] = true;
					// Use a string replace trick to list the quotes that aren't the same as the one matched
					$options['quotes'] = str_replace( $options['forquote'], '', $allquotes );
					break;
				case 'expr':
					$options['|'] = false;
					break;
				case 'block-expr':
					$options['conditional'] = true;
					break;
				default:
					throw new Exception( __METHOD__ . ': Unknown mode.' );
				}
				unset( $options['mode'] );
			}
			$re = $this->textRegexp( $options, $allquotes );

			// Start parsing plaintext and curly expressions
			$nodes = array();
			while ( true ) {
				if ( $m = $this->consume( $re ) ) {
					$text = new Token( 'text' );
					$text->text = $m->text();
					$nodes[] = $text;
				} elseif ( $curly = $this->consume( '{' ) ) {
					// @fixme This code doesn't consult the options to test if something is allowed
					if ( $m = $this->consume( '/' ) ) {
						// Looks like an end tag. Exit this text handling so that the parent condition (if any) can handle it.
						$m->rollback();
						$curly->rollback();
						break;
					} elseif ( $m = $this->consume( 'else}' ) ) {
						// Looks like an {else} tag. Exit this text handling so that the parent condition (if any) can handle it.
						$m->rollback();
						$curly->rollback();
						break;
					} elseif ( $m = $this->consume( $this->rule->conditional ) ) {
						// Conditional
						$cond = new Token( 'cond' );
						$cond->condition = $m->group( 1 );
						$cond->test = $this->tokenText( array( 'mode' => 'expr' ) );
						$this->assert( $cond->test, "Unexpected characters found while parsing a condition expression." );
						$this->reap( '}', "Unexpected characters found while parsing a condition." );
						$cond->then = $this->tokenText( array( 'mode' => 'block-expr', 'empty' => true ) );
						// Always define the else branch so consumers never read an undefined property
						$cond->else = null;
						if ( $this->consume( '{else}' ) ) {
							$cond->else = $this->tokenText( array( 'mode' => 'block-expr', 'empty' => true ) );
						}
						$this->reap( '{/', "Unexpected characters found while parsing conditional text." );
						$m = $this->consume( new Regexp( '/([a-z][-_a-z0-9]*)\}/i' ) );
						$this->assert( $m, "Unexpected characters found while parsing a condition end." );
						$this->assert( $m->group( 1 ) === $cond->condition, "Condition end name did not match the name of the opened condition." );
						$nodes[] = $cond;
					} elseif ( $m = $this->consume( $this->rule->funcName ) ) {
						// func
						$func = new Token( 'func' );
						$func->name = $m->group( 1 );
						$func->text = $this->tokenText( array( 'mode' => 'expr' ) );
						$this->assert( $func->text, "Unexpected characters found while parsing a function expression." );
						$this->reap( '}', "Unexpected characters found while parsing a function." );
						$nodes[] = $func;
					} else {
						// subst
						$subst = new Token( 'subst' );
						$subst->text = $this->tokenText( array( 'mode' => 'expr' ) );
						$this->assert( $subst->text, "Unexpected characters found while parsing a substitution expression." );
						$this->reap( '}', "Unexpected characters found while parsing a substitution." );
						$nodes[] = $subst;
					}
				} else {
					break;
				}
			}
			if ( $options['emit'] ) {
				foreach ( $nodes as $node ) {
					$this->emit( $node );
				}
			}
			if ( !$options['empty'] && count( $nodes ) <= 0 ) {
				return false;
			}
			return $nodes;
		}

		/**
		 * Build the pattern that matches one run of plain text for the given options.
		 *
		 * @param array  $options   Normalised tokenText() options (all flags present).
		 * @param string $allquotes Every character treated as a quote.
		 * @return Regexp Negated character class repeated one or more times.
		 */
		protected function textRegexp( array $options, $allquotes ) {
			// Never plain text: < and > (tags), & (reserved for entity handling),
			// { and } (func, subst and conditional syntax).
			$re = '/[^<>&{}';
			// If pipe is not permitted (eg: in a curly expr) don't allow it
			if ( !$options['|'] ) {
				$re .= '|';
			}
			// If = is not permitted (eg: in an unquoted attribute) don't allow it
			if ( !$options['='] ) {
				$re .= '=';
			}
			// If / is not permitted (eg: in an unquoted attribute) don't allow it
			if ( !$options['/'] ) {
				$re .= '\/';
			}
			// If whitespace is not permitted don't allow it
			if ( !$options['whitespace'] ) {
				$re .= '\s';
			}
			// 'quotes' is true (all allowed), false (none allowed) or a string of allowed quote characters
			$quotes = $options['quotes'];
			if ( $quotes === true ) {
				$allowed = str_split( $allquotes );
			} elseif ( $quotes === false ) {
				$allowed = array();
			} else {
				$allowed = str_split( $quotes );
			}
			// Every quote that is not allowed terminates the text run
			foreach ( array_diff( str_split( $allquotes ), $allowed ) as $quote ) {
				$re .= $quote;
			}
			$re .= ']+/';
			return new Regexp( $re );
		}

	}