blob: 48756ef5de1169fa25ecf221a4f85fb08861c047 [file] [log] [blame]
* File containing the ezcDocumentBBCodeParser class.
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* @package Document
* @version //autogen//
* @license Apache License, Version 2.0
* Parser for bbcode documents.
* @package Document
* @version //autogen//
class ezcDocumentBBCodeParser extends ezcDocumentParser
* Array containing simplified shift ruleset.
* We cannot express the BBCode syntax as a usual grammar using a BNF. With
* the pumping lemma for context free grammars [1] you can easily prove,
* that the word a^n b c^n d e^n is not a context free grammar, and this is
* what the title definitions are.
* This structure contains an array with callbacks implementing the shift
* rules for all tokens. There may be multiple rules for one single token.
* The callbacks itself create syntax elements and push them to the
* document stack. After each push the reduction callbacks will be called
* for the pushed elements.
* The array should look like:
* <code>
* array(
* WHITESPACE => array(
* reductionMethod,
* ...
* ),
* ...
* )
* </code>
* [1]
* @var array
// Shift table walked by parse(): each entry is tested with
// `$token instanceof <key>` and, on a match, the method named by the value is
// called on this object.
// NOTE(review): the array keys (token class names) and the end of the array
// are not visible in this view — confirm against the full file.
protected $shifts = array(
=> 'shiftOpeningToken',
=> 'shiftClosingToken',
=> 'shiftListItemToken',
=> 'shiftWhitespaceToken',
=> 'shiftTextToken',
=> 'shiftLiteralBlockToken',
=> 'shiftNewLineToken',
=> 'shiftEndOfFileToken',
* Array containing simplified reduce ruleset.
* We cannot express the BBCode syntax as a usual grammar using a BNF. This
* structure implements a pseudo grammar by assigning a number of callbacks
* for internal methods implementing reduction rules for a detected syntax
* element.
* <code>
* array(
* ezcDocumentBBCodeNode::DOCUMENT => 'reduceDocument'
* ...
* )
* </code>
* @var array
// Reduction table walked by parse(): keys are node class names tested with
// `$node instanceof <key>`, values are lists of method names that parse()
// calls in order with the node as the only argument.
// NOTE(review): the method names inside each inner array are not visible in
// this view — confirm against the full file.
protected $reductions = array(
'ezcDocumentBBCodeClosingTagNode' => array(
'ezcDocumentBBCodeParagraphNode' => array(
'ezcDocumentBBCodeDocumentNode' => array(
'ezcDocumentBBCodeListItemNode' => array(
'ezcDocumentBBCodeListEndNode' => array(
* Contains a list of detected syntax elements.
* At the end of a successful parsing process this should only contain one
* document syntax element. During the process it may contain a list of
* elements, which are up to reduction.
* Each element in the stack has to be an object extending from
* ezcDocumentBBCodeNode, which may again contain any number of such objects.
* This way an abstract syntax tree is constructed.
* @var array
// Index 0 is the top of the stack: parse() adds nodes with array_unshift()
// and the reduce methods remove them with array_shift().
protected $documentStack = array();
* Parse token stream.
* Parse an array of ezcDocumentBBCodeToken objects into a bbcode abstract
* syntax tree.
* @param array $tokens
* @return ezcDocumentBBCodeDocumentNode
// Drives the shift/reduce loop: each token is turned into a node by the first
// matching entry of $this->shifts, the node is then passed through the
// matching entries of $this->reductions, and any node that is left over is
// prepended to $this->documentStack.
// NOTE(review): every echo below is followed by a closing DEBUG marker whose
// opening counterpart is not visible in this view; presumably the echoes are
// commented out in the real file — confirm.
public function parse( array $tokens )
echo "\n\nStart parser\n============\n\n";
// /DEBUG */
// array_shift() returns null once $tokens is empty, which ends the loop.
while ( ( $token = array_shift( $tokens ) ) !== null )
echo "[T] ({$token->line}:{$token->position}) Token: " . get_class( $token ) . " at {$token->line}:{$token->position}.\n";
// /DEBUG */
// First shift the given token with the registered shift methods.
// false means "no rule matched yet"; null means "rule matched, ignore token".
$node = false;
foreach ( $this->shifts as $class => $method )
if ( $token instanceof $class )
echo " - Handle token with ->$method\n";
// /DEBUG */
// Try to shift the token with current method
if ( ( $node = $this->$method( $token, $tokens ) ) !== false )
// If the node is still false, no shift rule matched the token.
if ( $node === false )
return $this->triggerError( E_PARSE,
"Could not find shift rule for token '" . get_class( $token ) . "'.",
$token->line, $token->position
// Token did not result in any node, it should just be ignored.
if ( $node === null )
echo "[N] Node: " . get_class( $node ) . " at {$node->token->line}:{$node->token->position}.\n";
// /DEBUG */
// Apply reductions to shifted node
// The do/while( false ) runs once; it exists so that `break 3` below can
// leave the method loop, the class loop and this wrapper in one step.
do {
foreach ( $this->reductions as $class => $methods )
if ( $node instanceof $class )
foreach ( $methods as $method )
echo " - Handle node with ->$method\n";
// /DEBUG */
// A reduction returning null has consumed the node completely.
if ( ( $node = $this->$method( $node ) ) === null )
echo " - Reduced.\n";
// /DEBUG */
// The node has been handled, exit loop.
break 3;
// Check if the node class has changed and rehandle
// node in this case.
// `continue 2` resumes the outer foreach with the next registered class.
if ( !$node instanceof $class )
echo " - Try subsequent reductions...\n";
// /DEBUG */
continue 2;
} while ( false );
// Check if reductions have been applied, but still returned a
// node, just add to document stack in this case.
if ( $node !== null )
echo " => Prepend " . get_class( $node ) . " to document stack.\n";
// /DEBUG */
array_unshift( $this->documentStack, $node );
// Check if we successfully reduced the document stack
if ( ( count( $this->documentStack ) !== 1 ) ||
( !( $document = reset( $this->documentStack ) ) instanceof ezcDocumentBBCodeDocumentNode ) )
// NOTE(review): $node is assigned here but not used in the lines visible
// below, and $this->documentStack[1] is read even though this branch is also
// reached when the stack holds fewer than two entries. The call receiving
// these arguments is not visible in this view — verify against the full file.
$node = isset( $document ) ? $document : reset( $this->documentStack );
'Expected end of file, got: ' . get_class( $this->documentStack[1] ) . ".",
$this->documentStack[1]->token->line, $this->documentStack[1]->token->position
return $document;
* Shift list item token
* List item tokens indicate a new list item. Just put them on the stack,
* they will be aggregated later.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Wraps the token in a new list item node. $tokens is neither read nor
// modified by the lines visible here.
protected function shiftListItemToken( ezcDocumentBBCodeToken $token, array &$tokens )
echo " - Shift list item.\n";
// /DEBUG */
return new ezcDocumentBBCodeListItemNode( $token );
* Shift tag opening token
* Opening tags mean that the following contents will be aggregated, once a
* matching closing tag is found. Is just shifted to the document stack.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Maps an opening tag token to a node: every tag except 'list' becomes a
// generic tag node, while 'list' becomes a bullet or an enumerated list node.
protected function shiftOpeningToken( ezcDocumentBBCodeToken $token, array &$tokens )
if ( $token->content !== 'list' )
echo " - Shift opening token {$token->content}.\n";
// /DEBUG */
return new ezcDocumentBBCodeTagNode( $token );
// Only 'list' tokens get this far. A list without parameters is a bullet list.
switch ( true )
case $token->parameters === null:
echo " - Shift bullet list.\n";
// /DEBUG */
return new ezcDocumentBBCodeBulletListNode( $token );
// NOTE(review): presumably the default branch of the switch; its label is not
// visible in this view.
echo " - Shift enumerated list.\n";
// /DEBUG */
return new ezcDocumentBBCodeEnumeratedListNode( $token );
* Shift tag closing token
* Closing tags mean that the preceding contents will be aggregated, once a
* matching opening tag is found. Is just shifted to the document stack,
* and the appropriate reduce call will follow right away.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Maps a closing tag token to a node: a closing 'list' tag becomes a list end
// node, every other closing tag becomes a generic closing tag node.
protected function shiftClosingToken( ezcDocumentBBCodeToken $token, array &$tokens )
if ( $token->content === 'list' )
echo " - Shift list end node.\n";
// /DEBUG */
return new ezcDocumentBBCodeListEndNode( $token );
echo " - Shift closing token {$token->content}.\n";
// /DEBUG */
return new ezcDocumentBBCodeClosingTagNode( $token );
* Shift whitespace token.
* Shift whitespace tokens. Whitespaces are only considered significant, if
* the prior token was not a block level element.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Keeps whitespace as a text node only when the document stack is non-empty
// and its top entry is not a block level node. Otherwise returns null, which
// parse() treats as "ignore this token".
protected function shiftWhitespaceToken( ezcDocumentBBCodeToken $token, array &$tokens )
if ( isset( $this->documentStack[0] ) &&
( !$this->documentStack[0] instanceof ezcDocumentBBCodeBlockLevelNode ) )
echo " - Shift whitespace text node.\n";
// /DEBUG */
return new ezcDocumentBBCodeTextNode( $token );
echo " - Ignore whitespace node.\n";
// /DEBUG */
return null;
* Shift text token.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Wraps the token in a new text node. $tokens is neither read nor modified by
// the lines visible here.
protected function shiftTextToken( ezcDocumentBBCodeToken $token, array &$tokens )
echo " - Shift text node.\n";
// /DEBUG */
return new ezcDocumentBBCodeTextNode( $token );
* Shift literal block token
* Literal blocks are just a chunk of code or similar, where the token can
* just be converted into an appropriate node.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Decides between a block level and an inline literal. The block form is
// chosen only when the top of the document stack is a paragraph node and the
// next pending token is a new line token; in every other case an inline
// literal node is returned.
protected function shiftLiteralBlockToken( ezcDocumentBBCodeToken $token, array &$tokens )
if ( isset( $this->documentStack[0] ) &&
( $this->documentStack[0] instanceof ezcDocumentBBCodeParagraphNode ) &&
isset( $tokens[0] ) &&
( $tokens[0] instanceof ezcDocumentBBCodeNewLineToken ) )
// Remove following new line tokens.
// $tokens is taken by reference, so the tokens dropped here are also gone
// from the list parse() iterates over.
do {
array_shift( $tokens );
} while ( isset( $tokens[0] ) &&
( ( $tokens[0] instanceof ezcDocumentBBCodeNewlineToken ) ||
( $tokens[0] instanceof ezcDocumentBBCodeWhitespaceToken ) ) );
echo " - Shift literal block node.\n";
// /DEBUG */
return new ezcDocumentBBCodeLiteralBlockNode( $token );
echo " - Shift inline literal node.\n";
// /DEBUG */
return new ezcDocumentBBCodeInlineLiteralNode( $token );
* Shift new line token.
* Double new lines are considered as paragraphs. All other new lines are
* just shifted as single whitespace text nodes.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Classifies a new line token by looking ahead in $tokens:
//  - another new line follows (whitespace in between is skipped): paragraph node;
//  - otherwise, stack non-empty and top entry not block level: text node;
//  - otherwise: null, which parse() treats as "ignore this token".
protected function shiftNewLineToken( ezcDocumentBBCodeToken $token, array &$tokens )
// Drop whitespace tokens that directly follow this new line.
while ( isset( $tokens[0] ) &&
( $tokens[0] instanceof ezcDocumentBBCodeWhitespaceToken ) )
array_shift( $tokens );
if ( isset( $tokens[0] ) &&
( $tokens[0] instanceof ezcDocumentBBCodeNewlineToken ) )
// Consume the whole run of further new line and whitespace tokens so that
// it yields a single paragraph node.
do {
array_shift( $tokens );
} while ( isset( $tokens[0] ) &&
( ( $tokens[0] instanceof ezcDocumentBBCodeNewlineToken ) ||
( $tokens[0] instanceof ezcDocumentBBCodeWhitespaceToken ) ) );
echo " - Shift paragraph node.\n";
// /DEBUG */
return new ezcDocumentBBCodeParagraphNode( $token );
elseif ( isset( $this->documentStack[0] ) &&
( !$this->documentStack[0] instanceof ezcDocumentBBCodeBlockLevelNode ) )
echo " - Shift newline as whitespace node.\n";
// /DEBUG */
return new ezcDocumentBBCodeTextNode( $token );
echo " - Ignore whitespace node.\n";
// /DEBUG */
return null;
* Shift EOF token.
* Shift End-Of-File token. We reached the end of the document, and
* therefore shift a document node onto the stack.
* @param ezcDocumentBBCodeToken $token
* @param array $tokens
* @return mixed
// Wraps the end-of-file token in a new document node. $tokens is neither read
// nor modified by the lines visible here.
protected function shiftEndOfFileToken( ezcDocumentBBCodeToken $token, array &$tokens )
echo " - Shift document node.\n";
// /DEBUG */
return new ezcDocumentBBCodeDocumentNode( $token );
/**
 * Reduce tags.
 *
 * Pops nodes off the document stack until the opening tag that matches the
 * given closing tag is on top, then stores the popped nodes, restored to
 * document order, as the children of that opening tag.
 *
 * @param ezcDocumentBBCodeClosingTagNode $node
 * @return mixed The matching opening tag node with its children attached, or
 *               the result of triggerError() if the tags do not nest.
 */
protected function reduceTags( ezcDocumentBBCodeClosingTagNode $node )
{
    $nodes = array();
    while ( isset( $this->documentStack[0] ) &&
            ( ( !$this->documentStack[0] instanceof ezcDocumentBBCodeTagNode ) ||
              ( $this->documentStack[0]->token->content !== $node->token->content ) ) )
    {
        $nodes[] = $child = array_shift( $this->documentStack );

        // A tag node without children is reported as an unclosed opening tag.
        if ( ( $child instanceof ezcDocumentBBCodeTagNode ) &&
             ( !count( $child->nodes ) ) )
        {
            return $this->triggerError( E_PARSE,
                "Opening tag, without matching closing tag found: '" . $child->token->content . "'.",
                $child->token->line, $child->token->position
            );
        }

        // A closing tag still on the stack was never matched with an opener.
        if ( $child instanceof ezcDocumentBBCodeClosingTagNode )
        {
            return $this->triggerError( E_PARSE,
                "Closing tag, without matching opening tag found: '" . $child->token->content . "'.",
                $child->token->line, $child->token->position
            );
        }
    }

    // The loop above only ends on a matching opening tag or on an empty
    // stack, so testing for the empty stack is sufficient. Doing so with
    // isset() avoids reading index 0 of an empty array.
    if ( !isset( $this->documentStack[0] ) )
    {
        return $this->triggerError( E_PARSE,
            "Closing tag, without matching opening tag found: '" . $node->token->content . "'.",
            $node->token->line, $node->token->position
        );
    }

    // Nodes were collected newest first; reverse them into document order.
    $node = array_shift( $this->documentStack );
    $node->nodes = array_reverse( $nodes );
    return $node;
}
/**
 * Reduce list items.
 *
 * Pops the nodes that follow the most recent list item (or the start of a
 * still empty list) off the document stack, wraps runs of non-block-level
 * nodes into paragraph nodes and stores the result as the children of that
 * list item.
 *
 * The given node is returned unchanged, so parse() pushes it onto the
 * document stack afterwards.
 *
 * @param ezcDocumentBBCodeNode $node
 * @return mixed The given node, or the result of triggerError() on a
 *               structural error.
 */
protected function reduceListItem( ezcDocumentBBCodeNode $node )
{
    $nodes = array();

    // Position fallback for the "Missing list item node." report: without
    // it $child would be undefined when the stack is empty from the start.
    $child = $node;

    // Stop at the previous list item, or at a list node that has no
    // children yet (the list this item belongs to).
    while ( isset( $this->documentStack[0] ) &&
            ( !$this->documentStack[0] instanceof ezcDocumentBBCodeListItemNode ) &&
            ( ( !$this->documentStack[0] instanceof ezcDocumentBBCodeListNode ) ||
              ( count( $this->documentStack[0]->nodes ) ) ) )
    {
        $nodes[] = $child = array_shift( $this->documentStack );

        // A tag node without children is reported as an unclosed opening tag.
        if ( ( $child instanceof ezcDocumentBBCodeTagNode ) &&
             ( !count( $child->nodes ) ) )
        {
            return $this->triggerError( E_PARSE,
                "Opening tag, without matching closing tag found: '" . $child->token->content . "'.",
                $child->token->line, $child->token->position
            );
        }

        if ( $child instanceof ezcDocumentBBCodeClosingTagNode )
        {
            return $this->triggerError( E_PARSE,
                "Closing tag, without matching opening tag found: '" . $child->token->content . "'.",
                $child->token->line, $child->token->position
            );
        }
    }

    if ( !isset( $this->documentStack[0] ) )
    {
        return $this->triggerError( E_PARSE,
            "Missing list item node.",
            $child->token->line, $child->token->position
        );
    }

    // Wrap non-block-level nodes into paragraphs. $nodes is ordered newest
    // first, so each run is reversed when it is turned into a paragraph and
    // the complete list is reversed once more when it is attached.
    $wrapped = array();
    $temp = array();
    foreach ( $nodes as $child )
    {
        if ( !$child instanceof ezcDocumentBBCodeBlockLevelNode )
        {
            $temp[] = $child;
            continue;
        }

        // A block level node ends the pending run of inline nodes.
        if ( count( $temp ) )
        {
            $wrapped[] = $para = new ezcDocumentBBCodeParagraphNode( $temp[0]->token );
            $para->nodes = array_reverse( $temp );
            $temp = array();
        }

        $wrapped[] = $child;
    }

    if ( count( $temp ) )
    {
        $wrapped[] = $para = new ezcDocumentBBCodeParagraphNode( $temp[0]->token );
        $para->nodes = array_reverse( $temp );
    }

    // Only a list item receives the collected content; when the top of the
    // stack is the list node itself the content is not attached anywhere.
    if ( $this->documentStack[0] instanceof ezcDocumentBBCodeListItemNode )
    {
        $this->documentStack[0]->nodes = array_reverse( $wrapped );
    }

    return $node;
}
/**
 * Reduce list.
 *
 * Closes the last open list item, then pops all consecutive list items off
 * the document stack and stores them, in document order, as the children of
 * the list node found below them.
 *
 * @param ezcDocumentBBCodeNode $node
 * @return mixed null once the list has been completed, or the result of
 *               triggerError() if no list node precedes the items.
 */
protected function reduceList( ezcDocumentBBCodeNode $node )
{
    // Attach the trailing content to the last list item. The return value
    // is not used here; only the effect on the document stack matters.
    $this->reduceListItem( $node );

    $nodes = array();
    while ( isset( $this->documentStack[0] ) &&
            ( $this->documentStack[0] instanceof ezcDocumentBBCodeListItemNode ) )
    {
        $nodes[] = array_shift( $this->documentStack );
    }

    if ( !isset( $this->documentStack[0] ) ||
         ( !$this->documentStack[0] instanceof ezcDocumentBBCodeListNode ) )
    {
        // Report the position of the node that triggered the reduction; no
        // other node variable is defined in this method.
        return $this->triggerError( E_PARSE,
            "Missing list start node.",
            $node->token->line, $node->token->position
        );
    }

    // Items were collected newest first; reverse them into document order.
    $this->documentStack[0]->nodes = array_reverse( $nodes );
    return null;
}
/**
 * Reduce paragraph.
 *
 * Collects every node from the top of the document stack down to the next
 * paragraph, list or literal block node and makes the collected nodes the
 * children of the given paragraph. A paragraph that would stay empty is
 * omitted.
 *
 * @param ezcDocumentBBCodeParagraphNode $node
 * @return mixed The filled paragraph node, null for an empty paragraph, or
 *               the result of triggerError() on an unbalanced tag.
 */
protected function reduceParagraph( ezcDocumentBBCodeParagraphNode $node )
{
    $collected = array();

    while ( isset( $this->documentStack[0] ) )
    {
        $top = $this->documentStack[0];

        // Block level boundaries end the paragraph content.
        if ( ( $top instanceof ezcDocumentBBCodeParagraphNode ) ||
             ( $top instanceof ezcDocumentBBCodeListNode ) ||
             ( $top instanceof ezcDocumentBBCodeLiteralBlockNode ) )
        {
            break;
        }

        $inline = array_shift( $this->documentStack );
        $collected[] = $inline;

        // A tag node without children is reported as an unclosed opening tag.
        if ( ( $inline instanceof ezcDocumentBBCodeTagNode ) &&
             ( !count( $inline->nodes ) ) )
        {
            return $this->triggerError( E_PARSE,
                "Opening tag, without matching closing tag found: '" . $inline->token->content . "'.",
                $inline->token->line, $inline->token->position
            );
        }

        if ( $inline instanceof ezcDocumentBBCodeClosingTagNode )
        {
            return $this->triggerError( E_PARSE,
                "Closing tag, without matching opening tag found: '" . $inline->token->content . "'.",
                $inline->token->line, $inline->token->position
            );
        }
    }

    // Omit empty paragraphs
    if ( $collected === array() )
    {
        return null;
    }

    // Nodes were collected newest first; reverse them into document order.
    $node->nodes = array_reverse( $collected );
    return $node;
}
/**
 * Reduce document.
 *
 * Pops all consecutive paragraph, list and literal block nodes from the top
 * of the document stack and stores them, in document order, as the children
 * of the given document node.
 *
 * @param ezcDocumentBBCodeDocumentNode $node
 * @return ezcDocumentBBCodeDocumentNode The given node with its children set.
 */
protected function reduceDocument( ezcDocumentBBCodeDocumentNode $node )
{
    $blocks = array();

    while ( isset( $this->documentStack[0] ) )
    {
        $top = $this->documentStack[0];

        $isBlock = ( $top instanceof ezcDocumentBBCodeParagraphNode ) ||
                   ( $top instanceof ezcDocumentBBCodeListNode ) ||
                   ( $top instanceof ezcDocumentBBCodeLiteralBlockNode );

        // Anything else stays on the stack for parse() to report.
        if ( !$isBlock )
        {
            break;
        }

        $blocks[] = array_shift( $this->documentStack );
    }

    // Blocks were collected newest first; reverse them into document order.
    $node->nodes = array_reverse( $blocks );
    return $node;
}