blob: ae07eb1ec6dcc6f313aaf34fae24aadbfcd92307 [file] [log] [blame]
<?php
/**
* File containing the ezcDocumentOdt class
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* @package Document
* @version //autogen//
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
*/
/**
* The document handler for Open Document Text (ODT) documents.
*
* @package Document
* @version //autogen//
*/
class ezcDocumentOdt extends ezcDocumentXmlBase /* implements ezcDocumentValidation */
{
    // XML namespace URIs used when reading and writing ODT documents.
    const NS_ODT_CONFIG  = 'urn:oasis:names:tc:opendocument:xmlns:config:1.0';
    const NS_ODT_DRAWING = 'urn:oasis:names:tc:opendocument:xmlns:drawing:1.0';
    const NS_ODT_FO      = 'urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0';
    const NS_ODT_META    = 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0';
    // The OpenDocument "number" prefix is bound to the datastyle namespace;
    // the URI must not contain any whitespace.
    const NS_ODT_NUMBER  = 'urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0';
    const NS_ODT_OFFICE  = 'urn:oasis:names:tc:opendocument:xmlns:office:1.0';
    const NS_ODT_SCRIPT  = 'urn:oasis:names:tc:opendocument:xmlns:script:1.0';
    const NS_ODT_STYLE   = 'urn:oasis:names:tc:opendocument:xmlns:style:1.0';
    const NS_ODT_SVG     = 'urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0';
    const NS_ODT_TABLE   = 'urn:oasis:names:tc:opendocument:xmlns:table:1.0';
    const NS_ODT_TEXT    = 'urn:oasis:names:tc:opendocument:xmlns:text:1.0';
    const NS_XLINK       = 'http://www.w3.org/1999/xlink';
    const NS_EZC         = 'http://ezcomponents.org/Document/Odt';
    const NS_XML         = 'http://www.w3.org/XML/1998/namespace';
    const NS_DC          = 'http://purl.org/dc/elements/1.1/';

    /**
     * Array with filter objects for the input ODT document.
     *
     * The filters are applied in order by getAsDocbook() before the docbook
     * document is built.
     *
     * @var array(ezcDocumentOdtFilter)
     */
    protected $filters;

    /**
     * Construct ODT document.
     *
     * Falls back to a default ezcDocumentOdtOptions object if no options are
     * given and installs the default filter chain (image, element and style
     * filter).
     *
     * @ignore
     * @param ezcDocumentOdtOptions $options
     * @return void
     */
    public function __construct( ezcDocumentOdtOptions $options = null )
    {
        parent::__construct( $options === null ?
            new ezcDocumentOdtOptions() :
            $options );

        $this->filters = array(
            new ezcDocumentOdtImageFilter( $this->options ),
            new ezcDocumentOdtElementFilter(),
            new ezcDocumentOdtStyleFilter(),
        );
    }

    /**
     * Create document from input string.
     *
     * Create a document of the current type handler class and parse it into a
     * usable internal structure.
     *
     * @param string $string
     * @return void
     *
     * @throws ezcDocumentErroneousXmlException
     *         If the failOnError option is set and libxml reported errors
     *         while parsing $string.
     */
    public function loadString( $string )
    {
        // Use internal error handling to handle XML errors manually.
        $oldXmlErrorHandling = libxml_use_internal_errors( true );
        libxml_clear_errors();

        // Load XML document. Elements are instantiated as property
        // containers, so that transformToDocbook() can call getProperty() on
        // them.
        $this->document = new DOMDocument();
        $this->document->registerNodeClass( 'DOMElement', 'ezcDocumentPropertyContainerDomElement' );
        $this->document->loadXml( $string );

        // Errors are only collected when they should be reported; the libxml
        // error state is restored in any case.
        $errors = ( $this->options->failOnError ?
            libxml_get_errors() :
            null );
        libxml_clear_errors();
        libxml_use_internal_errors( $oldXmlErrorHandling );

        // If there are errors and the error handling is activated throw an
        // exception with the occurred errors.
        if ( $errors )
        {
            throw new ezcDocumentErroneousXmlException( $errors );
        }
    }

    /**
     * Set filters
     *
     * Set an array with filter objects, which extract the semantic
     * information from the given ODT document. The given array replaces the
     * currently configured filters.
     *
     * @param array $filters
     * @return void
     */
    public function setFilters( array $filters )
    {
        $this->filters = $filters;
    }

    /**
     * Build docbook document out of annotated ODT document
     *
     * Creates a new docbook document with an <article> root element and
     * transforms the first office:body element found in $document into it.
     * If $document does not contain an office:body element, this is reported
     * using triggerError() and the article is returned without contents.
     *
     * @param DOMDocument $document
     * @return DOMDocument
     */
    protected function buildDocbookDocument( DOMDocument $document )
    {
        $docbook = new DOMDocument( '1.0', 'utf-8' );
        $docbook->preserveWhiteSpace = false;
        $docbook->formatOutput       = true;

        $root = $docbook->createElementNs( 'http://docbook.org/ns/docbook', 'article' );
        $docbook->appendChild( $root );

        $xpath = new DOMXPath( $document );
        $xpath->registerNamespace( 'office', self::NS_ODT_OFFICE );

        // @todo: Process meta data

        $body = $xpath->query( '//office:body' )->item( 0 );
        if ( !( $body instanceof DOMElement ) )
        {
            // item() returns null for an empty result list. Passing that on
            // would violate the DOMElement type hint of transformToDocbook().
            $this->triggerError( E_WARNING, "The ODT document does not contain an office:body element. The resulting docbook document is empty." );
            return $docbook;
        }

        $this->transformToDocbook( $body, $root );
        return $docbook;
    }

    /**
     * Recursively transform annotated ODT elements to docbook
     *
     * Reads the properties 'spaces', 'type', 'attributes' and 'whitespace'
     * from the ODT element:
     *
     * - 'spaces' is appended as a text node to the current docbook element.
     * - 'type' is used as name of a new docbook element, which receives
     *   'attributes' (if set) and becomes the parent for all children of
     *   $odt.
     * - 'whitespace' with the value 'significant' sets the
     *   $significantWhitespace flag for the processing of child elements.
     *
     * Text and CDATA children are copied, comments are converted into
     * <comment> elements, all other node types are ignored.
     *
     * @param DOMElement $odt
     * @param DOMElement $docbook
     * @param bool $significantWhitespace
     * @return void
     */
    protected function transformToDocbook( DOMElement $odt, DOMElement $docbook, $significantWhitespace = false )
    {
        if ( ( $spaces = $odt->getProperty( 'spaces' ) ) !== false )
        {
            $docbook->appendChild(
                new DOMText( $spaces )
            );
        }

        if ( ( $tagName = $odt->getProperty( 'type' ) ) !== false )
        {
            // The node must be attached to the document before it can be
            // modified, since elements created by constructor are read only.
            $node = new DOMElement( $tagName );
            $docbook->appendChild( $node );
            $docbook = $node;

            if ( ( $attributes = $odt->getProperty( 'attributes' ) ) !== false )
            {
                foreach ( $attributes as $name => $value )
                {
                    $node->setAttribute( $name, $value );
                }
            }
        }

        // The children of $odt are not modified here, only the docbook
        // document is, so it is safe to iterate the node list directly.
        foreach ( $odt->childNodes as $child )
        {
            switch ( $child->nodeType )
            {
                case XML_ELEMENT_NODE:
                    $this->transformToDocbook(
                        $child,
                        $docbook,
                        $significantWhitespace || $odt->getProperty( 'whitespace' ) === 'significant'
                    );
                    break;

                case XML_TEXT_NODE:
                    $docbook->appendChild(
                        new DOMText( $child->data )
                    );
                    break;

                case XML_CDATA_SECTION_NODE:
                    $docbook->appendChild(
                        $docbook->ownerDocument->createCDATASection(
                            $child->data
                        )
                    );
                    break;

                // case XML_ENTITY_NODE:
                //  Seems not required, as entities in the source document
                //  are automatically transformed back to their text
                //  targets.
                //  break;

                case XML_COMMENT_NODE:
                    // The value given to the DOMElement constructor is not
                    // escaped, so that a comment containing "&" would cause
                    // a warning and lose contents. Use a text node instead,
                    // which is escaped on serialization.
                    $comment = new DOMElement( 'comment' );
                    $docbook->appendChild( $comment );
                    if ( $child->data !== '' )
                    {
                        $comment->appendChild(
                            new DOMText( $child->data )
                        );
                    }
                    break;
            }
        }
    }

    /**
     * Return document compiled to the docbook format
     *
     * The internal document structure is compiled to the docbook format and
     * the resulting docbook document is returned.
     *
     * This method is required for all formats to have one central format, so
     * that each format can be compiled into each other format using docbook as
     * an intermediate format.
     *
     * You may of course just call an existing converter for this conversion.
     *
     * Note that the configured filters are applied directly to the loaded
     * document on each call.
     *
     * @return ezcDocumentDocbook
     */
    public function getAsDocbook()
    {
        foreach ( $this->filters as $filter )
        {
            $filter->filter( $this->document );
        }

        $docbook = new ezcDocumentDocbook();
        $docbook->setDomDocument(
            $this->buildDocbookDocument( $this->document )
        );
        $docbook->setPath( $this->path );
        return $docbook;
    }

    /**
     * Create document from docbook document
     *
     * A document of the docbook format is provided and the internal document
     * structure should be created out of this.
     *
     * This method is required for all formats to have one central format, so
     * that each format can be compiled into each other format using docbook as
     * an intermediate format.
     *
     * You may of course just call an existing converter for this conversion.
     *
     * @param ezcDocumentDocbook $document
     * @return void
     */
    public function createFromDocbook( ezcDocumentDocbook $document )
    {
        // NOTE(review): validateString() is documented to receive a string,
        // but the document object itself is passed here. This presumably
        // relies on a string conversion of ezcDocumentDocbook - confirm.
        if ( $this->options->validate &&
             $document->validateString( $document ) !== true )
        {
            $this->triggerError( E_WARNING, "You try to convert an invalid docbook document. This may lead to invalid output." );
        }

        $this->path = $document->getPath();

        // The converter reports errors with the same error reporting level as
        // this document.
        $converter = new ezcDocumentDocbookToOdtConverter();
        $converter->options->errorReporting = $this->options->errorReporting;
        $doc = $converter->convert( $document );
        $this->document = $doc->getDomDocument();
    }

    /**
     * Return document as string
     *
     * Serialize the document to a string and return it.
     *
     * @return string
     */
    public function save()
    {
        // The document is passed as the node to dump; per the PHP manual,
        // this outputs the given node without the XML declaration.
        return $this->document->saveXml( $this->document );
    }

    /**
     * Validate the input file
     *
     * Validate the input file against the specification of the current
     * document format.
     *
     * Returns true, if the validation succeeded, and an array with
     * ezcDocumentValidationError objects otherwise.
     *
     * @param string $file
     * @return mixed
     */
    public function validateFile( $file )
    {
        // Collect libxml errors internally, so that loading and validation
        // errors can both be returned by performValidation().
        $oldSetting = libxml_use_internal_errors( true );
        libxml_clear_errors();

        $document = new DOMDocument();
        $document->load( $file );
        $res = $this->performValidation( $document );

        libxml_use_internal_errors( $oldSetting );
        return $res;
    }

    /**
     * Performs the actual validation on the given $document.
     *
     * The document is validated against the RelaxNG schema
     * odt/data/odf_1.2.rng located relative to this file. All errors libxml
     * collected so far are converted and the libxml error list is cleared
     * afterwards. The caller is expected to have enabled the libxml internal
     * error handling.
     *
     * Returns true on success, an array of errors otherwise.
     *
     * @param DOMDocument $document
     * @return array(ezcDocumentValidationError)|true
     */
    private function performValidation( DOMDocument $document )
    {
        $document->relaxNGValidate(
            dirname( __FILE__ ) . '/odt/data/odf_1.2.rng'
        );

        // Get all errors
        $errors = array();
        foreach ( libxml_get_errors() as $error )
        {
            $errors[] = ezcDocumentValidationError::createFromLibXmlError(
                $error
            );
        }
        libxml_clear_errors();

        return ( count( $errors ) ? $errors : true );
    }

    /**
     * Validate the input string
     *
     * Validate the input string against the specification of the current
     * document format.
     *
     * Returns true, if the validation succeeded, and an array with
     * ezcDocumentValidationError objects otherwise.
     *
     * @param string $string
     * @return mixed
     */
    public function validateString( $string )
    {
        // Collect libxml errors internally, so that parsing and validation
        // errors can both be returned by performValidation().
        $oldSetting = libxml_use_internal_errors( true );
        libxml_clear_errors();

        $document = new DOMDocument();
        $document->loadXml( $string );
        $res = $this->performValidation( $document );

        libxml_use_internal_errors( $oldSetting );
        return $res;
    }
}
?>