blob: 20e108741782693b08d715d8d1008f9d7d407a0a [file] [log] [blame]
<?php
/**
* File containing the ezcSearchZendLuceneHandler class.
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* @package Search
* @version //autogentag//
* @copyright Copyright (C) 2005-2010 eZ Systems AS. All rights reserved.
* @license http://www.apache.org/licenses/LICENSE-2.0 Apache License, Version 2.0
* @ignore
*/
/**
* ZendLucene backend implementation
*
* @package Search
* @version //autogentag//
* @ignore
*/
class ezcSearchZendLuceneHandler implements ezcSearchHandler, ezcSearchIndexHandler
{
/**
* Holds the connection to ZendLucene
*
* @var resource(stream)
*/
public $connection;
/**
* Stores the transaction nesting depth.
*
* @var integer
*/
private $inTransaction;
/**
* Creates a new ZendLucene handler connection
*
* @param string $location
*/
public function __construct( $location )
{
/**
* We're using realpath here because Zend_Search_Lucene does not do
* that itself. It can cause issues because their destructor uses the
* same filename but the cwd could have been changed.
*/
$location = realpath( $location );
/* If the $location doesn't exist, ZSL throws a *generic* exception. We
* don't care here though and just always assume it is because the
* index does not exist. If it doesn't exist, we create it.
*/
try
{
$this->connection = Zend_Search_Lucene::open( $location );
}
catch ( Zend_Search_Lucene_Exception $e )
{
$this->connection = Zend_Search_Lucene::create( $location );
}
$this->inTransaction = 0;
if ( !$this->connection )
{
throw new ezcSearchCanNotConnectException( 'zendlucene', $location );
}
// Set proper default encoding for query parser
Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding( 'UTF-8' );
Zend_Search_Lucene_Analysis_Analyzer::setDefault(
new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive()
);
}
/**
* Starts a transaction for indexing.
*
* When using a transaction, the amount of processing that zendlucene does
* decreases, increasing indexing performance. Without this, the component
* sends a commit after every document that is indexed. Transactions can be
* nested, when commit() is called the same number of times as
* beginTransaction(), the component sends a commit.
*/
public function beginTransaction()
{
}
/**
* Ends a transaction and calls commit.
*
* As transactions can be nested, this method will only call commit when
* all the nested transactions have been ended.
*
* @throws ezcSearchTransactionException if no transaction is active.
*/
public function commit()
{
$this->connection->optimize();
}
/**
* Builds query parameters from the different query fields
*
* @param string $queryWord
* @param array(string=>string) $searchFieldList
* @return array
*/
private function buildQuery( $queryWord, $searchFieldList = array() )
{
if ( count( $searchFieldList ) > 0 )
{
$queryString = '';
foreach ( $searchFieldList as $searchField )
{
$queryString .= "$searchField:$queryWord ";
}
}
else
{
$queryString = $queryWord;
}
return $queryString;
}
private function createDataForHit( $hit, $def )
{
$document = $hit->getDocument();
$className = $def->documentType;
$obj = new $className;
$attr = array();
foreach ( $def->fields as $field )
{
$fieldName = $this->mapFieldType( $field->field, $field->type );
if ( $field->inResult /*&& isset( $document->$fieldName ) */ )
{
$attr[$field->field] = $this->mapFieldValuesForReturn( $field, $document->$fieldName );
}
}
$obj->setState( $attr );
return new ezcSearchResultDocument( $hit->score, $obj );
}
/**
* Converts a raw zendlucene result into a document using the definition $def
*
* @param ezcSearchQuery $query
* @param mixed $response
* @return ezcSearchResult
*/
private function createResponseFromData( ezcSearchQuery $query, $response )
{
$def = $query->getDefinition();
$s = new ezcSearchResult();
$s->resultCount = count( $response );
$counter = -1;
foreach ( $response as $hit )
{
$counter++;
if ( $counter < $query->offset )
{
continue;
}
if ( $query->limit !== null && $counter >= ( $query->limit + $query->offset ) )
{
break;
}
$resultDocument = $this->createDataForHit( $hit, $def );
$idProperty = $def->idProperty;
$s->documents[$resultDocument->document->$idProperty] = $resultDocument;
}
return $s;
}
/**
* Executes a search by building and sending a query and returns the raw result
*
* @param string $queryWord
* @param array(string=>string) $searchFieldList
* @param array(string=>string) $order
* @return stdClass
*/
public function search( $queryWord, $searchFieldList = array(), $order = array() )
{
$query = $this->buildQuery( $queryWord, $searchFieldList );
$args = array();
$args[] = $query;
if ( is_array( $order ) )
{
foreach ( $order as $field => $sort )
{
$args[] = $field;
$args[] = SORT_REGULAR;
$args[] = $sort == ezcSearchQueryTools::ASC ? SORT_ASC : SORT_DESC;
}
}
$result = call_user_func_array( array( $this->connection, 'find' ), $args );
return $result;
}
/**
* Returns 'zendlucene'.
*
* @return string
*/
static public function getName()
{
return 'zendlucene';
}
/**
* Creates a search query object with the fields from the definition filled in.
*
* @param string $type
* @param ezcSearchDocumentDefinition $definition
* @return ezcSearchFindQuery
*/
public function createFindQuery( $type, ezcSearchDocumentDefinition $definition )
{
$query = new ezcSearchQueryZendLucene( $this, $definition );
$query->select( 'score' );
if ( $type )
{
$selectFieldNames = array();
foreach ( $definition->getSelectFieldNames() as $docProp )
{
$selectFieldNames[] = $this->mapFieldType( $docProp, $definition->fields[$docProp]->type );
}
$highlightFieldNames = array();
foreach ( $definition->getHighlightFieldNames() as $docProp )
{
$highlightFieldNames[] = $this->mapFieldType( $docProp, $definition->fields[$docProp]->type );
}
$query->select( $selectFieldNames );
$query->where( $query->eq( 'ezcsearch_type', $type ) );
$query->highlight( $highlightFieldNames );
}
return $query;
}
/**
* Builds the search query and returns the parsed response
*
* @param ezcSearchFindQuery $query
* @return ezcSearchResult
*/
public function find( ezcSearchFindQuery $query )
{
$queryWord = join( ' AND ', $query->whereClauses );
$order = $query->orderByClauses;
$res = $this->search( $queryWord, array(), $order );
return $this->createResponseFromData( $query, $res );
}
/**
* Returns the query as a string for debugging purposes
*
* @param ezcSearchQueryZendLucene $query
* @return string
* @ignore
*/
public function getQuery( ezcSearchQueryZendLucene $query )
{
$queryWord = join( ' AND ', $query->whereClauses );
return $this->buildQuery( $queryWord );
}
/**
* Returns the field name as used by zendlucene created from the field $name and $type.
*
* @param string $name
* @param string $type
* @return string
*/
public function mapFieldType( $name, $type )
{
return $name;
}
/**
* This method prepares a $value before it is passed to the indexer.
*
* Depending on the $fieldType the $value is modified so that the indexer understands the value.
*
* @param string $fieldType
* @param mixed $value
* @return mixed
*/
public function mapFieldValueForIndex( $fieldType, $value )
{
switch ( $fieldType )
{
case ezcSearchDocumentDefinition::DATE:
if ( is_numeric( $value ) )
{
$d = new DateTime( "@$value" );
$value = $d->format( 'U' );
}
else
{
try
{
$d = new DateTime( $value );
}
catch ( Exception $e )
{
throw new ezcSearchInvalidValueException( $type, $value );
}
$value = $d->format( 'U' );
}
break;
case ezcSearchDocumentDefinition::BOOLEAN:
$value = $value ? 'true' : 'false';
break;
}
return $value;
}
/**
* This method prepares a $value before it is passed to the search handler.
*
* Depending on the $fieldType the $value is modified so that the search
* handler understands the value.
*
* @param string $fieldType
* @param mixed $value
* @return mixed
*/
public function mapFieldValueForSearch( $fieldType, $value )
{
switch ( $fieldType )
{
case ezcSearchDocumentDefinition::STRING:
case ezcSearchDocumentDefinition::TEXT:
case ezcSearchDocumentDefinition::HTML:
$value = trim( $value );
if ( strpbrk( $value, ' "' ) !== false )
{
$value = '"' . str_replace( '"', '\"', $value ) . '"';
}
break;
case ezcSearchDocumentDefinition::INT:
case ezcSearchDocumentDefinition::FLOAT:
$value = '"' . $value . '"';
break;
case ezcSearchDocumentDefinition::DATE:
if ( is_numeric( $value ) )
{
$d = new DateTime( "@$value" );
$value = $d->format( 'U' );
}
else
{
try
{
$d = new DateTime( $value );
}
catch ( Exception $e )
{
throw new ezcSearchInvalidValueException( $type, $value );
}
$value = $d->format( 'U' );
}
break;
case ezcSearchDocumentDefinition::BOOLEAN:
$value = ($value ? 'true' : 'false');
break;
}
return $value;
}
/**
* This method prepares a $value before it is passed to the search handler.
*
* Depending on the $fieldType the $value is modified so that the search
* handler understands the value.
*
* @param string $fieldType
* @param mixed $value
* @return mixed
*/
public function mapFieldValueForReturn( $fieldType, $value )
{
switch ( $fieldType )
{
case ezcSearchDocumentDefinition::BOOLEAN:
$value = $value == 'true' ? true : false;
break;
case ezcSearchDocumentDefinition::DATE:
$value = new DateTime( "@$value" );
break;
}
return $value;
}
/**
* This method prepares a value or an array of $values before it is passed to the search handler.
*
* Depending on the $field the $values is modified so that the search
* handler understands the value. It will also correctly deal with
* multi-data fields in the search index.
*
* @throws ezcSearchInvalidValueException if an array of values is
* submitted, but the field has not been defined as a multi-value field.
*
* @param ezcSearchDocumentDefinitionField $field
* @param mixed $values
* @return array(mixed)
*/
public function mapFieldValuesForSearch( $field, $values )
{
if ( is_array( $values ) && $field->multi == false )
{
throw new ezcSearchInvalidValueException( $field->type, $values, 'multi' );
}
if ( !is_array( $values ) )
{
$values = array( $values );
}
foreach ( $values as &$value )
{
$value = $this->mapFieldValueForSearch( $field->type, $value );
}
return $values;
}
/**
* This method prepares a value or an array of $values before it is passed to the indexer.
*
* Depending on the $field the $values is modified so that the search
* handler understands the value. It will also correctly deal with
* multi-data fields in the search index.
*
* @throws ezcSearchInvalidValueException if an array of values is
* submitted, but the field has not been defined as a multi-value field.
*
* @param ezcSearchDocumentDefinitionField $field
* @param mixed $values
* @return array(mixed)
*/
public function mapFieldValuesForIndex( $field, $values )
{
if ( is_array( $values ) && $field->multi == false )
{
throw new ezcSearchInvalidValueException( $field->type, $values, 'multi' );
}
if ( !is_array( $values ) )
{
$values = array( $values );
}
foreach ( $values as &$value )
{
$value = $this->mapFieldValueForIndex( $field->type, $value );
}
return $values;
}
/**
* This method prepares a value or an array of $values after it has been returned by search handler.
*
* Depending on the $field the $values is modified. It will also correctly
* deal with multi-data fields in the search index.
*
* @param ezcSearchDocumentDefinitionField $field
* @param mixed $values
* @return mixed|array(mixed)
*/
public function mapFieldValuesForReturn( $field, $values )
{
$values = $this->mapFieldValueForReturn( $field->type, $values );
return $values;
}
/**
* Runs a commit command to tell zendlucene we're done indexing.
*/
protected function runCommit()
{
$r = $this->sendRawPostCommand( 'update', array( 'wt' => 'json' ), '<commit/>' );
}
/**
* Indexes the document $document using definition $definition
*
* @param ezcSearchDocumentDefinition $definition
* @param mixed $document
*/
public function index( ezcSearchDocumentDefinition $definition, $document )
{
$doc = new Zend_Search_Lucene_Document();
$doc->addField( Zend_Search_Lucene_Field::Text( 'ezcsearch_type', $definition->documentType ) );
foreach ( $definition->fields as $field )
{
$values = $this->mapFieldValuesForIndex( $field, $document[$field->field] );
foreach ( $values as $value )
{
switch ( $field->type )
{
case ezcSearchDocumentDefinition::INT:
case ezcSearchDocumentDefinition::DATE:
$doc->addField( Zend_Search_Lucene_Field::Keyword( $field->field, $value ) );
break;
default:
$doc->addField( Zend_Search_Lucene_Field::Text( $field->field, $value, 'UTF-8' ) );
}
}
}
$this->connection->addDocument( $doc );
}
/**
* Creates a delete query object with the fields from the definition filled in.
*
* @param string $type
* @param ezcSearchDocumentDefinition $definition
* @return ezcSearchDeleteQuery
*/
public function createDeleteQuery( $type, ezcSearchDocumentDefinition $definition )
{
$query = new ezcSearchDeleteQueryZendLucene( $this, $definition );
if ( $type )
{
$selectFieldNames = array();
$query->where( $query->eq( 'ezcsearch_type', $type ) );
}
return $query;
}
/**
* Deletes a document by the document's $id.
*
* If the document with ID $id does not exist, no warning is returned.
*
* @param mixed $id
* @param ezcSearchDocumentDefinition $definition
*/
public function deleteById( $id, ezcSearchDocumentDefinition $definition )
{
$idProperty = $definition->idProperty;
$fieldName = $this->mapFieldType( $definition->fields[$idProperty]->field, $definition->fields[$idProperty]->type );
$res = $this->search( "ezcsearch_type:{$definition->documentType} AND {$fieldName}:$id" );
if ( count( $res ) == 1 )
{
$this->connection->delete( $res[0]->id );
}
}
/**
* Builds the delete query and returns the parsed response
*
* @param ezcSearchDeleteQuery $query
*/
public function delete( ezcSearchDeleteQuery $query )
{
$queryWord = join( ' AND ', $query->whereClauses );
$res = $this->search( $queryWord );
foreach ( $res as $hit )
{
$this->connection->delete( $hit->id );
}
}
/**
* Finds a document by the document's $id
*
* @throws ezcSearchIdNotFoundException
* if the document with ID $id did not exist.
*
* @param mixed $id
* @param ezcSearchDocumentDefinition $definition
* @return ezcSearchResult
*/
public function findById( $id, ezcSearchDocumentDefinition $definition )
{
$idProperty = $definition->idProperty;
$fieldName = $this->mapFieldType( $definition->fields[$idProperty]->field, $definition->fields[$idProperty]->type );
$res = $this->search( "{$fieldName}:$id" );
if ( count( $res ) != 1 )
{
throw new ezcSearchIdNotFoundException( $id );
}
return $this->createDataForHit( $res[0], $definition );
}
}
?>