blob: 3d091c6b23179c764e9a8fd1c62a4c58f690935c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import org.apache.solr.common.params.QueryElevationParams;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.DOMUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SortSpec;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.VersionedFile;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.apache.solr.request.SolrQueryRequest;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
* A component to elevate some documents to the top of the result set.
*
* @version $Id$
* @since solr 1.3
*/
public class QueryElevationComponent extends SearchComponent implements SolrCoreAware
{
private static final Logger log = LoggerFactory.getLogger(QueryElevationComponent.class);

// Names of the init arguments read from this component's configuration.
static final String FIELD_TYPE = "queryFieldType";
static final String CONFIG_FILE = "config-file";
// Attribute name read from each <doc> node of the elevation file (see loadElevationMap).
static final String EXCLUDE = "exclude";

// Request-time parameter names are taken from QueryElevationParams.

// Init arguments captured by init(); read again by inform() and getElevationMap().
private SolrParams initArgs = null;
// Query analyzer used to normalize query strings; stays null when FIELD_TYPE is not configured.
private Analyzer analyzer = null;
// Name of the schema's unique key field; assigned in inform().
private String idField = null;
// Default for forced elevation; inform() replaces it with the init argument when present,
// and prepare() lets a request parameter override it.
boolean forceElevation = false;

// Cache of (analyzed query -> ElevationObj) maps.
// When the configuration is loaded from the data directory, one map is kept per IndexReader.
// When it is loaded from the config directory, a single map is stored under the null key
// and is never re-loaded.
// inform() and getElevationMap() synchronize on this map while accessing it.
final Map<IndexReader,Map<String, ElevationObj>> elevationCache =
  new WeakHashMap<IndexReader, Map<String,ElevationObj>>();
/**
 * Elevation rules for one configured query: the documents to push to the top,
 * the documents to remove, and the comparator source that orders elevated hits.
 */
class ElevationObj {
  /** Query text exactly as configured. */
  final String text;
  /** Result of running {@link #text} through getAnalyzedQuery; used as the lookup key. */
  final String analyzed;
  /** MUST_NOT clauses for excluded ids, or null when nothing is excluded. */
  final BooleanClause[] exclude;
  /** Zero-boost query with one SHOULD TermQuery per elevated id. */
  final BooleanQuery include;
  /** Elevated id -> priority; earlier ids in the elevate list get larger values. */
  final Map<String,Integer> priority;
  // One comparator source per ElevationObj, so hashCode/equals on Sort will just work
  final FieldComparatorSource comparatorSource;

  /**
   * @param qstr    the query text these rules apply to
   * @param elevate ids to elevate, highest priority first
   * @param exclude ids to exclude; may be null or empty
   * @throws IOException if analyzing the query text fails
   */
  ElevationObj( String qstr, List<String> elevate, List<String> exclude ) throws IOException
  {
    text = qstr;
    analyzed = getAnalyzedQuery( qstr );

    final BooleanQuery elevatedDocs = new BooleanQuery();
    elevatedDocs.setBoost( 0 );
    final Map<String,Integer> ranks = new HashMap<String, Integer>();

    // Priorities count down from (size + 5) so the first listed id ranks highest
    int rank = elevate.size() + 5;
    for( String docId : elevate ) {
      elevatedDocs.add( new TermQuery( new Term( idField, docId ) ), BooleanClause.Occur.SHOULD );
      ranks.put( docId, rank );
      rank--;
    }
    include = elevatedDocs;
    priority = ranks;

    BooleanClause[] banned = null;
    if( exclude != null && !exclude.isEmpty() ) {
      banned = new BooleanClause[exclude.size()];
      int slot = 0;
      for( String docId : exclude ) {
        final TermQuery byId = new TermQuery( new Term( idField, docId ) );
        banned[slot++] = new BooleanClause( byId, BooleanClause.Occur.MUST_NOT );
      }
    }
    this.exclude = banned;

    comparatorSource = new ElevationComparatorSource( ranks );
  }
}
/**
 * Stores the component's init arguments as SolrParams for later use by
 * {@code inform} and {@code getElevationMap}.
 *
 * @param args the init arguments for this component
 */
@Override
public void init( NamedList args )
{
  final SolrParams converted = SolrParams.toSolrParams( args );
  this.initArgs = converted;
}
/**
 * Finishes configuration once the core is available (presumably the
 * SolrCoreAware callback): resolves the optional query analyzer, checks that
 * the unique key is a StrField, reads the force-elevation default, and loads
 * the elevation file from either the config directory or the data directory.
 *
 * @param core the core this component belongs to
 * @throws SolrException if the field type is unknown, the unique key is not a
 *         StrField, or loading the elevation configuration fails for any reason
 */
public void inform(SolrCore core)
{
  // Optional analyzer, taken from the configured field type
  final String fieldTypeName = initArgs.get( FIELD_TYPE );
  if( fieldTypeName != null ) {
    final FieldType queryFieldType = core.getSchema().getFieldTypes().get( fieldTypeName );
    if( queryFieldType == null ) {
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "Unknown FieldType: '"+fieldTypeName+"' used in QueryElevationComponent" );
    }
    analyzer = queryFieldType.getQueryAnalyzer();
  }

  // Elevation matches documents by unique key, which must be a StrField
  final SchemaField uniqueKey = core.getSchema().getUniqueKeyField();
  final boolean stringKeyed = uniqueKey != null && uniqueKey.getType() instanceof StrField;
  if( !stringKeyed ) {
    throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
        "QueryElevationComponent requires the schema to have a uniqueKeyField implemented using StrField" );
  }
  idField = StringHelper.intern( uniqueKey.getName() );

  forceElevation = initArgs.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation );

  try {
    synchronized( elevationCache ) {
      elevationCache.clear();

      final String fileName = initArgs.get( CONFIG_FILE );
      if( fileName == null ) {
        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
            "QueryElevationComponent must specify argument: '"+CONFIG_FILE
            +"' -- path to elevate.xml" );
      }

      final File confCopy = new File( core.getResourceLoader().getConfigDir(), fileName );
      final File dataCopy = new File( core.getDataDir(), fileName );

      // Exactly one of the two locations must hold the file
      if( confCopy.exists() == dataCopy.exists() ) {
        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
            "QueryElevationComponent missing config file: '"+fileName + "\n"
            +"either: "+confCopy.getAbsolutePath() + " or " + dataCopy.getAbsolutePath() + " must exist, but not both." );
      }

      if( !confCopy.exists() ) {
        // Data-dir variant: preload the map for the newest searcher's reader
        RefCounted<SolrIndexSearcher> holder = null;
        try {
          holder = core.getNewestSearcher(false);
          getElevationMap( holder.get().getReader(), core );
        } finally {
          if (holder != null) holder.decref();
        }
      }
      else {
        // Config-dir variant: load once and store under the null key
        log.info( "Loading QueryElevation from: "+confCopy.getAbsolutePath() );
        final Config elevationConfig = new Config( core.getResourceLoader(), fileName );
        elevationCache.put(null, loadElevationMap( elevationConfig ));
      }
    }
  }
  catch( Exception ex ) {
    throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
        "Error initializing QueryElevationComponent.", ex );
  }
}
/**
 * Returns the query -> elevation map to use for the given reader.
 * <p>
 * If a map is cached under the null key (configuration loaded from the config
 * directory) it is returned for every reader. Otherwise the map cached for
 * {@code reader} is returned, loading it from the latest version of the
 * configured file in the data directory on first use.
 *
 * @param reader the index reader the map is cached against
 * @param core   the core supplying the data directory and resource loader
 * @return the elevation map, keyed by analyzed query string
 * @throws Exception if the CONFIG_FILE argument is missing or the file cannot be read or parsed
 */
Map<String, ElevationObj> getElevationMap( IndexReader reader, SolrCore core ) throws Exception
{
  synchronized( elevationCache ) {
    Map<String, ElevationObj> map = elevationCache.get( null );
    if (map != null) return map;

    map = elevationCache.get( reader );
    if( map == null ) {
      String f = initArgs.get( CONFIG_FILE );
      if( f == null ) {
        throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
            "QueryElevationComponent must specify argument: "+CONFIG_FILE );
      }
      log.info( "Loading QueryElevation from data dir: "+f );

      InputStream is = VersionedFile.getLatestFile( core.getDataDir(), f );
      try {
        Config cfg = new Config( core.getResourceLoader(), f, new InputSource(is), null );
        map = loadElevationMap( cfg );
      }
      finally {
        // Always release the file handle, whether or not parsing succeeded.
        // A failure to close must not mask the primary outcome, so it is only logged.
        if( is != null ) {
          try {
            is.close();
          }
          catch( IOException closeFailure ) {
            log.warn( "Could not close QueryElevation file: "+f, closeFailure );
          }
        }
      }
      elevationCache.put( reader, map );
    }
    return map;
  }
}
/**
 * Builds the query -> elevation map from a parsed elevation configuration.
 * <p>
 * Every node matching {@code elevate/query} contributes one entry. Its
 * {@code doc} children are split into elevated ids and excluded ids according
 * to the boolean value of their EXCLUDE attribute.
 *
 * @param cfg the parsed elevation configuration
 * @return map from analyzed query text to its elevation rules
 * @throws IOException if analyzing a query text fails
 * @throws SolrException if the {@code doc} children cannot be evaluated, or two
 *         entries analyze to the same query string
 */
private Map<String, ElevationObj> loadElevationMap( Config cfg ) throws IOException
{
  XPath xpath = XPathFactory.newInstance().newXPath();
  Map<String, ElevationObj> map = new HashMap<String, ElevationObj>();
  NodeList nodes = (NodeList)cfg.evaluate( "elevate/query", XPathConstants.NODESET );
  for (int i=0; i<nodes.getLength(); i++) {
    Node node = nodes.item( i );
    // The third argument looks like the error text used when the attribute is absent
    // (presumably DOMUtil.getAttr throws in that case -- confirm against DOMUtil).
    String qstr = DOMUtil.getAttr( node, "text", "missing query 'text'" );

    NodeList children = null;
    try {
      children = (NodeList)xpath.evaluate("doc", node, XPathConstants.NODESET);
    }
    catch (XPathExpressionException e) {
      // Keep the underlying XPath failure as the cause so it is not lost
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "query requires '<doc .../>' child", e );
    }

    List<String> include = new ArrayList<String>();
    List<String> exclude = new ArrayList<String>();
    for (int j=0; j<children.getLength(); j++) {
      Node child = children.item(j);
      String id = DOMUtil.getAttr( child, "id", "missing 'id'" );
      // parseBoolean treats a missing (null) attribute as false
      if( Boolean.parseBoolean( DOMUtil.getAttr( child, EXCLUDE, null ) ) ) {
        exclude.add( id );
      }
      else {
        include.add( id );
      }
    }

    ElevationObj elev = new ElevationObj( qstr, include, exclude );
    // Two configured queries that analyze to the same key would silently shadow each other
    if( map.containsKey( elev.analyzed ) ) {
      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
          "Boosting query defined twice for query: '"+elev.text+"' ("+elev.analyzed+"')" );
    }
    map.put( elev.analyzed, elev );
  }
  return map;
}
/**
 * Registers elevation rules for one query directly in the cache entry of the
 * given reader. Helpful for testing without loading an elevation file.
 *
 * @param reader the reader whose cached map receives the rules
 * @param query  the query text to elevate for
 * @param ids    ids to elevate, highest priority first; null is treated as empty
 * @param ex     ids to exclude; null is treated as empty
 * @throws IOException if analyzing the query text fails
 */
void setTopQueryResults( IndexReader reader, String query, String[] ids, String[] ex ) throws IOException
{
  final String[] elevatedIds = (ids != null) ? ids : new String[0];
  final String[] excludedIds = (ex != null) ? ex : new String[0];

  // Make sure the reader has a map before building the rules
  Map<String,ElevationObj> rulesForReader = elevationCache.get( reader );
  if( rulesForReader == null ) {
    rulesForReader = new HashMap<String, ElevationObj>();
    elevationCache.put( reader, rulesForReader );
  }

  final List<String> elevatedList = Arrays.asList( elevatedIds );
  final List<String> excludedList = Arrays.asList( excludedIds );
  final ElevationObj rules = new ElevationObj( query, elevatedList, excludedList );
  rulesForReader.put( rules.analyzed, rules );
}
/**
 * Normalizes a query string into the key used by the elevation maps.
 * <p>
 * When no analyzer is configured the string is returned unchanged. Otherwise
 * the string is run through the analyzer and the resulting terms are
 * concatenated with no separator.
 *
 * @param query the raw query string
 * @return the analyzed form of {@code query}, or {@code query} itself when there is no analyzer
 * @throws IOException if the analyzer fails while producing tokens
 */
String getAnalyzedQuery( String query ) throws IOException
{
  if( analyzer == null ) {
    return query;
  }
  StringBuilder norm = new StringBuilder();
  TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
  try {
    tokens.reset();
    CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
    while( tokens.incrementToken() ) {
      norm.append( termAtt.buffer(), 0, termAtt.length() );
    }
    tokens.end();
  }
  finally {
    // Close even when tokenization fails, so the stream's resources are released
    tokens.close();
  }
  return norm.toString();
}
//---------------------------------------------------------------------------------
// SearchComponent
//---------------------------------------------------------------------------------
/**
 * Rewrites the request's query and sort so that configured documents are
 * elevated (and configured exclusions removed) when the analyzed query string
 * matches an entry of the elevation map. Adds a "queryBoosting" debug section
 * when debugging is enabled.
 *
 * @param rb the response builder carrying the request, query and sort spec
 * @throws IOException if analyzing the query string fails
 * @throws SolrException if the elevation map cannot be loaded
 */
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
  final SolrQueryRequest request = rb.req;
  final SolrParams requestParams = request.getParams();

  // The whole component can be switched off per request
  final boolean enabled = requestParams.getBool( QueryElevationParams.ENABLE, true );
  if( !enabled ) {
    return;
  }

  final boolean onlyElevated = requestParams.getBool(QueryElevationParams.EXCLUSIVE, false);
  // The request may override the configured forceElevation value
  final boolean forceToTop = requestParams.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation );

  final Query userQuery = rb.getQuery();
  final String rawQueryString = rb.getQueryString();
  if( userQuery == null || rawQueryString == null ) {
    return;
  }

  final String lookupKey = getAnalyzedQuery( rawQueryString );
  final IndexReader reader = request.getSearcher().getReader();

  ElevationObj booster;
  try {
    booster = getElevationMap( reader, request.getCore() ).get( lookupKey );
  }
  catch( Exception ex ) {
    throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
        "Error loading elevation", ex );
  }

  if( booster != null ) {
    // Replace the query so the elevated documents are part of the result set
    if( onlyElevated ) {
      // Only the elevated documents are wanted
      rb.setQuery( booster.include );
    }
    else {
      final BooleanQuery combined = new BooleanQuery( true );
      combined.add( userQuery, BooleanClause.Occur.SHOULD );
      combined.add( booster.include, BooleanClause.Occur.SHOULD );
      final BooleanClause[] banned = booster.exclude;
      if( banned != null ) {
        for( int i = 0; i < banned.length; i++ ) {
          combined.add( banned[i] );
        }
      }
      rb.setQuery( combined );
    }

    // Put the elevation comparator in front of score sorting so that
    // elevated documents land in their configured positions
    final SortSpec sortSpec = rb.getSortSpec();
    final Sort requestedSort = sortSpec.getSort();
    if( requestedSort == null ) {
      final SortField[] elevationThenScore = new SortField[] {
        new SortField(idField, booster.comparatorSource, false ),
        new SortField(null, SortField.SCORE, false)
      };
      sortSpec.setSort( new Sort( elevationThenScore ) );
    }
    else {
      final SortField[] requested = requestedSort.getSort();
      final ArrayList<SortField> rewritten = new ArrayList<SortField>( requested.length + 1 );
      boolean changed = false;

      // With force on, elevation leads even when the primary sort is not score
      if( forceToTop && requested[0].getType() != SortField.SCORE ) {
        rewritten.add( new SortField(idField, booster.comparatorSource, false ) );
        changed = true;
      }

      // Every score sort gets an elevation sort, with the same direction, right before it
      for( int i = 0; i < requested.length; i++ ) {
        final SortField field = requested[i];
        if( field.getType() == SortField.SCORE ) {
          rewritten.add( new SortField(idField, booster.comparatorSource, field.getReverse() ) );
          changed = true;
        }
        rewritten.add( field );
      }

      if( changed ) {
        final SortField[] asArray = rewritten.toArray( new SortField[rewritten.size()] );
        sortSpec.setSort( new Sort( asArray ) );
      }
    }
  }

  // Debug output: the analyzed query and the elevated ids (null when nothing matched)
  if( rb.isDebug() ) {
    List<String> elevatedIds = null;
    if( booster != null ) {
      elevatedIds = new ArrayList<String>(booster.priority.size());
      for( Object clause : booster.include.clauses() ) {
        final TermQuery byId = (TermQuery)((BooleanClause)clause).getQuery();
        elevatedIds.add( byId.getTerm().text() );
      }
    }
    final SimpleOrderedMap<Object> debugSection = new SimpleOrderedMap<Object>();
    debugSection.add( "q", lookupKey );
    debugSection.add( "match", elevatedIds );
    rb.addDebugInfo( "queryBoosting", debugSection );
  }
}
/**
 * Intentionally empty: all of this component's work is done in
 * {@code prepare}, which modifies the query and sort before execution.
 *
 * @param rb the response builder (unused)
 */
@Override
public void process(ResponseBuilder rb) throws IOException
{
  // Nothing to do here -- the input query was already rewritten in prepare()
}
//---------------------------------------------------------------------------------
// SolrInfoMBean
//---------------------------------------------------------------------------------
/** @return a one-line, human-readable description of this component */
@Override
public String getDescription()
{
  final String description = "Query Boosting -- boost particular documents for a given query";
  return description;
}
/** @return the revision keyword string (presumably expanded by version-control keyword substitution) */
@Override
public String getVersion()
{
  final String revision = "$Revision$";
  return revision;
}
/** @return the id keyword string (presumably expanded by version-control keyword substitution) */
@Override
public String getSourceId()
{
  final String sourceId = "$Id$";
  return sourceId;
}
/** @return the URL keyword string (presumably expanded by version-control keyword substitution) */
@Override
public String getSource()
{
  final String sourceUrl = "$URL$";
  return sourceUrl;
}
/**
 * @return a single-element array holding the wiki page for this component
 * @throws RuntimeException if the hard-coded URL is rejected as malformed
 */
@Override
public URL[] getDocs()
{
  final URL wikiPage;
  try {
    wikiPage = new URL("http://wiki.apache.org/solr/QueryElevationComponent");
  }
  catch (MalformedURLException e) {
    throw new RuntimeException( e );
  }
  return new URL[] { wikiPage };
}
}
/**
 * Comparator source that orders documents by a configured per-id priority.
 * Documents whose id has no configured priority are treated as priority 0,
 * and higher priorities sort first.
 */
class ElevationComparatorSource extends FieldComparatorSource {
  /** Document id -> priority; ids not present count as 0. */
  private final Map<String,Integer> priority;

  /**
   * @param boosts map from document id to its priority
   */
  public ElevationComparatorSource( final Map<String,Integer> boosts) {
    this.priority = boosts;
  }

  /**
   * Creates a comparator that looks up each document's id in {@code fieldname}
   * and compares by the priority configured for that id.
   */
  @Override
  public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException {
    return new FieldComparator<Integer>() {
      // Priority held in each queue slot
      private final int[] slotPriorities = new int[numHits];
      // Id lookup for the current reader; set in setNextReader
      private FieldCache.StringIndex ids;
      // Priority of the current bottom slot
      private int bottomPriority;

      /** Priority configured for the given document, or 0 when its id is not listed. */
      private int priorityOf(int doc) throws IOException {
        final int ord = ids.order[doc];
        final Integer configured = priority.get( ids.lookup[ord] );
        if( configured == null ) {
          return 0;
        }
        return configured.intValue();
      }

      @Override
      public int compare(int slot1, int slot2) {
        // Higher priority sorts first; priorities are small, so the subtraction cannot overflow
        final int first = slotPriorities[slot1];
        final int second = slotPriorities[slot2];
        return second - first;
      }

      @Override
      public void setBottom(int slot) {
        bottomPriority = slotPriorities[slot];
      }

      @Override
      public int compareBottom(int doc) throws IOException {
        final int candidate = priorityOf( doc );
        return candidate - bottomPriority;
      }

      @Override
      public void copy(int slot, int doc) throws IOException {
        slotPriorities[slot] = priorityOf( doc );
      }

      @Override
      public void setNextReader(IndexReader reader, int docBase) throws IOException {
        ids = FieldCache.DEFAULT.getStringIndex( reader, fieldname );
      }

      @Override
      public Integer value(int slot) {
        return slotPriorities[slot];
      }
    };
  }
}