| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.solr.handler.component; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.StringReader; |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.WeakHashMap; |
| |
| import org.apache.solr.common.params.QueryElevationParams; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import javax.xml.xpath.XPath; |
| import javax.xml.xpath.XPathConstants; |
| import javax.xml.xpath.XPathExpressionException; |
| import javax.xml.xpath.XPathFactory; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.*; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.params.SolrParams; |
| import org.apache.solr.common.util.DOMUtil; |
| import org.apache.solr.common.util.NamedList; |
| import org.apache.solr.common.util.SimpleOrderedMap; |
| import org.apache.solr.core.Config; |
| import org.apache.solr.core.SolrCore; |
| import org.apache.solr.schema.StrField; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.SchemaField; |
| import org.apache.solr.search.SortSpec; |
| import org.apache.solr.search.SolrIndexSearcher; |
| import org.apache.solr.util.VersionedFile; |
| import org.apache.solr.util.RefCounted; |
| import org.apache.solr.util.plugin.SolrCoreAware; |
| import org.apache.solr.request.SolrQueryRequest; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| import org.xml.sax.InputSource; |
| |
| /** |
| * A component to elevate some documents to the top of the result set. |
| * |
| * @version $Id$ |
| * @since solr 1.3 |
| */ |
| public class QueryElevationComponent extends SearchComponent implements SolrCoreAware |
| { |
| private static Logger log = LoggerFactory.getLogger(QueryElevationComponent.class); |
| |
| // Constants used in solrconfig.xml |
| static final String FIELD_TYPE = "queryFieldType"; |
| static final String CONFIG_FILE = "config-file"; |
| static final String EXCLUDE = "exclude"; |
| |
| // Runtime param -- should be in common? |
| |
| private SolrParams initArgs = null; |
| private Analyzer analyzer = null; |
| private String idField = null; |
| |
| boolean forceElevation = false; |
| // For each IndexReader, keep a query->elevation map |
| // When the configuration is loaded from the data directory. |
| // The key is null if loaded from the config directory, and |
| // is never re-loaded. |
| final Map<IndexReader,Map<String, ElevationObj>> elevationCache = |
| new WeakHashMap<IndexReader, Map<String,ElevationObj>>(); |
| |
| class ElevationObj { |
| final String text; |
| final String analyzed; |
| final BooleanClause[] exclude; |
| final BooleanQuery include; |
| final Map<String,Integer> priority; |
| |
| // use singletons so hashCode/equals on Sort will just work |
| final FieldComparatorSource comparatorSource; |
| |
| ElevationObj( String qstr, List<String> elevate, List<String> exclude ) throws IOException |
| { |
| this.text = qstr; |
| this.analyzed = getAnalyzedQuery( this.text ); |
| |
| this.include = new BooleanQuery(); |
| this.include.setBoost( 0 ); |
| this.priority = new HashMap<String, Integer>(); |
| int max = elevate.size()+5; |
| for( String id : elevate ) { |
| TermQuery tq = new TermQuery( new Term( idField, id ) ); |
| include.add( tq, BooleanClause.Occur.SHOULD ); |
| this.priority.put( id, max-- ); |
| } |
| |
| if( exclude == null || exclude.isEmpty() ) { |
| this.exclude = null; |
| } |
| else { |
| this.exclude = new BooleanClause[exclude.size()]; |
| for( int i=0; i<exclude.size(); i++ ) { |
| TermQuery tq = new TermQuery( new Term( idField, exclude.get(i) ) ); |
| this.exclude[i] = new BooleanClause( tq, BooleanClause.Occur.MUST_NOT ); |
| } |
| } |
| |
| this.comparatorSource = new ElevationComparatorSource(priority); |
| } |
| } |
| |
| @Override |
| public void init( NamedList args ) |
| { |
| this.initArgs = SolrParams.toSolrParams( args ); |
| } |
| |
| public void inform(SolrCore core) |
| { |
| String a = initArgs.get( FIELD_TYPE ); |
| if( a != null ) { |
| FieldType ft = core.getSchema().getFieldTypes().get( a ); |
| if( ft == null ) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "Unknown FieldType: '"+a+"' used in QueryElevationComponent" ); |
| } |
| analyzer = ft.getQueryAnalyzer(); |
| } |
| |
| SchemaField sf = core.getSchema().getUniqueKeyField(); |
| if( sf == null || !(sf.getType() instanceof StrField)) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "QueryElevationComponent requires the schema to have a uniqueKeyField implemented using StrField" ); |
| } |
| idField = StringHelper.intern(sf.getName()); |
| |
| forceElevation = initArgs.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation ); |
| try { |
| synchronized( elevationCache ) { |
| elevationCache.clear(); |
| String f = initArgs.get( CONFIG_FILE ); |
| if( f == null ) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "QueryElevationComponent must specify argument: '"+CONFIG_FILE |
| +"' -- path to elevate.xml" ); |
| } |
| File fC = new File( core.getResourceLoader().getConfigDir(), f ); |
| File fD = new File( core.getDataDir(), f ); |
| if( fC.exists() == fD.exists() ) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "QueryElevationComponent missing config file: '"+f + "\n" |
| +"either: "+fC.getAbsolutePath() + " or " + fD.getAbsolutePath() + " must exist, but not both." ); |
| } |
| if( fC.exists() ) { |
| log.info( "Loading QueryElevation from: "+fC.getAbsolutePath() ); |
| Config cfg = new Config( core.getResourceLoader(), f ); |
| elevationCache.put(null, loadElevationMap( cfg )); |
| } |
| else { |
| // preload the first data |
| RefCounted<SolrIndexSearcher> searchHolder = null; |
| try { |
| searchHolder = core.getNewestSearcher(false); |
| IndexReader reader = searchHolder.get().getReader(); |
| getElevationMap( reader, core ); |
| } finally { |
| if (searchHolder != null) searchHolder.decref(); |
| } |
| } |
| } |
| } |
| catch( Exception ex ) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "Error initializing QueryElevationComponent.", ex ); |
| } |
| } |
| |
| Map<String, ElevationObj> getElevationMap( IndexReader reader, SolrCore core ) throws Exception |
| { |
| synchronized( elevationCache ) { |
| Map<String, ElevationObj> map = elevationCache.get( null ); |
| if (map != null) return map; |
| |
| map = elevationCache.get( reader ); |
| if( map == null ) { |
| String f = initArgs.get( CONFIG_FILE ); |
| if( f == null ) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "QueryElevationComponent must specify argument: "+CONFIG_FILE ); |
| } |
| log.info( "Loading QueryElevation from data dir: "+f ); |
| |
| InputStream is = VersionedFile.getLatestFile( core.getDataDir(), f ); |
| Config cfg = new Config( core.getResourceLoader(), f, new InputSource(is), null ); |
| map = loadElevationMap( cfg ); |
| elevationCache.put( reader, map ); |
| } |
| return map; |
| } |
| } |
| |
| private Map<String, ElevationObj> loadElevationMap( Config cfg ) throws IOException |
| { |
| XPath xpath = XPathFactory.newInstance().newXPath(); |
| Map<String, ElevationObj> map = new HashMap<String, ElevationObj>(); |
| NodeList nodes = (NodeList)cfg.evaluate( "elevate/query", XPathConstants.NODESET ); |
| for (int i=0; i<nodes.getLength(); i++) { |
| Node node = nodes.item( i ); |
| String qstr = DOMUtil.getAttr( node, "text", "missing query 'text'" ); |
| |
| NodeList children = null; |
| try { |
| children = (NodeList)xpath.evaluate("doc", node, XPathConstants.NODESET); |
| } |
| catch (XPathExpressionException e) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "query requires '<doc .../>' child" ); |
| } |
| |
| ArrayList<String> include = new ArrayList<String>(); |
| ArrayList<String> exclude = new ArrayList<String>(); |
| for (int j=0; j<children.getLength(); j++) { |
| Node child = children.item(j); |
| String id = DOMUtil.getAttr( child, "id", "missing 'id'" ); |
| String e = DOMUtil.getAttr( child, EXCLUDE, null ); |
| if( e != null ) { |
| if( Boolean.valueOf( e ) ) { |
| exclude.add( id ); |
| continue; |
| } |
| } |
| include.add( id ); |
| } |
| |
| ElevationObj elev = new ElevationObj( qstr, include, exclude ); |
| if( map.containsKey( elev.analyzed ) ) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "Boosting query defined twice for query: '"+elev.text+"' ("+elev.analyzed+"')" ); |
| } |
| map.put( elev.analyzed, elev ); |
| } |
| return map; |
| } |
| |
| /** |
| * Helpful for testing without loading config.xml |
| * @throws IOException |
| */ |
| void setTopQueryResults( IndexReader reader, String query, String[] ids, String[] ex ) throws IOException |
| { |
| if( ids == null ) { |
| ids = new String[0]; |
| } |
| if( ex == null ) { |
| ex = new String[0]; |
| } |
| |
| Map<String,ElevationObj> elev = elevationCache.get( reader ); |
| if( elev == null ) { |
| elev = new HashMap<String, ElevationObj>(); |
| elevationCache.put( reader, elev ); |
| } |
| ElevationObj obj = new ElevationObj( query, Arrays.asList(ids), Arrays.asList(ex) ); |
| elev.put( obj.analyzed, obj ); |
| } |
| |
| String getAnalyzedQuery( String query ) throws IOException |
| { |
| if( analyzer == null ) { |
| return query; |
| } |
| StringBuilder norm = new StringBuilder(); |
| TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) ); |
| tokens.reset(); |
| |
| CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class); |
| while( tokens.incrementToken() ) { |
| norm.append( termAtt.buffer(), 0, termAtt.length() ); |
| } |
| tokens.end(); |
| tokens.close(); |
| return norm.toString(); |
| } |
| |
| //--------------------------------------------------------------------------------- |
| // SearchComponent |
| //--------------------------------------------------------------------------------- |
| |
| @Override |
| public void prepare(ResponseBuilder rb) throws IOException |
| { |
| SolrQueryRequest req = rb.req; |
| SolrParams params = req.getParams(); |
| // A runtime param can skip |
| if( !params.getBool( QueryElevationParams.ENABLE, true ) ) { |
| return; |
| } |
| |
| boolean exclusive = params.getBool(QueryElevationParams.EXCLUSIVE, false); |
| // A runtime parameter can alter the config value for forceElevation |
| boolean force = params.getBool( QueryElevationParams.FORCE_ELEVATION, forceElevation ); |
| |
| Query query = rb.getQuery(); |
| String qstr = rb.getQueryString(); |
| if( query == null || qstr == null) { |
| return; |
| } |
| |
| qstr = getAnalyzedQuery(qstr); |
| IndexReader reader = req.getSearcher().getReader(); |
| ElevationObj booster = null; |
| try { |
| booster = getElevationMap( reader, req.getCore() ).get( qstr ); |
| } |
| catch( Exception ex ) { |
| throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, |
| "Error loading elevation", ex ); |
| } |
| |
| if( booster != null ) { |
| // Change the query to insert forced documents |
| if (exclusive == true){ |
| //we only want these results |
| rb.setQuery(booster.include); |
| } else { |
| BooleanQuery newq = new BooleanQuery( true ); |
| newq.add( query, BooleanClause.Occur.SHOULD ); |
| newq.add( booster.include, BooleanClause.Occur.SHOULD ); |
| if( booster.exclude != null ) { |
| for( BooleanClause bq : booster.exclude ) { |
| newq.add( bq ); |
| } |
| } |
| rb.setQuery( newq ); |
| } |
| |
| |
| // if the sort is 'score desc' use a custom sorting method to |
| // insert documents in their proper place |
| SortSpec sortSpec = rb.getSortSpec(); |
| if( sortSpec.getSort() == null ) { |
| sortSpec.setSort( new Sort( new SortField[] { |
| new SortField(idField, booster.comparatorSource, false ), |
| new SortField(null, SortField.SCORE, false) |
| })); |
| } |
| else { |
| // Check if the sort is based on score |
| boolean modify = false; |
| SortField[] current = sortSpec.getSort().getSort(); |
| ArrayList<SortField> sorts = new ArrayList<SortField>( current.length + 1 ); |
| // Perhaps force it to always sort by score |
| if( force && current[0].getType() != SortField.SCORE ) { |
| sorts.add( new SortField(idField, booster.comparatorSource, false ) ); |
| modify = true; |
| } |
| for( SortField sf : current ) { |
| if( sf.getType() == SortField.SCORE ) { |
| sorts.add( new SortField(idField, booster.comparatorSource, sf.getReverse() ) ); |
| modify = true; |
| } |
| sorts.add( sf ); |
| } |
| if( modify ) { |
| sortSpec.setSort( new Sort( sorts.toArray( new SortField[sorts.size()] ) ) ); |
| } |
| } |
| } |
| |
| // Add debugging information |
| if( rb.isDebug() ) { |
| List<String> match = null; |
| if( booster != null ) { |
| // Extract the elevated terms into a list |
| match = new ArrayList<String>(booster.priority.size()); |
| for( Object o : booster.include.clauses() ) { |
| TermQuery tq = (TermQuery)((BooleanClause)o).getQuery(); |
| match.add( tq.getTerm().text() ); |
| } |
| } |
| |
| SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>(); |
| dbg.add( "q", qstr ); |
| dbg.add( "match", match ); |
| rb.addDebugInfo( "queryBoosting", dbg ); |
| } |
| } |
| |
| @Override |
| public void process(ResponseBuilder rb) throws IOException { |
| // Do nothing -- the real work is modifying the input query |
| } |
| |
| //--------------------------------------------------------------------------------- |
| // SolrInfoMBean |
| //--------------------------------------------------------------------------------- |
| |
| @Override |
| public String getDescription() { |
| return "Query Boosting -- boost particular documents for a given query"; |
| } |
| |
| @Override |
| public String getVersion() { |
| return "$Revision$"; |
| } |
| |
| @Override |
| public String getSourceId() { |
| return "$Id$"; |
| } |
| |
| @Override |
| public String getSource() { |
| return "$URL$"; |
| } |
| |
| @Override |
| public URL[] getDocs() { |
| try { |
| return new URL[] { |
| new URL("http://wiki.apache.org/solr/QueryElevationComponent") |
| }; |
| } |
| catch (MalformedURLException e) { |
| throw new RuntimeException( e ); |
| } |
| } |
| } |
| |
| class ElevationComparatorSource extends FieldComparatorSource { |
| private final Map<String,Integer> priority; |
| |
| public ElevationComparatorSource( final Map<String,Integer> boosts) { |
| this.priority = boosts; |
| } |
| |
| @Override |
| public FieldComparator<Integer> newComparator(final String fieldname, final int numHits, int sortPos, boolean reversed) throws IOException { |
| return new FieldComparator<Integer>() { |
| |
| FieldCache.StringIndex idIndex; |
| private final int[] values = new int[numHits]; |
| int bottomVal; |
| |
| @Override |
| public int compare(int slot1, int slot2) { |
| return values[slot2] - values[slot1]; // values will be small enough that there is no overflow concern |
| } |
| |
| @Override |
| public void setBottom(int slot) { |
| bottomVal = values[slot]; |
| } |
| |
| private int docVal(int doc) throws IOException { |
| String id = idIndex.lookup[idIndex.order[doc]]; |
| Integer prio = priority.get(id); |
| return prio == null ? 0 : prio.intValue(); |
| } |
| |
| @Override |
| public int compareBottom(int doc) throws IOException { |
| return docVal(doc) - bottomVal; |
| } |
| |
| @Override |
| public void copy(int slot, int doc) throws IOException { |
| values[slot] = docVal(doc); |
| } |
| |
| @Override |
| public void setNextReader(IndexReader reader, int docBase) throws IOException { |
| idIndex = FieldCache.DEFAULT.getStringIndex(reader, fieldname); |
| } |
| |
| @Override |
| public Integer value(int slot) { |
| return values[slot]; |
| } |
| }; |
| } |
| } |