| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.search; |
| |
| import java.io.IOException; |
| import java.lang.invoke.MethodHandles; |
| import java.util.Arrays; |
| import java.util.Locale; |
| import java.util.regex.Pattern; |
| |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.PrefixCodedTerms; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.*; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| import org.apache.lucene.util.LongBitSet; |
| import org.apache.lucene.util.automaton.Automata; |
| import org.apache.lucene.util.automaton.Automaton; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.params.SolrParams; |
| import org.apache.solr.request.SolrQueryRequest; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.PointField; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| /** |
| * Finds documents whose specified field has any of the specified values. It's like |
| * {@link TermQParserPlugin} but multi-valued, and supports a variety of internal algorithms. |
| * <br>Parameters: |
| * <br><code>f</code>: The field name (mandatory) |
| * <br><code>separator</code>: the separator delimiting the values in the query string, defaulting to a comma. |
| * If it's a " " then it splits on any consecutive whitespace. |
| * <br><code>method</code>: Any of termsFilter (default), booleanQuery, automaton, docValuesTermsFilter. |
| * <p> |
| * Note that if no values are specified then the query matches no documents. |
| */ |
| public class TermsQParserPlugin extends QParserPlugin { |
| private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); |
| public static final String NAME = "terms"; |
| |
| /** The separator to use in the underlying suggester */ |
| public static final String SEPARATOR = "separator"; |
| |
| /** Choose the internal algorithm */ |
| private static final String METHOD = "method"; |
| |
| private static enum Method { |
| termsFilter { |
| @Override |
| Query makeFilter(String fname, BytesRef[] bytesRefs) { |
| return new TermInSetQuery(fname, bytesRefs);// constant scores |
| } |
| }, |
| booleanQuery { |
| @Override |
| Query makeFilter(String fname, BytesRef[] byteRefs) { |
| BooleanQuery.Builder bq = new BooleanQuery.Builder(); |
| for (BytesRef byteRef : byteRefs) { |
| bq.add(new TermQuery(new Term(fname, byteRef)), BooleanClause.Occur.SHOULD); |
| } |
| return new ConstantScoreQuery(bq.build()); |
| } |
| }, |
| automaton { |
| @Override |
| Query makeFilter(String fname, BytesRef[] byteRefs) { |
| ArrayUtil.timSort(byteRefs); // same sort algo as TermInSetQuery's choice |
| Automaton union = Automata.makeStringUnion(Arrays.asList(byteRefs)); // input must be sorted |
| return new AutomatonQuery(new Term(fname), union);//constant scores |
| } |
| }, |
| docValuesTermsFilter {//on 4x this is FieldCacheTermsFilter but we use the 5x name any way |
| @Override |
| Query makeFilter(String fname, BytesRef[] byteRefs) { |
| // TODO Further tune this heuristic number |
| return (byteRefs.length > 700) ? docValuesTermsFilterTopLevel.makeFilter(fname, byteRefs) : docValuesTermsFilterPerSegment.makeFilter(fname, byteRefs); |
| } |
| }, |
| docValuesTermsFilterTopLevel { |
| @Override |
| Query makeFilter(String fname, BytesRef[] byteRefs) { |
| return disableCacheByDefault(new TopLevelDocValuesTermsQuery(fname, byteRefs)); |
| } |
| }, |
| docValuesTermsFilterPerSegment { |
| @Override |
| Query makeFilter(String fname, BytesRef[] byteRefs) { |
| return disableCacheByDefault(new DocValuesTermsQuery(fname, byteRefs)); |
| } |
| }; |
| |
| private static Query disableCacheByDefault(Query q) { |
| final WrappedQuery wrappedQuery = new WrappedQuery(q); |
| wrappedQuery.setCache(false); |
| return wrappedQuery; |
| } |
| |
| abstract Query makeFilter(String fname, BytesRef[] byteRefs); |
| } |
| |
| @Override |
| public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { |
| return new QParser(qstr, localParams, params, req) { |
| @Override |
| public Query parse() throws SyntaxError { |
| String fname = localParams.get(QueryParsing.F); |
| if (fname == null || fname.isEmpty()) { |
| throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Missing field to query"); |
| } |
| FieldType ft = req.getSchema().getFieldType(fname); |
| String separator = localParams.get(SEPARATOR, ","); |
| String qstr = localParams.get(QueryParsing.V);//never null |
| Method method = Method.valueOf(localParams.get(METHOD, Method.termsFilter.name())); |
| //TODO pick the default method based on various heuristics from benchmarks |
| //TODO pick the default using FieldType.getSetQuery |
| |
| //if space then split on all whitespace & trim, otherwise strictly interpret |
| final boolean sepIsSpace = separator.equals(" "); |
| if (sepIsSpace) |
| qstr = qstr.trim(); |
| if (qstr.length() == 0) |
| return new MatchNoDocsQuery(); |
| final String[] splitVals = sepIsSpace ? qstr.split("\\s+") : qstr.split(Pattern.quote(separator), -1); |
| assert splitVals.length > 0; |
| |
| if (ft.isPointField()) { |
| if (localParams.get(METHOD) != null) { |
| throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, |
| String.format(Locale.ROOT, "Method '%s' not supported in TermsQParser when using PointFields", localParams.get(METHOD))); |
| } |
| return ((PointField)ft).getSetQuery(this, req.getSchema().getField(fname), Arrays.asList(splitVals)); |
| } |
| |
| BytesRef[] bytesRefs = new BytesRef[splitVals.length]; |
| BytesRefBuilder term = new BytesRefBuilder(); |
| for (int i = 0; i < splitVals.length; i++) { |
| String stringVal = splitVals[i]; |
| //logic same as TermQParserPlugin |
| if (ft != null) { |
| ft.readableToIndexed(stringVal, term); |
| } else { |
| term.copyChars(stringVal); |
| } |
| bytesRefs[i] = term.toBytesRef(); |
| } |
| |
| return method.makeFilter(fname, bytesRefs); |
| } |
| }; |
| } |
| |
| private static class TopLevelDocValuesTermsQuery extends DocValuesTermsQuery { |
| private final String fieldName; |
| private SortedSetDocValues topLevelDocValues; |
| private LongBitSet topLevelTermOrdinals; |
| private boolean matchesAtLeastOneTerm = false; |
| |
| |
| public TopLevelDocValuesTermsQuery(String field, BytesRef... terms) { |
| super(field, terms); |
| this.fieldName = field; |
| } |
| |
| public Weight createWeight(IndexSearcher searcher, final ScoreMode scoreMode, float boost) throws IOException { |
| if (! (searcher instanceof SolrIndexSearcher)) { |
| log.debug("Falling back to DocValuesTermsQuery because searcher [{}] is not the required SolrIndexSearcher", searcher); |
| return super.createWeight(searcher, scoreMode, boost); |
| } |
| |
| topLevelDocValues = DocValues.getSortedSet(((SolrIndexSearcher)searcher).getSlowAtomicReader(), fieldName); |
| topLevelTermOrdinals = new LongBitSet(topLevelDocValues.getValueCount()); |
| PrefixCodedTerms.TermIterator iterator = getTerms().iterator(); |
| |
| long lastTermOrdFound = 0; |
| for(BytesRef term = iterator.next(); term != null; term = iterator.next()) { |
| long currentTermOrd = lookupTerm(topLevelDocValues, term, lastTermOrdFound); |
| if (currentTermOrd >= 0L) { |
| matchesAtLeastOneTerm = true; |
| topLevelTermOrdinals.set(currentTermOrd); |
| lastTermOrdFound = currentTermOrd; |
| } |
| } |
| |
| return new ConstantScoreWeight(this, boost) { |
| public Scorer scorer(LeafReaderContext context) throws IOException { |
| if (! matchesAtLeastOneTerm) { |
| return null; |
| } |
| |
| SortedSetDocValues segmentDocValues = DocValues.getSortedSet(context.reader(), fieldName); |
| if (segmentDocValues == null) { |
| return null; |
| } |
| |
| final int docBase = context.docBase; |
| return new ConstantScoreScorer(this, this.score(), scoreMode, new TwoPhaseIterator(segmentDocValues) { |
| public boolean matches() throws IOException { |
| topLevelDocValues.advanceExact(docBase + approximation.docID()); |
| for(long ord = topLevelDocValues.nextOrd(); ord != -1L; ord = topLevelDocValues.nextOrd()) { |
| if (topLevelTermOrdinals.get(ord)) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| public float matchCost() { |
| return 10.0F; |
| } |
| }); |
| |
| } |
| |
| public boolean isCacheable(LeafReaderContext ctx) { |
| return DocValues.isCacheable(ctx, new String[]{fieldName}); |
| } |
| }; |
| } |
| |
| /* |
| * Same binary-search based implementation as SortedSetDocValues.lookupTerm(BytesRef), but with an |
| * optimization to narrow the search space where possible by providing a startOrd instead of begining each search |
| * at 0. |
| */ |
| private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException { |
| long low = startOrd; |
| long high = docValues.getValueCount()-1; |
| |
| while (low <= high) { |
| long mid = (low + high) >>> 1; |
| final BytesRef term = docValues.lookupOrd(mid); |
| int cmp = term.compareTo(key); |
| |
| if (cmp < 0) { |
| low = mid + 1; |
| } else if (cmp > 0) { |
| high = mid - 1; |
| } else { |
| return mid; // key found |
| } |
| } |
| |
| return -(low + 1); // key not found. |
| } |
| } |
| } |