| package org.apache.lucene.search; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.List; |
| |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexReaderContext; |
| import org.apache.lucene.index.MultiFields; |
| import org.apache.lucene.index.MultiReader; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; |
| import org.apache.lucene.util._TestUtil; |
| |
// TODO
//   - other queries besides PrefixQuery & TermQuery (but:
//     FuzzyQuery will be problematic... the top N terms it
//     takes means results will differ)
//   - NumericRangeQuery / NumericRangeFilter (NRQ/F)
//   - BooleanQuery (BQ), negated clauses, negated prefix clauses
//   - test pulling docs in 2nd round trip...
//   - filter too
| |
| @SuppressCodecs({ "SimpleText", "Memory", "Direct" }) |
| public class TestShardSearching extends ShardSearchingTestBase { |
| |
| private static class PreviousSearchState { |
| public final long searchTimeNanos; |
| public final long[] versions; |
| public final ScoreDoc searchAfterLocal; |
| public final ScoreDoc searchAfterShard; |
| public final Sort sort; |
| public final Query query; |
| public final int numHitsPaged; |
| |
| public PreviousSearchState(Query query, Sort sort, ScoreDoc searchAfterLocal, ScoreDoc searchAfterShard, long[] versions, int numHitsPaged) { |
| this.versions = versions.clone(); |
| this.searchAfterLocal = searchAfterLocal; |
| this.searchAfterShard = searchAfterShard; |
| this.sort = sort; |
| this.query = query; |
| this.numHitsPaged = numHitsPaged; |
| searchTimeNanos = System.nanoTime(); |
| } |
| } |
| |
| public void testSimple() throws Exception { |
| final int numNodes = _TestUtil.nextInt(random(), 1, 10); |
| |
| final double runTimeSec = atLeast(3); |
| |
| final int minDocsToMakeTerms = _TestUtil.nextInt(random(), 5, 20); |
| |
| final int maxSearcherAgeSeconds = _TestUtil.nextInt(random(), 1, 3); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: numNodes=" + numNodes + " runTimeSec=" + runTimeSec + " maxSearcherAgeSeconds=" + maxSearcherAgeSeconds); |
| } |
| |
| start(numNodes, |
| runTimeSec, |
| maxSearcherAgeSeconds |
| ); |
| |
| final List<PreviousSearchState> priorSearches = new ArrayList<PreviousSearchState>(); |
| List<BytesRef> terms = null; |
| while (System.nanoTime() < endTimeNanos) { |
| |
| final boolean doFollowon = priorSearches.size() > 0 && random().nextInt(7) == 1; |
| |
| // Pick a random node; we will run the query on this node: |
| final int myNodeID = random().nextInt(numNodes); |
| |
| final NodeState.ShardIndexSearcher localShardSearcher; |
| |
| final PreviousSearchState prevSearchState; |
| |
| if (doFollowon) { |
| // Pretend user issued a followon query: |
| prevSearchState = priorSearches.get(random().nextInt(priorSearches.size())); |
| |
| if (VERBOSE) { |
| System.out.println("\nTEST: follow-on query age=" + ((System.nanoTime() - prevSearchState.searchTimeNanos)/1000000000.0)); |
| } |
| |
| try { |
| localShardSearcher = nodes[myNodeID].acquire(prevSearchState.versions); |
| } catch (SearcherExpiredException see) { |
| // Expected, sometimes; in a "real" app we would |
| // either forward this error to the user ("too |
| // much time has passed; please re-run your |
| // search") or sneakily just switch to newest |
| // searcher w/o telling them... |
| if (VERBOSE) { |
| System.out.println(" searcher expired during local shard searcher init: " + see); |
| } |
| priorSearches.remove(prevSearchState); |
| continue; |
| } |
| } else { |
| if (VERBOSE) { |
| System.out.println("\nTEST: fresh query"); |
| } |
| // Do fresh query: |
| localShardSearcher = nodes[myNodeID].acquire(); |
| prevSearchState = null; |
| } |
| |
| final IndexReader[] subs = new IndexReader[numNodes]; |
| |
| PreviousSearchState searchState = null; |
| |
| try { |
| |
| // Mock: now make a single reader (MultiReader) from all node |
| // searchers. In a real shard env you can't do this... we |
| // do it to confirm results from the shard searcher |
| // are correct: |
| int docCount = 0; |
| try { |
| for(int nodeID=0;nodeID<numNodes;nodeID++) { |
| final long subVersion = localShardSearcher.nodeVersions[nodeID]; |
| final IndexSearcher sub = nodes[nodeID].searchers.acquire(subVersion); |
| if (sub == null) { |
| nodeID--; |
| while(nodeID >= 0) { |
| subs[nodeID].decRef(); |
| subs[nodeID] = null; |
| nodeID--; |
| } |
| throw new SearcherExpiredException("nodeID=" + nodeID + " version=" + subVersion); |
| } |
| subs[nodeID] = sub.getIndexReader(); |
| docCount += subs[nodeID].maxDoc(); |
| } |
| } catch (SearcherExpiredException see) { |
| // Expected |
| if (VERBOSE) { |
| System.out.println(" searcher expired during mock reader init: " + see); |
| } |
| continue; |
| } |
| |
| final IndexReader mockReader = new MultiReader(subs); |
| final IndexSearcher mockSearcher = new IndexSearcher(mockReader); |
| |
| Query query; |
| Sort sort; |
| |
| if (prevSearchState != null) { |
| query = prevSearchState.query; |
| sort = prevSearchState.sort; |
| } else { |
| if (terms == null && docCount > minDocsToMakeTerms) { |
| // TODO: try to "focus" on high freq terms sometimes too |
| // TODO: maybe also periodically reset the terms...? |
| final TermsEnum termsEnum = MultiFields.getTerms(mockReader, "body").iterator(null); |
| terms = new ArrayList<BytesRef>(); |
| while(termsEnum.next() != null) { |
| terms.add(BytesRef.deepCopyOf(termsEnum.term())); |
| } |
| if (VERBOSE) { |
| System.out.println("TEST: init terms: " + terms.size() + " terms"); |
| } |
| if (terms.size() == 0) { |
| terms = null; |
| } |
| } |
| |
| if (VERBOSE) { |
| System.out.println(" maxDoc=" + mockReader.maxDoc()); |
| } |
| |
| if (terms != null) { |
| if (random().nextBoolean()) { |
| query = new TermQuery(new Term("body", terms.get(random().nextInt(terms.size())))); |
| } else { |
| final String t = terms.get(random().nextInt(terms.size())).utf8ToString(); |
| final String prefix; |
| if (t.length() <= 1) { |
| prefix = t; |
| } else { |
| prefix = t.substring(0, _TestUtil.nextInt(random(), 1, 2)); |
| } |
| query = new PrefixQuery(new Term("body", prefix)); |
| } |
| |
| if (random().nextBoolean()) { |
| sort = null; |
| } else { |
| // TODO: sort by more than 1 field |
| final int what = random().nextInt(3); |
| if (what == 0) { |
| sort = new Sort(SortField.FIELD_SCORE); |
| } else if (what == 1) { |
| // TODO: this sort doesn't merge |
| // correctly... it's tricky because you |
| // could have > 2.1B docs across all shards: |
| //sort = new Sort(SortField.FIELD_DOC); |
| sort = null; |
| } else if (what == 2) { |
| sort = new Sort(new SortField[] {new SortField("docid_int", SortField.Type.INT, random().nextBoolean())}); |
| } else { |
| sort = new Sort(new SortField[] {new SortField("title", SortField.Type.STRING, random().nextBoolean())}); |
| } |
| } |
| } else { |
| query = null; |
| sort = null; |
| } |
| } |
| |
| if (query != null) { |
| |
| try { |
| searchState = assertSame(mockSearcher, localShardSearcher, query, sort, prevSearchState); |
| } catch (SearcherExpiredException see) { |
| // Expected; in a "real" app we would |
| // either forward this error to the user ("too |
| // much time has passed; please re-run your |
| // search") or sneakily just switch to newest |
| // searcher w/o telling them... |
| if (VERBOSE) { |
| System.out.println(" searcher expired during search: " + see); |
| see.printStackTrace(System.out); |
| } |
| // We can't do this in general: on a very slow |
| // computer it's possible the local searcher |
| // expires before we can finish our search: |
| // assert prevSearchState != null; |
| if (prevSearchState != null) { |
| priorSearches.remove(prevSearchState); |
| } |
| } |
| } |
| } finally { |
| nodes[myNodeID].release(localShardSearcher); |
| for(IndexReader sub : subs) { |
| if (sub != null) { |
| sub.decRef(); |
| } |
| } |
| } |
| |
| if (searchState != null && searchState.searchAfterLocal != null && random().nextInt(5) == 3) { |
| priorSearches.add(searchState); |
| if (priorSearches.size() > 200) { |
| Collections.shuffle(priorSearches, random()); |
| priorSearches.subList(100, priorSearches.size()).clear(); |
| } |
| } |
| } |
| |
| finish(); |
| } |
| |
| private PreviousSearchState assertSame(IndexSearcher mockSearcher, NodeState.ShardIndexSearcher shardSearcher, Query q, Sort sort, PreviousSearchState state) throws IOException { |
| |
| int numHits = _TestUtil.nextInt(random(), 1, 100); |
| if (state != null && state.searchAfterLocal == null) { |
| // In addition to what we last searched: |
| numHits += state.numHitsPaged; |
| } |
| |
| if (VERBOSE) { |
| System.out.println("TEST: query=" + q + " sort=" + sort + " numHits=" + numHits); |
| if (state != null) { |
| System.out.println(" prev: searchAfterLocal=" + state.searchAfterLocal + " searchAfterShard=" + state.searchAfterShard + " numHitsPaged=" + state.numHitsPaged); |
| } |
| } |
| |
| // Single (mock local) searcher: |
| final TopDocs hits; |
| if (sort == null) { |
| if (state != null && state.searchAfterLocal != null) { |
| hits = mockSearcher.searchAfter(state.searchAfterLocal, q, numHits); |
| } else { |
| hits = mockSearcher.search(q, numHits); |
| } |
| } else { |
| hits = mockSearcher.search(q, numHits, sort); |
| } |
| |
| // Shard searcher |
| final TopDocs shardHits; |
| if (sort == null) { |
| if (state != null && state.searchAfterShard != null) { |
| shardHits = shardSearcher.searchAfter(state.searchAfterShard, q, numHits); |
| } else { |
| shardHits = shardSearcher.search(q, numHits); |
| } |
| } else { |
| shardHits = shardSearcher.search(q, numHits, sort); |
| } |
| |
| final int numNodes = shardSearcher.nodeVersions.length; |
| int[] base = new int[numNodes]; |
| final List<IndexReaderContext> subs = mockSearcher.getTopReaderContext().children(); |
| assertEquals(numNodes, subs.size()); |
| |
| for(int nodeID=0;nodeID<numNodes;nodeID++) { |
| base[nodeID] = subs.get(nodeID).docBaseInParent; |
| } |
| |
| if (VERBOSE) { |
| /* |
| for(int shardID=0;shardID<shardSearchers.length;shardID++) { |
| System.out.println(" shard=" + shardID + " maxDoc=" + shardSearchers[shardID].searcher.getIndexReader().maxDoc()); |
| } |
| */ |
| System.out.println(" single searcher: " + hits.totalHits + " totalHits maxScore=" + hits.getMaxScore()); |
| for(int i=0;i<hits.scoreDocs.length;i++) { |
| final ScoreDoc sd = hits.scoreDocs[i]; |
| System.out.println(" doc=" + sd.doc + " score=" + sd.score); |
| } |
| System.out.println(" shard searcher: " + shardHits.totalHits + " totalHits maxScore=" + shardHits.getMaxScore()); |
| for(int i=0;i<shardHits.scoreDocs.length;i++) { |
| final ScoreDoc sd = shardHits.scoreDocs[i]; |
| System.out.println(" doc=" + sd.doc + " (rebased: " + (sd.doc + base[sd.shardIndex]) + ") score=" + sd.score + " shard=" + sd.shardIndex); |
| } |
| } |
| |
| int numHitsPaged; |
| if (state != null && state.searchAfterLocal != null) { |
| numHitsPaged = hits.scoreDocs.length; |
| if (state != null) { |
| numHitsPaged += state.numHitsPaged; |
| } |
| } else { |
| numHitsPaged = hits.scoreDocs.length; |
| } |
| |
| final boolean moreHits; |
| |
| final ScoreDoc bottomHit; |
| final ScoreDoc bottomHitShards; |
| |
| if (numHitsPaged < hits.totalHits) { |
| // More hits to page through |
| moreHits = true; |
| if (sort == null) { |
| bottomHit = hits.scoreDocs[hits.scoreDocs.length-1]; |
| final ScoreDoc sd = shardHits.scoreDocs[shardHits.scoreDocs.length-1]; |
| // Must copy because below we rebase: |
| bottomHitShards = new ScoreDoc(sd.doc, sd.score, sd.shardIndex); |
| if (VERBOSE) { |
| System.out.println(" save bottomHit=" + bottomHit); |
| } |
| } else { |
| bottomHit = null; |
| bottomHitShards = null; |
| } |
| |
| } else { |
| assertEquals(hits.totalHits, numHitsPaged); |
| bottomHit = null; |
| bottomHitShards = null; |
| moreHits = false; |
| } |
| |
| // Must rebase so assertEquals passes: |
| for(int hitID=0;hitID<shardHits.scoreDocs.length;hitID++) { |
| final ScoreDoc sd = shardHits.scoreDocs[hitID]; |
| sd.doc += base[sd.shardIndex]; |
| } |
| |
| _TestUtil.assertEquals(hits, shardHits); |
| |
| if (moreHits) { |
| // Return a continuation: |
| return new PreviousSearchState(q, sort, bottomHit, bottomHitShards, shardSearcher.nodeVersions, numHitsPaged); |
| } else { |
| return null; |
| } |
| } |
| } |