| package org.apache.lucene.search.payloads; |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.English; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.QueryUtils; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.ScoreDoc; |
| import org.apache.lucene.search.CheckHits; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.DefaultSimilarity; |
| import org.apache.lucene.search.spans.SpanTermQuery; |
| import org.apache.lucene.search.spans.Spans; |
| import org.apache.lucene.search.spans.TermSpans; |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.LowerCaseTokenizer; |
| import org.apache.lucene.analysis.TokenFilter; |
| import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| import org.apache.lucene.index.FieldInvertState; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Payload; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| |
| import java.io.Reader; |
| import java.io.IOException; |
| |
| |
| /** |
| * |
| * |
| **/ |
| public class TestPayloadTermQuery extends LuceneTestCase { |
| private IndexSearcher searcher; |
| private IndexReader reader; |
| private BoostingSimilarity similarity = new BoostingSimilarity(); |
| private byte[] payloadField = new byte[]{1}; |
| private byte[] payloadMultiField1 = new byte[]{2}; |
| private byte[] payloadMultiField2 = new byte[]{4}; |
| protected Directory directory; |
| |
| private class PayloadAnalyzer extends Analyzer { |
| |
| |
| @Override |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| TokenStream result = new LowerCaseTokenizer(TEST_VERSION_CURRENT, reader); |
| result = new PayloadFilter(result, fieldName); |
| return result; |
| } |
| } |
| |
| private class PayloadFilter extends TokenFilter { |
| private final String fieldName; |
| private int numSeen = 0; |
| |
| private final PayloadAttribute payloadAtt; |
| |
| public PayloadFilter(TokenStream input, String fieldName) { |
| super(input); |
| this.fieldName = fieldName; |
| payloadAtt = addAttribute(PayloadAttribute.class); |
| } |
| |
| @Override |
| public boolean incrementToken() throws IOException { |
| boolean hasNext = input.incrementToken(); |
| if (hasNext) { |
| if (fieldName.equals("field")) { |
| payloadAtt.setPayload(new Payload(payloadField)); |
| } else if (fieldName.equals("multiField")) { |
| if (numSeen % 2 == 0) { |
| payloadAtt.setPayload(new Payload(payloadMultiField1)); |
| } else { |
| payloadAtt.setPayload(new Payload(payloadMultiField2)); |
| } |
| numSeen++; |
| } |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| super.reset(); |
| this.numSeen = 0; |
| } |
| } |
| |
| @Override |
| public void setUp() throws Exception { |
| super.setUp(); |
| directory = newDirectory(); |
| RandomIndexWriter writer = new RandomIndexWriter(random, directory, |
| newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()) |
| .setSimilarity(similarity).setMergePolicy(newLogMergePolicy())); |
| //writer.infoStream = System.out; |
| for (int i = 0; i < 1000; i++) { |
| Document doc = new Document(); |
| Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED); |
| //noPayloadField.setBoost(0); |
| doc.add(noPayloadField); |
| doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); |
| doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); |
| writer.addDocument(doc); |
| } |
| reader = writer.getReader(); |
| writer.close(); |
| |
| searcher = newSearcher(reader); |
| searcher.setSimilarity(similarity); |
| } |
| |
| @Override |
| public void tearDown() throws Exception { |
| searcher.close(); |
| reader.close(); |
| directory.close(); |
| super.tearDown(); |
| } |
| |
| public void test() throws IOException { |
| PayloadTermQuery query = new PayloadTermQuery(new Term("field", "seventy"), |
| new MaxPayloadFunction()); |
| TopDocs hits = searcher.search(query, null, 100); |
| assertTrue("hits is null and it shouldn't be", hits != null); |
| assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); |
| |
| //they should all have the exact same score, because they all contain seventy once, and we set |
| //all the other similarity factors to be 1 |
| |
| assertTrue(hits.getMaxScore() + " does not equal: " + 1, hits.getMaxScore() == 1); |
| for (int i = 0; i < hits.scoreDocs.length; i++) { |
| ScoreDoc doc = hits.scoreDocs[i]; |
| assertTrue(doc.score + " does not equal: " + 1, doc.score == 1); |
| } |
| CheckHits.checkExplanations(query, PayloadHelper.FIELD, searcher, true); |
| Spans spans = query.getSpans(searcher.getIndexReader()); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans); |
| /*float score = hits.score(0); |
| for (int i =1; i < hits.length(); i++) |
| { |
| assertTrue("scores are not equal and they should be", score == hits.score(i)); |
| }*/ |
| |
| } |
| |
| public void testQuery() { |
| PayloadTermQuery boostingFuncTermQuery = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), |
| new MaxPayloadFunction()); |
| QueryUtils.check(boostingFuncTermQuery); |
| |
| SpanTermQuery spanTermQuery = new SpanTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy")); |
| |
| assertTrue(boostingFuncTermQuery.equals(spanTermQuery) == spanTermQuery.equals(boostingFuncTermQuery)); |
| |
| PayloadTermQuery boostingFuncTermQuery2 = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), |
| new AveragePayloadFunction()); |
| |
| QueryUtils.checkUnequal(boostingFuncTermQuery, boostingFuncTermQuery2); |
| } |
| |
| public void testMultipleMatchesPerDoc() throws Exception { |
| PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), |
| new MaxPayloadFunction()); |
| TopDocs hits = searcher.search(query, null, 100); |
| assertTrue("hits is null and it shouldn't be", hits != null); |
| assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); |
| |
| //they should all have the exact same score, because they all contain seventy once, and we set |
| //all the other similarity factors to be 1 |
| |
| //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash); |
| assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0); |
| //there should be exactly 10 items that score a 4, all the rest should score a 2 |
| //The 10 items are: 70 + i*100 where i in [0-9] |
| int numTens = 0; |
| for (int i = 0; i < hits.scoreDocs.length; i++) { |
| ScoreDoc doc = hits.scoreDocs[i]; |
| if (doc.doc % 10 == 0) { |
| numTens++; |
| assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0); |
| } else { |
| assertTrue(doc.score + " does not equal: " + 2, doc.score == 2); |
| } |
| } |
| assertTrue(numTens + " does not equal: " + 10, numTens == 10); |
| CheckHits.checkExplanations(query, "field", searcher, true); |
| Spans spans = query.getSpans(searcher.getIndexReader()); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans); |
| //should be two matches per document |
| int count = 0; |
| //100 hits times 2 matches per hit, we should have 200 in count |
| while (spans.next()) { |
| count++; |
| } |
| assertTrue(count + " does not equal: " + 200, count == 200); |
| } |
| |
| //Set includeSpanScore to false, in which case just the payload score comes through. |
| public void testIgnoreSpanScorer() throws Exception { |
| PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"), |
| new MaxPayloadFunction(), false); |
| |
| IndexSearcher theSearcher = new IndexSearcher(directory, true); |
| theSearcher.setSimilarity(new FullSimilarity()); |
| TopDocs hits = searcher.search(query, null, 100); |
| assertTrue("hits is null and it shouldn't be", hits != null); |
| assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100); |
| |
| //they should all have the exact same score, because they all contain seventy once, and we set |
| //all the other similarity factors to be 1 |
| |
| //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash); |
| assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0); |
| //there should be exactly 10 items that score a 4, all the rest should score a 2 |
| //The 10 items are: 70 + i*100 where i in [0-9] |
| int numTens = 0; |
| for (int i = 0; i < hits.scoreDocs.length; i++) { |
| ScoreDoc doc = hits.scoreDocs[i]; |
| if (doc.doc % 10 == 0) { |
| numTens++; |
| assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0); |
| } else { |
| assertTrue(doc.score + " does not equal: " + 2, doc.score == 2); |
| } |
| } |
| assertTrue(numTens + " does not equal: " + 10, numTens == 10); |
| CheckHits.checkExplanations(query, "field", searcher, true); |
| Spans spans = query.getSpans(searcher.getIndexReader()); |
| assertTrue("spans is null and it shouldn't be", spans != null); |
| assertTrue("spans is not an instanceof " + TermSpans.class, spans instanceof TermSpans); |
| //should be two matches per document |
| int count = 0; |
| //100 hits times 2 matches per hit, we should have 200 in count |
| while (spans.next()) { |
| count++; |
| } |
| theSearcher.close(); |
| } |
| |
| public void testNoMatch() throws Exception { |
| PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.FIELD, "junk"), |
| new MaxPayloadFunction()); |
| TopDocs hits = searcher.search(query, null, 100); |
| assertTrue("hits is null and it shouldn't be", hits != null); |
| assertTrue("hits Size: " + hits.totalHits + " is not: " + 0, hits.totalHits == 0); |
| |
| } |
| |
| public void testNoPayload() throws Exception { |
| PayloadTermQuery q1 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "zero"), |
| new MaxPayloadFunction()); |
| PayloadTermQuery q2 = new PayloadTermQuery(new Term(PayloadHelper.NO_PAYLOAD_FIELD, "foo"), |
| new MaxPayloadFunction()); |
| BooleanClause c1 = new BooleanClause(q1, BooleanClause.Occur.MUST); |
| BooleanClause c2 = new BooleanClause(q2, BooleanClause.Occur.MUST_NOT); |
| BooleanQuery query = new BooleanQuery(); |
| query.add(c1); |
| query.add(c2); |
| TopDocs hits = searcher.search(query, null, 100); |
| assertTrue("hits is null and it shouldn't be", hits != null); |
| assertTrue("hits Size: " + hits.totalHits + " is not: " + 1, hits.totalHits == 1); |
| int[] results = new int[1]; |
| results[0] = 0;//hits.scoreDocs[0].doc; |
| CheckHits.checkHitCollector(random, query, PayloadHelper.NO_PAYLOAD_FIELD, searcher, results); |
| } |
| |
| // must be static for weight serialization tests |
| static class BoostingSimilarity extends DefaultSimilarity { |
| |
| // TODO: Remove warning after API has been finalized |
| @Override |
| public float scorePayload(int docId, String fieldName, int start, int end, byte[] payload, int offset, int length) { |
| //we know it is size 4 here, so ignore the offset/length |
| return payload[0]; |
| } |
| |
| //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| //Make everything else 1 so we see the effect of the payload |
| //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| @Override |
| public float computeNorm(String fieldName, FieldInvertState state) { |
| return state.getBoost(); |
| } |
| |
| @Override |
| public float queryNorm(float sumOfSquaredWeights) { |
| return 1; |
| } |
| |
| @Override |
| public float sloppyFreq(int distance) { |
| return 1; |
| } |
| |
| @Override |
| public float coord(int overlap, int maxOverlap) { |
| return 1; |
| } |
| |
| @Override |
| public float idf(int docFreq, int numDocs) { |
| return 1; |
| } |
| |
| @Override |
| public float tf(float freq) { |
| return freq == 0 ? 0 : 1; |
| } |
| } |
| |
| static class FullSimilarity extends DefaultSimilarity{ |
| public float scorePayload(int docId, String fieldName, byte[] payload, int offset, int length) { |
| //we know it is size 4 here, so ignore the offset/length |
| return payload[0]; |
| } |
| } |
| |
| } |