| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.queryparser.classic; |
| |
| import java.io.IOException; |
| import java.util.Objects; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.TokenFilter; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.analysis.tokenattributes.TypeAttribute; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.QueryVisitor; |
| |
| /** |
| * Test QueryParser's ability to deal with Analyzers that return more |
| * than one token per position or that return tokens with a position |
| * increment > 1. |
| * |
| */ |
| public class TestMultiAnalyzer extends BaseTokenStreamTestCase { |
| |
| private static int multiToken = 0; |
| |
| public void testMultiAnalyzer() throws ParseException { |
| |
| QueryParser qp = new QueryParser("", new MultiAnalyzer()); |
| |
| // trivial, no multiple tokens: |
| assertEquals("foo", qp.parse("foo").toString()); |
| assertEquals("foo", qp.parse("\"foo\"").toString()); |
| assertEquals("foo foobar", qp.parse("foo foobar").toString()); |
| assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"").toString()); |
| assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"").toString()); |
| |
| // two tokens at the same position: |
| assertEquals("Synonym(multi multi2) foo", qp.parse("multi foo").toString()); |
| assertEquals("foo Synonym(multi multi2)", qp.parse("foo multi").toString()); |
| assertEquals("Synonym(multi multi2) Synonym(multi multi2)", qp.parse("multi multi").toString()); |
| assertEquals("+(foo Synonym(multi multi2)) +(bar Synonym(multi multi2))", |
| qp.parse("+(foo multi) +(bar multi)").toString()); |
| assertEquals("+(foo Synonym(multi multi2)) field:\"bar (multi multi2)\"", |
| qp.parse("+(foo multi) field:\"bar multi\"").toString()); |
| |
| // phrases: |
| assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"").toString()); |
| assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"").toString()); |
| assertEquals("\"foo (multi multi2) foobar (multi multi2)\"", |
| qp.parse("\"foo multi foobar multi\"").toString()); |
| |
| // fields: |
| assertEquals("Synonym(field:multi field:multi2) field:foo", qp.parse("field:multi field:foo").toString()); |
| assertEquals("field:\"(multi multi2) foo\"", qp.parse("field:\"multi foo\"").toString()); |
| |
| // three tokens at one position: |
| assertEquals("Synonym(multi2 multi3 triplemulti)", qp.parse("triplemulti").toString()); |
| assertEquals("foo Synonym(multi2 multi3 triplemulti) foobar", |
| qp.parse("foo triplemulti foobar").toString()); |
| |
| // phrase with non-default slop: |
| assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10").toString()); |
| |
| // phrase with non-default boost: |
| assertEquals("(\"(multi multi2) foo\")^2.0", qp.parse("\"multi foo\"^2").toString()); |
| |
| // phrase after changing default slop |
| qp.setPhraseSlop(99); |
| assertEquals("\"(multi multi2) foo\"~99 bar", |
| qp.parse("\"multi foo\" bar").toString()); |
| assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2", |
| qp.parse("\"multi foo\" \"foo bar\"~2").toString()); |
| qp.setPhraseSlop(0); |
| |
| // non-default operator: |
| qp.setDefaultOperator(QueryParserBase.AND_OPERATOR); |
| assertEquals("+Synonym(multi multi2) +foo", qp.parse("multi foo").toString()); |
| |
| } |
| |
| public void testMultiAnalyzerWithSubclassOfQueryParser() throws ParseException { |
| |
| DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer()); |
| qp.setPhraseSlop(99); // modified default slop |
| |
| // direct call to (super's) getFieldQuery to demonstrate differnce |
| // between phrase and multiphrase with modified default slop |
| assertEquals("\"foo bar\"~99", |
| qp.getSuperFieldQuery("","foo bar", true).toString()); |
| assertEquals("\"(multi multi2) bar\"~99", |
| qp.getSuperFieldQuery("","multi bar", true).toString()); |
| |
| |
| // ask sublcass to parse phrase with modified default slop |
| assertEquals("\"(multi multi2) foo\"~99 bar", |
| qp.parse("\"multi foo\" bar").toString()); |
| |
| } |
| |
| public void testPosIncrementAnalyzer() throws ParseException { |
| QueryParser qp = new QueryParser("", new PosIncrementAnalyzer()); |
| assertEquals("quick brown", qp.parse("the quick brown").toString()); |
| assertEquals("quick brown fox", qp.parse("the quick brown fox").toString()); |
| } |
| |
| /** |
| * Expands "multi" to "multi" and "multi2", both at the same position, |
| * and expands "triplemulti" to "triplemulti", "multi3", and "multi2". |
| */ |
| private static class MultiAnalyzer extends Analyzer { |
| |
| @Override |
| public TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); |
| return new TokenStreamComponents(result, new TestFilter(result)); |
| } |
| } |
| |
| private static final class TestFilter extends TokenFilter { |
| |
| private String prevType; |
| private int prevStartOffset; |
| private int prevEndOffset; |
| |
| private final CharTermAttribute termAtt; |
| private final PositionIncrementAttribute posIncrAtt; |
| private final OffsetAttribute offsetAtt; |
| private final TypeAttribute typeAtt; |
| |
| public TestFilter(TokenStream in) { |
| super(in); |
| termAtt = addAttribute(CharTermAttribute.class); |
| posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| offsetAtt = addAttribute(OffsetAttribute.class); |
| typeAtt = addAttribute(TypeAttribute.class); |
| } |
| |
| @Override |
| public final boolean incrementToken() throws java.io.IOException { |
| if (multiToken > 0) { |
| termAtt.setEmpty().append("multi"+(multiToken+1)); |
| offsetAtt.setOffset(prevStartOffset, prevEndOffset); |
| typeAtt.setType(prevType); |
| posIncrAtt.setPositionIncrement(0); |
| multiToken--; |
| return true; |
| } else { |
| boolean next = input.incrementToken(); |
| if (!next) { |
| return false; |
| } |
| prevType = typeAtt.type(); |
| prevStartOffset = offsetAtt.startOffset(); |
| prevEndOffset = offsetAtt.endOffset(); |
| String text = termAtt.toString(); |
| if (text.equals("triplemulti")) { |
| multiToken = 2; |
| return true; |
| } else if (text.equals("multi")) { |
| multiToken = 1; |
| return true; |
| } else { |
| return true; |
| } |
| } |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| super.reset(); |
| this.prevType = null; |
| this.prevStartOffset = 0; |
| this.prevEndOffset = 0; |
| } |
| } |
| |
| /** |
| * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). |
| * Does not work correctly for input other than "the quick brown ...". |
| */ |
| private static class PosIncrementAnalyzer extends Analyzer { |
| |
| @Override |
| public TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); |
| return new TokenStreamComponents(result, new TestPosIncrementFilter(result)); |
| } |
| } |
| |
| private static final class TestPosIncrementFilter extends TokenFilter { |
| |
| CharTermAttribute termAtt; |
| PositionIncrementAttribute posIncrAtt; |
| |
| public TestPosIncrementFilter(TokenStream in) { |
| super(in); |
| termAtt = addAttribute(CharTermAttribute.class); |
| posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| } |
| |
| @Override |
| public final boolean incrementToken () throws java.io.IOException { |
| while(input.incrementToken()) { |
| if (termAtt.toString().equals("the")) { |
| // stopword, do nothing |
| } else if (termAtt.toString().equals("quick")) { |
| posIncrAtt.setPositionIncrement(2); |
| return true; |
| } else { |
| posIncrAtt.setPositionIncrement(1); |
| return true; |
| } |
| } |
| return false; |
| } |
| } |
| |
| /** a very simple subclass of QueryParser */ |
| private final static class DumbQueryParser extends QueryParser { |
| |
| public DumbQueryParser(String f, Analyzer a) { |
| super(f, a); |
| } |
| |
| /** expose super's version */ |
| public Query getSuperFieldQuery(String f, String t, boolean quoted) |
| throws ParseException { |
| return super.getFieldQuery(f,t,quoted); |
| } |
| /** wrap super's version */ |
| @Override |
| protected Query getFieldQuery(String f, String t, boolean quoted) |
| throws ParseException { |
| return new DumbQueryWrapper(getSuperFieldQuery(f,t,quoted)); |
| } |
| } |
| |
| /** |
| * A very simple wrapper to prevent instanceof checks but uses |
| * the toString of the query it wraps. |
| */ |
| private final static class DumbQueryWrapper extends Query { |
| private Query q; |
| |
| public DumbQueryWrapper(Query q) { |
| this.q = Objects.requireNonNull(q); |
| } |
| @Override |
| public String toString(String f) { |
| return q.toString(f); |
| } |
| |
| @Override |
| public void visit(QueryVisitor visitor) { |
| q.visit(visitor); |
| } |
| |
| @Override |
| public boolean equals(Object other) { |
| return sameClassAs(other) && |
| Objects.equals(q, ((DumbQueryWrapper) other).q); |
| } |
| |
| @Override |
| public int hashCode() { |
| return classHash() & q.hashCode(); |
| } |
| } |
| |
| } |