| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.queryparser.flexible.standard; |
| |
| import java.io.IOException; |
| import org.apache.lucene.analysis.*; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.analysis.tokenattributes.TypeAttribute; |
| import org.apache.lucene.queryparser.flexible.core.QueryNodeException; |
| import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler; |
| import org.apache.lucene.util.LuceneTestCase; |
| |
| /** |
| * This test case is a copy of the core Lucene query parser test, it was adapted |
| * to use new QueryParserHelper instead of the old query parser. |
| * |
| * Test QueryParser's ability to deal with Analyzers that return more than one |
| * token per position or that return tokens with a position increment > 1. |
| */ |
| public class TestMultiAnalyzerQPHelper extends LuceneTestCase { |
| |
| private static int multiToken = 0; |
| |
| public void testMultiAnalyzer() throws QueryNodeException { |
| |
| StandardQueryParser qp = new StandardQueryParser(); |
| qp.setAnalyzer(new MultiAnalyzer()); |
| |
| // trivial, no multiple tokens: |
| assertEquals("foo", qp.parse("foo", "").toString()); |
| assertEquals("foo", qp.parse("\"foo\"", "").toString()); |
| assertEquals("foo foobar", qp.parse("foo foobar", "").toString()); |
| assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"", "").toString()); |
| assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"", "") |
| .toString()); |
| |
| // two tokens at the same position: |
| assertEquals("(multi multi2) foo", qp.parse("multi foo", "").toString()); |
| assertEquals("foo (multi multi2)", qp.parse("foo multi", "").toString()); |
| assertEquals("(multi multi2) (multi multi2)", qp.parse("multi multi", "") |
| .toString()); |
| assertEquals("+(foo (multi multi2)) +(bar (multi multi2))", qp.parse( |
| "+(foo multi) +(bar multi)", "").toString()); |
| assertEquals("+(foo (multi multi2)) field:\"bar (multi multi2)\"", qp |
| .parse("+(foo multi) field:\"bar multi\"", "").toString()); |
| |
| // phrases: |
| assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"", "") |
| .toString()); |
| assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"", "") |
| .toString()); |
| assertEquals("\"foo (multi multi2) foobar (multi multi2)\"", qp.parse( |
| "\"foo multi foobar multi\"", "").toString()); |
| |
| // fields: |
| assertEquals("(field:multi field:multi2) field:foo", qp.parse( |
| "field:multi field:foo", "").toString()); |
| assertEquals("field:\"(multi multi2) foo\"", qp.parse( |
| "field:\"multi foo\"", "").toString()); |
| |
| // three tokens at one position: |
| assertEquals("triplemulti multi3 multi2", qp.parse("triplemulti", "") |
| .toString()); |
| assertEquals("foo (triplemulti multi3 multi2) foobar", qp.parse( |
| "foo triplemulti foobar", "").toString()); |
| |
| // phrase with non-default slop: |
| assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10", "") |
| .toString()); |
| |
| // phrase with non-default boost: |
| assertEquals("(\"(multi multi2) foo\")^2.0", qp.parse("\"multi foo\"^2", "") |
| .toString()); |
| |
| // phrase after changing default slop |
| qp.setPhraseSlop(99); |
| assertEquals("\"(multi multi2) foo\"~99 bar", qp.parse("\"multi foo\" bar", |
| "").toString()); |
| assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2", qp.parse( |
| "\"multi foo\" \"foo bar\"~2", "").toString()); |
| qp.setPhraseSlop(0); |
| |
| // non-default operator: |
| qp.setDefaultOperator(StandardQueryConfigHandler.Operator.AND); |
| assertEquals("+(multi multi2) +foo", qp.parse("multi foo", "").toString()); |
| |
| } |
| |
| // public void testMultiAnalyzerWithSubclassOfQueryParser() throws |
| // ParseException { |
| // this test doesn't make sense when using the new QueryParser API |
| // DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer()); |
| // qp.setPhraseSlop(99); // modified default slop |
| // |
| // // direct call to (super's) getFieldQuery to demonstrate differnce |
| // // between phrase and multiphrase with modified default slop |
| // assertEquals("\"foo bar\"~99", |
| // qp.getSuperFieldQuery("","foo bar").toString()); |
| // assertEquals("\"(multi multi2) bar\"~99", |
| // qp.getSuperFieldQuery("","multi bar").toString()); |
| // |
| // |
| // // ask sublcass to parse phrase with modified default slop |
| // assertEquals("\"(multi multi2) foo\"~99 bar", |
| // qp.parse("\"multi foo\" bar").toString()); |
| // |
| // } |
| |
| public void testPosIncrementAnalyzer() throws QueryNodeException { |
| StandardQueryParser qp = new StandardQueryParser(); |
| qp.setAnalyzer(new PosIncrementAnalyzer()); |
| |
| assertEquals("quick brown", qp.parse("the quick brown", "").toString()); |
| assertEquals("\"? quick brown\"", qp.parse("\"the quick brown\"", "") |
| .toString()); |
| assertEquals("quick brown fox", qp.parse("the quick brown fox", "") |
| .toString()); |
| assertEquals("\"? quick brown fox\"", qp.parse("\"the quick brown fox\"", "") |
| .toString()); |
| } |
| |
| /** |
| * Expands "multi" to "multi" and "multi2", both at the same position, and |
| * expands "triplemulti" to "triplemulti", "multi3", and "multi2". |
| */ |
| private static class MultiAnalyzer extends Analyzer { |
| |
| @Override |
| public TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); |
| return new TokenStreamComponents(result, new TestFilter(result)); |
| } |
| } |
| |
| private static final class TestFilter extends TokenFilter { |
| |
| private String prevType; |
| private int prevStartOffset; |
| private int prevEndOffset; |
| |
| private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); |
| private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); |
| |
| public TestFilter(TokenStream in) { |
| super(in); |
| } |
| |
| @Override |
| public final boolean incrementToken() throws java.io.IOException { |
| if (multiToken > 0) { |
| termAtt.setEmpty().append("multi" + (multiToken + 1)); |
| offsetAtt.setOffset(prevStartOffset, prevEndOffset); |
| typeAtt.setType(prevType); |
| posIncrAtt.setPositionIncrement(0); |
| multiToken--; |
| return true; |
| } else { |
| boolean next = input.incrementToken(); |
| if (!next) { |
| return false; |
| } |
| prevType = typeAtt.type(); |
| prevStartOffset = offsetAtt.startOffset(); |
| prevEndOffset = offsetAtt.endOffset(); |
| String text = termAtt.toString(); |
| if (text.equals("triplemulti")) { |
| multiToken = 2; |
| return true; |
| } else if (text.equals("multi")) { |
| multiToken = 1; |
| return true; |
| } else { |
| return true; |
| } |
| } |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| super.reset(); |
| this.prevType = null; |
| this.prevStartOffset = 0; |
| this.prevEndOffset = 0; |
| } |
| } |
| |
| /** |
| * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). Does not work |
| * correctly for input other than "the quick brown ...". |
| */ |
| private static class PosIncrementAnalyzer extends Analyzer { |
| |
| @Override |
| public TokenStreamComponents createComponents(String fieldName) { |
| Tokenizer result = new MockTokenizer(MockTokenizer.WHITESPACE, true); |
| return new TokenStreamComponents(result, new TestPosIncrementFilter(result)); |
| } |
| } |
| |
| private static class TestPosIncrementFilter extends TokenFilter { |
| |
| private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); |
| private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| |
| public TestPosIncrementFilter(TokenStream in) { |
| super(in); |
| } |
| |
| @Override |
| public final boolean incrementToken() throws java.io.IOException { |
| while (input.incrementToken()) { |
| if (termAtt.toString().equals("the")) { |
| // stopword, do nothing |
| } else if (termAtt.toString().equals("quick")) { |
| posIncrAtt.setPositionIncrement(2); |
| return true; |
| } else { |
| posIncrAtt.setPositionIncrement(1); |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| } |
| |
| } |