| Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java |
| =================================================================== |
| --- lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (révision 1483292) |
| +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java (copie de travail) |
| @@ -19,13 +19,16 @@ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.MockTokenizer; |
| +import org.apache.lucene.analysis.TokenFilter; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| import org.apache.lucene.analysis.Tokenizer; |
| import org.apache.lucene.analysis.core.KeywordTokenizer; |
| import org.apache.lucene.analysis.core.WhitespaceTokenizer; |
| import org.apache.lucene.analysis.position.PositionFilter; |
| +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| |
| +import java.io.IOException; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.util.Random; |
| @@ -101,9 +104,39 @@ |
| false); |
| } |
| |
| + private static class PositionFilter extends TokenFilter { |
| + |
| + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); |
| + private boolean started; |
| + |
| + PositionFilter(final TokenStream input) { |
| + super(input); |
| + } |
| + |
| + @Override |
| + public final boolean incrementToken() throws IOException { |
| + if (input.incrementToken()) { |
| + if (started) { |
| + posIncrAtt.setPositionIncrement(0); |
| + } else { |
| + started = true; |
| + } |
| + return true; |
| + } else { |
| + return false; |
| + } |
| + } |
| + |
| + @Override |
| + public void reset() throws IOException { |
| + super.reset(); |
| + started = false; |
| + } |
| + } |
| + |
| public void testFirstTokenPositionIncrement() throws Exception { |
| TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false); |
| - ts = new PositionFilter(ts, 0); // All but first token will get 0 position increment |
| + ts = new PositionFilter(ts); // All but first token will get 0 position increment |
| EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 2, 3); |
| // The first token "a" will not be output, since it's smaller than the mingram size of 2. |
| // The second token on input to EdgeNGramTokenFilter will have position increment of 0, |
| Index: lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java |
| =================================================================== |
| --- lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (révision 1483292) |
| +++ lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (copie de travail) |
| @@ -174,8 +174,6 @@ |
| PathHierarchyTokenizer.class, |
| HyphenationCompoundWordTokenFilter.class, |
| DictionaryCompoundWordTokenFilter.class, |
| - // TODO: corrumpts graphs (offset consistency check): |
| - PositionFilter.class, |
| // TODO: it seems to mess up offsets!? |
| WikipediaTokenizer.class, |
| // TODO: doesn't handle graph inputs |
| Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java |
| =================================================================== |
| --- lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (révision 1483292) |
| +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (copie de travail) |
| @@ -22,11 +22,19 @@ |
| import org.apache.lucene.analysis.TokenFilter; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| +import org.apache.lucene.queryparser.classic.QueryParser; |
| |
| /** Set the positionIncrement of all tokens to the "positionIncrement", |
| * except the first return token which retains its original positionIncrement value. |
| * The default positionIncrement value is zero. |
| + * @deprecated (4.4) PositionFilter makes {@link TokenStream} graphs inconsistent |
| + * which can cause highlighting bugs. Its main use-case being to make |
| + * {@link QueryParser} generate boolean queries instead of phrase |
| + * queries, it is now advised to use |
| + * {@link QueryParser#setAutoGeneratePhraseQueries(boolean) QueryParser.setAutoGeneratePhraseQueries(false)} |
| + * (for simple cases) or to override {@link QueryParser#newFieldQuery}. |
| */ |
| +@Deprecated |
| public final class PositionFilter extends TokenFilter { |
| |
| /** Position increment to assign to all but the first token - default = 0 */ |
| Index: lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java |
| =================================================================== |
| --- lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (révision 1483292) |
| +++ lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java (copie de travail) |
| @@ -20,6 +20,7 @@ |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.position.PositionFilter; |
| import org.apache.lucene.analysis.util.TokenFilterFactory; |
| +import org.apache.lucene.util.Version; |
| |
| import java.util.Map; |
| |
| @@ -37,7 +38,9 @@ |
| * |
| * @see org.apache.lucene.analysis.position.PositionFilter |
| * @since solr 1.4 |
| + * @deprecated (4.4) |
| */ |
| +@Deprecated |
| public class PositionFilterFactory extends TokenFilterFactory { |
| private final int positionIncrement; |
| |
| @@ -48,6 +51,9 @@ |
| if (!args.isEmpty()) { |
| throw new IllegalArgumentException("Unknown parameters: " + args); |
| } |
| + if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44)) { |
| + throw new IllegalArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility"); |
| + } |
| } |
| |
| @Override |
| Index: lucene/CHANGES.txt |
| =================================================================== |
| --- lucene/CHANGES.txt (révision 1483292) |
| +++ lucene/CHANGES.txt (copie de travail) |
| @@ -76,6 +76,12 @@ |
| * LUCENE-3907: EdgeNGramTokenFilter does not support backward grams and does |
| not update offsets anymore. (Adrien Grand) |
| |
| +* LUCENE-4981: PositionFilter is now deprecated as it can corrupt token stream |
| + graphs. Since it main use-case was to make query parsers generate boolean |
| + queries instead of phrase queries, it is now advised to use |
| + QueryParser.setAutoGeneratePhraseQueries(false) (for simple cases) or to |
| + override QueryParser.newFieldQuery. (Adrien Grand, Steve Rowe) |
| + |
| Bug Fixes |
| |
| * LUCENE-4997: Internal test framework's tests are sensitive to previous |