| Index: CHANGES.txt |
| =================================================================== |
| --- CHANGES.txt (revision 727243) |
| +++ CHANGES.txt (working copy) |
| @@ -182,6 +182,11 @@ |
| |
| 24. LUCENE-1131: Added numDeletedDocs method to IndexReader (Otis Gospodnetic) |
| |
| +25. LUCENE-1494: Deprecated Analyzer.getPositionIncrementGap(String) in favour |
| + of getPositionIncrementGap(String, int), which is aware of the current |
| + position and can be used to 'line up' terms across variable-length fields. |
| + (Paul Cowan) |
| + |
| Bug fixes |
| |
| 1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single |
| Index: src/java/org/apache/lucene/index/DocInverterPerField.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/DocInverterPerField.java (revision 727243) |
| +++ src/java/org/apache/lucene/index/DocInverterPerField.java (working copy) |
| @@ -74,7 +74,8 @@ |
| if (field.isIndexed() && doInvert) { |
| |
| if (fieldState.length > 0) |
| - fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name); |
| + fieldState.position += docState.analyzer.getPositionIncrementGap( |
| + fieldInfo.name, fieldState.position); |
| |
| if (!field.isTokenized()) { // un-tokenized field |
| String stringValue = field.stringValue(); |
| Index: src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (revision 727243) |
| +++ src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (working copy) |
| @@ -85,11 +85,11 @@ |
| } |
| |
| /** Return the positionIncrementGap from the analyzer assigned to fieldName */ |
| - public int getPositionIncrementGap(String fieldName) { |
| + public int getPositionIncrementGap(String fieldName, int lastTokenPosition) { |
| Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName); |
| if (analyzer == null) |
| analyzer = defaultAnalyzer; |
| - return analyzer.getPositionIncrementGap(fieldName); |
| + return analyzer.getPositionIncrementGap(fieldName, lastTokenPosition); |
| } |
| |
| public String toString() { |
| Index: src/java/org/apache/lucene/analysis/Analyzer.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/Analyzer.java (revision 727243) |
| +++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy) |
| @@ -62,6 +62,21 @@ |
| |
| |
| /** |
| + * Provides a constant gap (0 unless overridden) between the position values |
| + * of tokens from different Fieldable instances which share the same field name. |
| + * Used by the default implementation of |
| + * {@link #getPositionIncrementGap(String, int)}. |
| + * |
| + * @param fieldName Fieldable name being indexed. |
| + * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} |
| + * @deprecated replaced with {@link #getPositionIncrementGap(String, int)}; kept public so existing callers still compile |
| + */ |
| + public int getPositionIncrementGap(String fieldName) |
| + { |
| + return 0; |
| + } |
| + |
| + /** |
| * Invoked before indexing a Fieldable instance if |
| * terms have already been added to that field. This allows custom |
| * analyzers to place an automatic position increment gap between |
| @@ -70,12 +85,18 @@ |
| * the typical default token position increment of 1, all terms in a field, |
| * including across Fieldable instances, are in successive positions, allowing |
| * exact PhraseQuery matches, for instance, across Fieldable instance boundaries. |
| + * The last token position is supplied so that analyzers can 'line up' terms, |
| + * for example by making each subsequent Fieldable instance's terms start at a |
| + * multiple of 100. The default implementation returns the constant gap |
| + * supplied by {@link #getPositionIncrementGap(String)}. |
| * |
| * @param fieldName Fieldable name being indexed. |
| - * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} |
| + * @param lastTokenPosition the last token position for this field |
| + * @return position increment gap, added to the next token emitted from |
| + * {@link #tokenStream(String,Reader)} |
| */ |
| - public int getPositionIncrementGap(String fieldName) |
| + public int getPositionIncrementGap(String fieldName, int lastTokenPosition) |
| { |
| - return 0; |
| + return getPositionIncrementGap(fieldName); |
| } |
| } |
| Index: src/test/org/apache/lucene/index/TestDocumentWriter.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 727243) |
| +++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy) |
| @@ -102,7 +102,8 @@ |
| } |
| } |
| |
| - public void testPositionIncrementGap() throws IOException { |
| + public void testOldPositionIncrementGap() throws IOException { |
| + // Tests the use of the legacy getPositionIncrementGap(String) |
| Analyzer analyzer = new Analyzer() { |
| public TokenStream tokenStream(String fieldName, Reader reader) { |
| return new WhitespaceTokenizer(reader); |
| @@ -112,6 +113,26 @@ |
| return 500; |
| } |
| }; |
| + assertExpectedPositionIncrementGap(analyzer, 2, 0, 502); |
| + } |
| + |
| + public void testNewPositionIncrementGap() throws IOException { |
| + // Tests overriding the position-aware getPositionIncrementGap(String, int) |
| + Analyzer analyzer = new Analyzer() { |
| + public TokenStream tokenStream(String fieldName, Reader reader) { |
| + return new WhitespaceTokenizer(reader); |
| + } |
| + |
| + public int getPositionIncrementGap(String fieldName, int lastTokenPosition) { |
| + // Gap up to the next multiple of 500 (an exact multiple still advances by 500) |
| + return (((lastTokenPosition / 500) + 1) * 500) - lastTokenPosition; |
| + } |
| + }; |
| + assertExpectedPositionIncrementGap(analyzer, 2, 0, 500); |
| + } |
| + |
| + private void assertExpectedPositionIncrementGap(Analyzer analyzer, |
| + int expectedFreq, int firstPosition, int secondPosition) throws IOException { |
| |
| IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED); |
| |
| @@ -128,9 +149,9 @@ |
| TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated")); |
| assertTrue(termPositions.next()); |
| int freq = termPositions.freq(); |
| - assertEquals(2, freq); |
| - assertEquals(0, termPositions.nextPosition()); |
| - assertEquals(502, termPositions.nextPosition()); |
| + assertEquals(expectedFreq, freq); |
| + assertEquals(firstPosition, termPositions.nextPosition()); |
| + assertEquals(secondPosition, termPositions.nextPosition()); |
| } |
| |
| public void testTokenReuse() throws IOException { |