blob: 0c901df30e35d38f6ced3456abbe26deb3bdc1df [file] [log] [blame]
Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 727243)
+++ CHANGES.txt (working copy)
@@ -182,6 +182,11 @@
24. LUCENE-1131: Added numDeletedDocs method to IndexReader (Otis Gospodnetic)
+25. LUCENE-1494: Deprecated Analyzer.getPositionIncrementGap(String) in favour
+ of getPositionIncrementGap(String, int), which is aware of the current
+ position and can be used to 'line up' terms across variable-length fields.
+ (Paul Cowan)
+
Bug fixes
1. LUCENE-1134: Fixed BooleanQuery.rewrite to only optimize a single
Index: src/java/org/apache/lucene/index/DocInverterPerField.java
===================================================================
--- src/java/org/apache/lucene/index/DocInverterPerField.java (revision 727243)
+++ src/java/org/apache/lucene/index/DocInverterPerField.java (working copy)
@@ -74,7 +74,8 @@
if (field.isIndexed() && doInvert) {
if (fieldState.length > 0)
- fieldState.position += docState.analyzer.getPositionIncrementGap(fieldInfo.name);
+ fieldState.position += docState.analyzer.getPositionIncrementGap(
+ fieldInfo.name, fieldState.position);
if (!field.isTokenized()) { // un-tokenized field
String stringValue = field.stringValue();
Index: src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java
===================================================================
--- src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (revision 727243)
+++ src/java/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java (working copy)
@@ -85,11 +85,11 @@
}
/** Return the positionIncrementGap from the analyzer assigned to fieldName */
- public int getPositionIncrementGap(String fieldName) {
+ public int getPositionIncrementGap(String fieldName, int lastTokenPosition) {
Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
- return analyzer.getPositionIncrementGap(fieldName);
+ return analyzer.getPositionIncrementGap(fieldName, lastTokenPosition);
}
public String toString() {
Index: src/java/org/apache/lucene/analysis/Analyzer.java
===================================================================
--- src/java/org/apache/lucene/analysis/Analyzer.java (revision 727243)
+++ src/java/org/apache/lucene/analysis/Analyzer.java (working copy)
@@ -62,6 +62,21 @@
/**
+ * Provides a constant gap between the position values of tokens
+ * from different Fieldable instances which share the same field name.
+ * Used by the default implementation of
+ * {@link #getPositionIncrementGap(String, int)}.
+ *
+ * @param fieldName Fieldable name being indexed.
+ * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+ * @deprecated use {@link #getPositionIncrementGap(String, int)} instead; kept public so existing callers still compile
+ */
+ public int getPositionIncrementGap(String fieldName)
+ {
+ return 0;
+ }
+
+ /**
* Invoked before indexing a Fieldable instance if
* terms have already been added to that field. This allows custom
* analyzers to place an automatic position increment gap between
@@ -70,12 +85,18 @@
* the typical default token position increment of 1, all terms in a field,
* including across Fieldable instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
+ * The last token position is supplied to enable analyzers to 'line up' terms;
+ * for example, for subsequent terms to start at positions that are multiples
+ * of 100. The default implementation returns the constant gap supplied by
+ * {@link #getPositionIncrementGap(String)}.
*
* @param fieldName Fieldable name being indexed.
- * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+ * @param lastTokenPosition the last token position for this field
+ * @return position increment gap, added to the next token emitted from
+ * {@link #tokenStream(String,Reader)}
*/
- public int getPositionIncrementGap(String fieldName)
+ public int getPositionIncrementGap(String fieldName, int lastTokenPosition)
{
- return 0;
+ return getPositionIncrementGap(fieldName);
}
}
Index: src/test/org/apache/lucene/index/TestDocumentWriter.java
===================================================================
--- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 727243)
+++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy)
@@ -102,7 +102,8 @@
}
}
- public void testPositionIncrementGap() throws IOException {
+ public void testOldPositionIncrementGap() throws IOException {
+ // Tests the use of the legacy getPositionIncrementGap(String)
Analyzer analyzer = new Analyzer() {
public TokenStream tokenStream(String fieldName, Reader reader) {
return new WhitespaceTokenizer(reader);
@@ -112,6 +113,26 @@
return 500;
}
};
+ assertExpectedPositionIncrementGap(analyzer, 2, 0, 502);
+ }
+
+ public void testNewPositionIncrementGap() throws IOException {
+ // Tests the use of the new getPositionIncrementGap(String, int)
+ Analyzer analyzer = new Analyzer() {
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new WhitespaceTokenizer(reader);
+ }
+
+ public int getPositionIncrementGap(String fieldName, int lastTokenPosition) {
+ // Calculate the gap that advances to the next multiple of 500 (strictly greater than lastTokenPosition)
+ return (((lastTokenPosition / 500) + 1) * 500) - lastTokenPosition;
+ }
+ };
+ assertExpectedPositionIncrementGap(analyzer, 2, 0, 500);
+ }
+
+ private void assertExpectedPositionIncrementGap(Analyzer analyzer,
+ int expectedFreq, int firstPosition, int secondPosition) throws IOException {
IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
@@ -128,9 +149,9 @@
TermPositions termPositions = reader.termPositions(new Term("repeated", "repeated"));
assertTrue(termPositions.next());
int freq = termPositions.freq();
- assertEquals(2, freq);
- assertEquals(0, termPositions.nextPosition());
- assertEquals(502, termPositions.nextPosition());
+ assertEquals(expectedFreq, freq);
+ assertEquals(firstPosition, termPositions.nextPosition());
+ assertEquals(secondPosition, termPositions.nextPosition());
}
public void testTokenReuse() throws IOException {