package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.FieldInfo.IndexOptions;
/**
 * Holds state for inverting all occurrences of a single
 * field in the document. This class does no inversion
 * itself; it forwards the tokens produced by analysis to
 * its consumer (InvertedDocConsumerPerField) and, once the
 * field has been fully processed, notifies its endConsumer
 * (InvertedDocEndConsumerPerField).
 */
final class DocInverterPerField extends DocFieldConsumerPerField {
final FieldInfo fieldInfo;
final InvertedDocConsumerPerField consumer;
final InvertedDocEndConsumerPerField endConsumer;
final DocumentsWriterPerThread.DocState docState;
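// accumulates position/offset/length/boost state for the
// field while its occurrences are inverted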
final FieldInvertState fieldState;
public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
docState = parent.docState;
fieldState = new FieldInvertState(fieldInfo.name);
this.consumer = parent.consumer.addField(this, fieldInfo);
this.endConsumer = parent.endConsumer.addField(this, fieldInfo);
}
@Override
void abort() {
try {
consumer.abort();
} finally {
endConsumer.abort();
}
}
@Override
public void processFields(final IndexableField[] fields,
final int count) throws IOException {
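// clear position/offset/length/boost state accumulated
// from the previous document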
fieldState.reset();
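// the consumer decides up front whether this field's
// tokens need to be inverted at all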
final boolean doInvert = consumer.start(fields, count);
for (int i = 0; i < count; i++) {
final IndexableField field = fields[i];
final IndexableFieldType fieldType = field.fieldType();
// TODO FI: this should be "genericized" to querying
// consumer if it wants to see this particular field
// tokenized.
if (doInvert) {
final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
// if the field omits norms, the boost cannot be indexed.
if (fieldType.omitNorms() && field.boost() != 1.0f) {
throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
}
// only bother checking offsets if something will consume them.
// TODO: after we fix analyzers, also check if termVectorOffsets will be indexed.
final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
int lastStartOffset = 0;
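// for multi-valued fields, add the analyzer's position
// increment gap between values so positional queries
// don't match across value boundaries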
if (i > 0) {
fieldState.position += analyzed ? docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0;
}
try (TokenStream stream = field.tokenStream(docState.analyzer)) {
// reset the TokenStream to the first token
stream.reset();
boolean hasMoreTokens = stream.incrementToken();
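// expose the stream's attributes through fieldState so
// the consumer chain can read the term, offset and
// payload attributes directly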
fieldState.attributeSource = stream;
OffsetAttribute offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncrAttribute = fieldState.attributeSource.addAttribute(PositionIncrementAttribute.class);
if (hasMoreTokens) {
consumer.start(field);
do {
// If stream.incrementToken() below throws an exception
// (which is fairly common, eg if the analyzer chokes
// on a given document), then it's non-aborting: this
// one document will be marked as deleted, but still
// consume a docID
final int posIncr = posIncrAttribute.getPositionIncrement();
if (posIncr < 0) {
throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'");
}
if (fieldState.position == 0 && posIncr == 0) {
throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
}
int position = fieldState.position + posIncr;
if (position > 0) {
// fieldState.position was already advanced past the
// previous token by the position++ below, so back off
// by one here to compensate
position--;
} else if (position < 0) {
throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
}
// position is legal, we can safely place it in fieldState now.
// not sure if anything will use fieldState after a non-aborting exception...
fieldState.position = position;
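// a zero position increment means this token overlaps
// the previous one; overlaps can be discounted when the
// length norm is computed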
if (posIncr == 0) {
fieldState.numOverlap++;
}
if (checkOffsets) {
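// offsets from the stream are relative to this field
// instance; add the accumulated offset to make them
// absolute within the document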
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'");
}
if (startOffset < lastStartOffset) {
throw new IllegalArgumentException("offsets must not go backwards startOffset="
+ startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
}
lastStartOffset = startOffset;
}
boolean success = false;
try {
// If we hit an exception in here, we abort
// all buffered documents since the last
// flush, on the likelihood that the
// internal state of the consumer is now
// corrupt and should not be flushed to a
// new segment:
consumer.add();
success = true;
} finally {
if (!success) {
docState.docWriter.setAborting();
}
}
fieldState.length++;
fieldState.position++;
} while (stream.incrementToken());
}
// trigger streams to perform end-of-stream operations
stream.end();
// TODO: maybe add some safety? then again, it's already checked
// when we come back around to the field...
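// after end(), the attributes hold the final position
// increment and end offset, accounting for any trailing
// holes (e.g. tokens removed by a stop filter)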
fieldState.position += posIncrAttribute.getPositionIncrement();
fieldState.offset += offsetAttribute.endOffset();
}
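// advance the accumulated offset by the analyzer's
// offset gap so the next value's offsets don't overlap
// this one's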
fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0;
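// boosts from all instances of this field in the
// document multiply into a single index-time boost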
fieldState.boost *= field.boost();
}
// LUCENE-2387: don't hang onto the field, so GC can
// reclaim
fields[i] = null;
}
consumer.finish();
endConsumer.finish();
}
@Override
FieldInfo getFieldInfo() {
return fieldInfo;
}
}