// blob: 477fa2e4a6c5e650b34a5a7082556bdae61c877c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.codecs.idversion;
import java.io.IOException;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.PushPostingsWriterBase;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
/**
 * Postings writer for ID fields that carry a version in the payload of their single position.
 *
 * <p>For every term this writer accepts at most one live document, which must contain the term
 * exactly once, at a position whose payload is exactly 8 bytes. The payload is decoded with
 * {@link IDVersionPostingsFormat#bytesToLong} and must lie between {@link
 * IDVersionPostingsFormat#MIN_VERSION} and {@link IDVersionPostingsFormat#MAX_VERSION}. The
 * resulting (docID, version) pair is stored in the term's {@link IDVersionTermState} and later
 * serialized by {@link #encodeTerm}.
 *
 * <p>Documents that are not set in the {@code liveDocs} passed to the constructor are skipped
 * entirely: they are neither validated nor recorded.
 */
final class IDVersionPostingsWriter extends PushPostingsWriterBase {

  /** Codec name written into the index header by {@link #init}. */
  static final String TERMS_CODEC = "IDVersionPostingsWriterTerms";

  // Increment version to change it
  static final int VERSION_START = 1;
  static final int VERSION_CURRENT = VERSION_START;

  static final IDVersionTermState emptyState = new IDVersionTermState();

  /** Reset to {@link #emptyState} each time {@link #setField} is called. */
  IDVersionTermState lastState;

  /** Doc ID of the live document seen for the current term, or -1 if none has been seen yet. */
  int lastDocID;

  /** Position seen for the current live document, or -1 if none has been seen yet. */
  private int lastPosition;

  /** Version decoded from the payload of the current live document, or -1 if none yet. */
  private long lastVersion;

  /**
   * True while the document most recently passed to {@link #startDoc} is deleted and must be
   * ignored by {@link #addPosition} and {@link #finishDoc}.
   *
   * <p>This is tracked separately from {@link #lastDocID}: a deleted document may follow the
   * term's live document, in which case {@code lastDocID} is already set and cannot be used to
   * tell that the current document is being skipped.
   */
  private boolean currentDocDeleted;

  /** Version written by the previous {@link #encodeTerm} call; base for delta encoding. */
  private long lastEncodedVersion;

  /** Live documents filter; {@code null} means every document is treated as live. */
  private final Bits liveDocs;

  /**
   * Creates a writer.
   *
   * @param liveDocs bits identifying live documents, or {@code null} to treat all documents as
   *     live
   */
  public IDVersionPostingsWriter(Bits liveDocs) {
    this.liveDocs = liveDocs;
  }

  /** Returns a fresh, empty {@link IDVersionTermState}. */
  @Override
  public BlockTermState newTermState() {
    return new IDVersionTermState();
  }

  /** Writes the index header ({@link #TERMS_CODEC}, {@link #VERSION_CURRENT}) to {@code termsOut}. */
  @Override
  public void init(IndexOutput termsOut, SegmentWriteState state) throws IOException {
    CodecUtil.writeIndexHeader(
        termsOut, TERMS_CODEC, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
  }

  /**
   * Validates that the field is usable with this writer.
   *
   * @throws IllegalArgumentException if the field is not indexed with {@link
   *     IndexOptions#DOCS_AND_FREQS_AND_POSITIONS}, or if it has term vectors
   */
  @Override
  public void setField(FieldInfo fieldInfo) {
    super.setField(fieldInfo);
    if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
      throw new IllegalArgumentException(
          "field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
    }
    // LUCENE-5693: because CheckIndex cross-checks term vectors with postings even for deleted
    // docs, and because our PF only indexes the
    // non-deleted documents on flush, CheckIndex will see this as corruption:
    if (fieldInfo.hasVectors()) {
      throw new IllegalArgumentException(
          "field cannot index term vectors: CheckIndex will report this as index corruption");
    }
    lastState = emptyState;
  }

  /** Resets per-term state: no live document has been seen for the new term yet. */
  @Override
  public void startTerm(NumericDocValues norms) {
    lastDocID = -1;
    currentDocDeleted = false;
  }

  /**
   * Begins a document for the current term. Deleted documents are flagged and otherwise ignored.
   *
   * @throws IllegalArgumentException if a live document was already seen for this term, or if
   *     {@code termDocFreq} is not 1
   */
  @Override
  public void startDoc(int docID, int termDocFreq) throws IOException {
    // TODO: LUCENE-5693: we don't need this check if we fix IW to not send deleted docs to us on
    // flush:
    if (liveDocs != null && liveDocs.get(docID) == false) {
      // Remember that this doc is skipped so addPosition/finishDoc ignore it even when a live
      // doc for the same term was already recorded.
      currentDocDeleted = true;
      return;
    }
    currentDocDeleted = false;
    if (lastDocID != -1) {
      throw new IllegalArgumentException(
          "term appears in more than one document: " + lastDocID + " and " + docID);
    }
    if (termDocFreq != 1) {
      throw new IllegalArgumentException("term appears more than once in the document");
    }
    lastDocID = docID;
    lastPosition = -1;
    lastVersion = -1;
  }

  /**
   * Records the single position of the current live document and decodes its version payload.
   * Does nothing when the current document is deleted.
   *
   * @throws IllegalArgumentException if a position was already added for this document, if the
   *     payload is missing or not 8 bytes long, or if the decoded version is out of range
   */
  @Override
  public void addPosition(int position, BytesRef payload, int startOffset, int endOffset)
      throws IOException {
    if (currentDocDeleted) {
      // Doc is deleted; skip it
      return;
    }
    if (lastPosition != -1) {
      throw new IllegalArgumentException("term appears more than once in document");
    }
    lastPosition = position;
    if (payload == null) {
      throw new IllegalArgumentException("token doens't have a payload");
    }
    if (payload.length != 8) {
      throw new IllegalArgumentException("payload.length != 8 (got " + payload.length + ")");
    }
    lastVersion = IDVersionPostingsFormat.bytesToLong(payload);
    if (lastVersion < IDVersionPostingsFormat.MIN_VERSION) {
      throw new IllegalArgumentException(
          "version must be >= MIN_VERSION="
              + IDVersionPostingsFormat.MIN_VERSION
              + " (got: "
              + lastVersion
              + "; payload="
              + payload
              + ")");
    }
    if (lastVersion > IDVersionPostingsFormat.MAX_VERSION) {
      throw new IllegalArgumentException(
          "version must be <= MAX_VERSION="
              + IDVersionPostingsFormat.MAX_VERSION
              + " (got: "
              + lastVersion
              + "; payload="
              + payload
              + ")");
    }
  }

  /**
   * Ends the current document. Does nothing when the current document is deleted.
   *
   * @throws IllegalArgumentException if no position was added for a live document
   */
  @Override
  public void finishDoc() throws IOException {
    if (currentDocDeleted) {
      // Doc is deleted; skip it
      return;
    }
    if (lastPosition == -1) {
      throw new IllegalArgumentException("missing addPosition");
    }
  }

  /** Called when we are done adding docs to this term */
  @Override
  public void finishTerm(BlockTermState _state) throws IOException {
    if (lastDocID == -1) {
      // No live document was recorded for this term; leave the state untouched.
      return;
    }
    IDVersionTermState state = (IDVersionTermState) _state;
    assert state.docFreq > 0;
    state.docID = lastDocID;
    state.idVersion = lastVersion;
  }

  /**
   * Serializes a term's state: the doc ID as a VInt, followed by the version, written in full as
   * a VLong when {@code absolute} is true and otherwise as a ZLong delta against the version
   * written by the previous call.
   */
  @Override
  public void encodeTerm(
      DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute)
      throws IOException {
    IDVersionTermState state = (IDVersionTermState) _state;
    out.writeVInt(state.docID);
    if (absolute) {
      out.writeVLong(state.idVersion);
    } else {
      long delta = state.idVersion - lastEncodedVersion;
      out.writeZLong(delta);
    }
    lastEncodedVersion = state.idVersion;
  }

  /** No-op: this writer holds nothing that needs to be released. */
  @Override
  public void close() throws IOException {}
}