blob: 881778ad9331f58154e423dad3b22cf3a04de517 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefArray;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.Counter;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
/** Buffers up pending byte[] per doc, then flushes when
* segment flushes. */
class BinaryDocValuesWriter extends DocValuesWriter<BinaryDocValues> {
/** Maximum length for a binary field. */
private static final int MAX_LENGTH = ArrayUtil.MAX_ARRAY_LENGTH;
// 4 KB block sizes for PagedBytes storage:
private final static int BLOCK_BITS = 12;
private final PagedBytes bytes;
private final DataOutput bytesOut;
private final Counter iwBytesUsed;
private final PackedLongValues.Builder lengths;
private DocsWithFieldSet docsWithField;
private final FieldInfo fieldInfo;
private long bytesUsed;
private int lastDocID = -1;
private int maxLength = 0;
private PackedLongValues finalLengths;
BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
this.fieldInfo = fieldInfo;
this.bytes = new PagedBytes(BLOCK_BITS);
this.bytesOut = bytes.getDataOutput();
this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
this.iwBytesUsed = iwBytesUsed;
this.docsWithField = new DocsWithFieldSet();
this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
public void addValue(int docID, BytesRef value) {
if (docID <= lastDocID) {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)");
}
if (value == null) {
throw new IllegalArgumentException("field=\"" + fieldInfo.name + "\": null value not allowed");
}
if (value.length > MAX_LENGTH) {
throw new IllegalArgumentException("DocValuesField \"" + fieldInfo.name + "\" is too large, must be <= " + MAX_LENGTH);
}
maxLength = Math.max(value.length, maxLength);
lengths.add(value.length);
try {
bytesOut.writeBytes(value.bytes, value.offset, value.length);
} catch (IOException ioe) {
// Should never happen!
throw new RuntimeException(ioe);
}
docsWithField.add(docID);
updateBytesUsed();
lastDocID = docID;
}
private void updateBytesUsed() {
final long newBytesUsed = lengths.ramBytesUsed() + bytes.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(newBytesUsed - bytesUsed);
bytesUsed = newBytesUsed;
}
@Override
BinaryDocValues getDocValues() {
if (finalLengths == null) {
finalLengths = this.lengths.build();
}
return new BufferedBinaryDocValues(finalLengths, maxLength, bytes.getDataInput(), docsWithField.iterator());
}
@Override
public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer) throws IOException {
bytes.freeze(false);
if (finalLengths == null) {
finalLengths = this.lengths.build();
}
final BinaryDVs sorted;
if (sortMap != null) {
sorted = new BinaryDVs(state.segmentInfo.maxDoc(), sortMap,
new BufferedBinaryDocValues(finalLengths, maxLength, bytes.getDataInput(), docsWithField.iterator()));
} else {
sorted = null;
}
dvConsumer.addBinaryField(fieldInfo,
new EmptyDocValuesProducer() {
@Override
public BinaryDocValues getBinary(FieldInfo fieldInfoIn) {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
if (sorted == null) {
return new BufferedBinaryDocValues(finalLengths, maxLength, bytes.getDataInput(), docsWithField.iterator());
} else {
return new SortingBinaryDocValues(sorted);
}
}
});
}
// iterates over the values we have in ram
private static class BufferedBinaryDocValues extends BinaryDocValues {
final BytesRefBuilder value;
final PackedLongValues.Iterator lengthsIterator;
final DocIdSetIterator docsWithField;
final DataInput bytesIterator;
BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, DocIdSetIterator docsWithFields) {
this.value = new BytesRefBuilder();
this.value.grow(maxLength);
this.lengthsIterator = lengths.iterator();
this.bytesIterator = bytesIterator;
this.docsWithField = docsWithFields;
}
@Override
public int docID() {
return docsWithField.docID();
}
@Override
public int nextDoc() throws IOException {
int docID = docsWithField.nextDoc();
if (docID != NO_MORE_DOCS) {
int length = Math.toIntExact(lengthsIterator.next());
value.setLength(length);
bytesIterator.readBytes(value.bytes(), 0, length);
}
return docID;
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long cost() {
return docsWithField.cost();
}
@Override
public BytesRef binaryValue() {
return value.get();
}
}
static class SortingBinaryDocValues extends BinaryDocValues {
private final BinaryDVs dvs;
private final BytesRefBuilder spare = new BytesRefBuilder();
private int docID = -1;
SortingBinaryDocValues(BinaryDVs dvs) {
this.dvs = dvs;
}
@Override
public int nextDoc() {
do {
docID++;
if (docID == dvs.offsets.length) {
return docID = NO_MORE_DOCS;
}
} while (dvs.offsets[docID] <= 0);
return docID;
}
@Override
public int docID() {
return docID;
}
@Override
public int advance(int target) {
throw new UnsupportedOperationException("use nextDoc instead");
}
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException("use nextDoc instead");
}
@Override
public BytesRef binaryValue() {
dvs.values.get(spare, dvs.offsets[docID]-1);
return spare.get();
}
@Override
public long cost() {
return dvs.values.size();
}
}
static final class BinaryDVs {
final int[] offsets;
final BytesRefArray values;
BinaryDVs(int maxDoc, Sorter.DocMap sortMap, BinaryDocValues oldValues) throws IOException {
offsets = new int[maxDoc];
values = new BytesRefArray(Counter.newCounter());
int offset = 1; // 0 means no values for this document
int docID;
while ((docID = oldValues.nextDoc()) != NO_MORE_DOCS) {
int newDocID = sortMap.oldToNew(docID);
values.append(oldValues.binaryValue());
offsets[newDocID] = offset++;
}
}
}
}