| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.codecs; |
| |
| import java.io.Closeable; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| |
| import org.apache.lucene.index.BinaryDocValues; |
| import org.apache.lucene.index.DocIDMerger; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.EmptyDocValuesProducer; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FilteredTermsEnum; |
| import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.MergeState; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.OrdinalMap; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.SegmentWriteState; // javadocs |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedNumericDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LongBitSet; |
| import org.apache.lucene.util.LongValues; |
| import org.apache.lucene.util.packed.PackedInts; |
| |
| import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| |
| /** |
| * Abstract API that consumes numeric, binary and |
| * sorted docvalues. Concrete implementations of this |
| * actually do "something" with the docvalues (write it into |
| * the index in a specific format). |
| * <p> |
| * The lifecycle is: |
| * <ol> |
| * <li>DocValuesConsumer is created by |
| * {@link NormsFormat#normsConsumer(SegmentWriteState)}. |
| * <li>{@link #addNumericField}, {@link #addBinaryField}, |
| * {@link #addSortedField}, {@link #addSortedSetField}, |
| * or {@link #addSortedNumericField} are called for each Numeric, |
| * Binary, Sorted, SortedSet, or SortedNumeric docvalues field. |
| * The API is a "pull" rather than "push", and the implementation |
| * is free to iterate over the values multiple times |
| * ({@link Iterable#iterator()}). |
| * <li>After all fields are added, the consumer is {@link #close}d. |
| * </ol> |
| * |
| * @lucene.experimental |
| */ |
| public abstract class DocValuesConsumer implements Closeable { |
| |
| /** Sole constructor. (For invocation by subclass |
| * constructors, typically implicit.) */ |
| protected DocValuesConsumer() {} |
| |
| /** |
| * Writes numeric docvalues for a field. |
| * @param field field information |
| * @param valuesProducer Numeric values to write. |
| * @throws IOException if an I/O error occurred. |
| */ |
| public abstract void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; |
| |
| /** |
| * Writes binary docvalues for a field. |
| * @param field field information |
| * @param valuesProducer Binary values to write. |
| * @throws IOException if an I/O error occurred. |
| */ |
| public abstract void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; |
| |
| /** |
| * Writes pre-sorted binary docvalues for a field. |
| * @param field field information |
| * @param valuesProducer produces the values and ordinals to write |
| * @throws IOException if an I/O error occurred. |
| */ |
| public abstract void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; |
| |
| /** |
| * Writes pre-sorted numeric docvalues for a field |
| * @param field field information |
| * @param valuesProducer produces the values to write |
| * @throws IOException if an I/O error occurred. |
| */ |
| public abstract void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; |
| |
| /** |
| * Writes pre-sorted set docvalues for a field |
| * @param field field information |
| * @param valuesProducer produces the values to write |
| * @throws IOException if an I/O error occurred. |
| */ |
| public abstract void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException; |
| |
| /** Merges in the fields from the readers in |
| * <code>mergeState</code>. The default implementation |
| * calls {@link #mergeNumericField}, {@link #mergeBinaryField}, |
| * {@link #mergeSortedField}, {@link #mergeSortedSetField}, |
| * or {@link #mergeSortedNumericField} for each field, |
| * depending on its type. |
| * Implementations can override this method |
| * for more sophisticated merging (bulk-byte copying, etc). */ |
| public void merge(MergeState mergeState) throws IOException { |
| for(DocValuesProducer docValuesProducer : mergeState.docValuesProducers) { |
| if (docValuesProducer != null) { |
| docValuesProducer.checkIntegrity(); |
| } |
| } |
| |
| for (FieldInfo mergeFieldInfo : mergeState.mergeFieldInfos) { |
| DocValuesType type = mergeFieldInfo.getDocValuesType(); |
| if (type != DocValuesType.NONE) { |
| if (type == DocValuesType.NUMERIC) { |
| mergeNumericField(mergeFieldInfo, mergeState); |
| } else if (type == DocValuesType.BINARY) { |
| mergeBinaryField(mergeFieldInfo, mergeState); |
| } else if (type == DocValuesType.SORTED) { |
| mergeSortedField(mergeFieldInfo, mergeState); |
| } else if (type == DocValuesType.SORTED_SET) { |
| mergeSortedSetField(mergeFieldInfo, mergeState); |
| } else if (type == DocValuesType.SORTED_NUMERIC) { |
| mergeSortedNumericField(mergeFieldInfo, mergeState); |
| } else { |
| throw new AssertionError("type=" + type); |
| } |
| } |
| } |
| } |
| |
| /** Tracks state of one numeric sub-reader that we are merging */ |
| private static class NumericDocValuesSub extends DocIDMerger.Sub { |
| |
| final NumericDocValues values; |
| |
| public NumericDocValuesSub(MergeState.DocMap docMap, NumericDocValues values) { |
| super(docMap); |
| this.values = values; |
| assert values.docID() == -1; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| return values.nextDoc(); |
| } |
| } |
| |
| /** |
| * Merges the numeric docvalues from <code>MergeState</code>. |
| * <p> |
| * The default implementation calls {@link #addNumericField}, passing |
| * a DocValuesProducer that merges and filters deleted documents on the fly. |
| */ |
| public void mergeNumericField(final FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { |
| addNumericField(mergeFieldInfo, |
| new EmptyDocValuesProducer() { |
| @Override |
| public NumericDocValues getNumeric(FieldInfo fieldInfo) throws IOException { |
| if (fieldInfo != mergeFieldInfo) { |
| throw new IllegalArgumentException("wrong fieldInfo"); |
| } |
| |
| List<NumericDocValuesSub> subs = new ArrayList<>(); |
| assert mergeState.docMaps.length == mergeState.docValuesProducers.length; |
| long cost = 0; |
| for (int i=0;i<mergeState.docValuesProducers.length;i++) { |
| NumericDocValues values = null; |
| DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i]; |
| if (docValuesProducer != null) { |
| FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name); |
| if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.NUMERIC) { |
| values = docValuesProducer.getNumeric(readerFieldInfo); |
| } |
| } |
| if (values != null) { |
| cost += values.cost(); |
| subs.add(new NumericDocValuesSub(mergeState.docMaps[i], values)); |
| } |
| } |
| |
| final DocIDMerger<NumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); |
| |
| final long finalCost = cost; |
| |
| return new NumericDocValues() { |
| private int docID = -1; |
| private NumericDocValuesSub current; |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| current = docIDMerger.next(); |
| if (current == null) { |
| docID = NO_MORE_DOCS; |
| } else { |
| docID = current.mappedDocID; |
| } |
| return docID; |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long cost() { |
| return finalCost; |
| } |
| |
| @Override |
| public long longValue() throws IOException { |
| return current.values.longValue(); |
| } |
| }; |
| } |
| }); |
| } |
| |
| /** Tracks state of one binary sub-reader that we are merging */ |
| private static class BinaryDocValuesSub extends DocIDMerger.Sub { |
| |
| final BinaryDocValues values; |
| |
| public BinaryDocValuesSub(MergeState.DocMap docMap, BinaryDocValues values) { |
| super(docMap); |
| this.values = values; |
| assert values.docID() == -1; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| return values.nextDoc(); |
| } |
| } |
| |
| /** |
| * Merges the binary docvalues from <code>MergeState</code>. |
| * <p> |
| * The default implementation calls {@link #addBinaryField}, passing |
| * a DocValuesProducer that merges and filters deleted documents on the fly. |
| */ |
| public void mergeBinaryField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { |
| addBinaryField(mergeFieldInfo, |
| new EmptyDocValuesProducer() { |
| @Override |
| public BinaryDocValues getBinary(FieldInfo fieldInfo) throws IOException { |
| if (fieldInfo != mergeFieldInfo) { |
| throw new IllegalArgumentException("wrong fieldInfo"); |
| } |
| |
| List<BinaryDocValuesSub> subs = new ArrayList<>(); |
| |
| long cost = 0; |
| for (int i=0;i<mergeState.docValuesProducers.length;i++) { |
| BinaryDocValues values = null; |
| DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i]; |
| if (docValuesProducer != null) { |
| FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name); |
| if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.BINARY) { |
| values = docValuesProducer.getBinary(readerFieldInfo); |
| } |
| } |
| if (values != null) { |
| cost += values.cost(); |
| subs.add(new BinaryDocValuesSub(mergeState.docMaps[i], values)); |
| } |
| } |
| |
| final DocIDMerger<BinaryDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); |
| final long finalCost = cost; |
| |
| return new BinaryDocValues() { |
| private BinaryDocValuesSub current; |
| private int docID = -1; |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| current = docIDMerger.next(); |
| if (current == null) { |
| docID = NO_MORE_DOCS; |
| } else { |
| docID = current.mappedDocID; |
| } |
| return docID; |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long cost() { |
| return finalCost; |
| } |
| |
| @Override |
| public BytesRef binaryValue() throws IOException { |
| return current.values.binaryValue(); |
| } |
| }; |
| } |
| }); |
| } |
| |
| /** Tracks state of one sorted numeric sub-reader that we are merging */ |
| private static class SortedNumericDocValuesSub extends DocIDMerger.Sub { |
| |
| final SortedNumericDocValues values; |
| |
| public SortedNumericDocValuesSub(MergeState.DocMap docMap, SortedNumericDocValues values) { |
| super(docMap); |
| this.values = values; |
| assert values.docID() == -1; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| return values.nextDoc(); |
| } |
| } |
| |
| /** |
| * Merges the sorted docvalues from <code>toMerge</code>. |
| * <p> |
| * The default implementation calls {@link #addSortedNumericField}, passing |
| * iterables that filter deleted documents. |
| */ |
| public void mergeSortedNumericField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { |
| |
| addSortedNumericField(mergeFieldInfo, |
| new EmptyDocValuesProducer() { |
| @Override |
| public SortedNumericDocValues getSortedNumeric(FieldInfo fieldInfo) throws IOException { |
| if (fieldInfo != mergeFieldInfo) { |
| throw new IllegalArgumentException("wrong FieldInfo"); |
| } |
| |
| // We must make new iterators + DocIDMerger for each iterator: |
| List<SortedNumericDocValuesSub> subs = new ArrayList<>(); |
| long cost = 0; |
| for (int i=0;i<mergeState.docValuesProducers.length;i++) { |
| DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i]; |
| SortedNumericDocValues values = null; |
| if (docValuesProducer != null) { |
| FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name); |
| if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_NUMERIC) { |
| values = docValuesProducer.getSortedNumeric(readerFieldInfo); |
| } |
| } |
| if (values == null) { |
| values = DocValues.emptySortedNumeric(); |
| } |
| cost += values.cost(); |
| subs.add(new SortedNumericDocValuesSub(mergeState.docMaps[i], values)); |
| } |
| |
| final long finalCost = cost; |
| |
| final DocIDMerger<SortedNumericDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); |
| |
| return new SortedNumericDocValues() { |
| |
| private int docID = -1; |
| private SortedNumericDocValuesSub currentSub; |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| currentSub = docIDMerger.next(); |
| if (currentSub == null) { |
| docID = NO_MORE_DOCS; |
| } else { |
| docID = currentSub.mappedDocID; |
| } |
| |
| return docID; |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int docValueCount() { |
| return currentSub.values.docValueCount(); |
| } |
| |
| @Override |
| public long cost() { |
| return finalCost; |
| } |
| |
| @Override |
| public long nextValue() throws IOException { |
| return currentSub.values.nextValue(); |
| } |
| }; |
| } |
| }); |
| } |
| |
| /** |
| * A merged {@link TermsEnum}. This helps avoid relying on the default terms enum, |
| * which calls {@link SortedDocValues#lookupOrd(int)} or |
| * {@link SortedSetDocValues#lookupOrd(long)} on every call to {@link TermsEnum#next()}. |
| */ |
| private static class MergedTermsEnum extends TermsEnum { |
| |
| private final TermsEnum[] subs; |
| private final OrdinalMap ordinalMap; |
| private final long valueCount; |
| private long ord = -1; |
| private BytesRef term; |
| |
| MergedTermsEnum(OrdinalMap ordinalMap, TermsEnum[] subs) { |
| this.ordinalMap = ordinalMap; |
| this.subs = subs; |
| this.valueCount = ordinalMap.getValueCount(); |
| } |
| |
| @Override |
| public BytesRef term() throws IOException { |
| return term; |
| } |
| |
| @Override |
| public long ord() throws IOException { |
| return ord; |
| } |
| |
| @Override |
| public BytesRef next() throws IOException { |
| if (++ord >= valueCount) { |
| return null; |
| } |
| final int subNum = ordinalMap.getFirstSegmentNumber(ord); |
| final TermsEnum sub = subs[subNum]; |
| final long subOrd = ordinalMap.getFirstSegmentOrd(ord); |
| do { |
| term = sub.next(); |
| } while (sub.ord() < subOrd); |
| assert sub.ord() == subOrd; |
| return term; |
| } |
| |
| @Override |
| public AttributeSource attributes() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean seekExact(BytesRef text) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public SeekStatus seekCeil(BytesRef text) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public void seekExact(long ord) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public void seekExact(BytesRef term, TermState state) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int docFreq() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long totalTermFreq() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public ImpactsEnum impacts(int flags) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public TermState termState() throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| } |
| |
| /** Tracks state of one sorted sub-reader that we are merging */ |
| private static class SortedDocValuesSub extends DocIDMerger.Sub { |
| |
| final SortedDocValues values; |
| final LongValues map; |
| |
| public SortedDocValuesSub(MergeState.DocMap docMap, SortedDocValues values, LongValues map) { |
| super(docMap); |
| this.values = values; |
| this.map = map; |
| assert values.docID() == -1; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| return values.nextDoc(); |
| } |
| } |
| |
| /** |
| * Merges the sorted docvalues from <code>toMerge</code>. |
| * <p> |
| * The default implementation calls {@link #addSortedField}, passing |
| * an Iterable that merges ordinals and values and filters deleted documents . |
| */ |
| public void mergeSortedField(FieldInfo fieldInfo, final MergeState mergeState) throws IOException { |
| List<SortedDocValues> toMerge = new ArrayList<>(); |
| for (int i=0;i<mergeState.docValuesProducers.length;i++) { |
| SortedDocValues values = null; |
| DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i]; |
| if (docValuesProducer != null) { |
| FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name); |
| if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) { |
| values = docValuesProducer.getSorted(fieldInfo); |
| } |
| } |
| if (values == null) { |
| values = DocValues.emptySorted(); |
| } |
| toMerge.add(values); |
| } |
| |
| final int numReaders = toMerge.size(); |
| final SortedDocValues dvs[] = toMerge.toArray(new SortedDocValues[numReaders]); |
| |
| // step 1: iterate thru each sub and mark terms still in use |
| TermsEnum liveTerms[] = new TermsEnum[dvs.length]; |
| long[] weights = new long[liveTerms.length]; |
| for (int sub=0;sub<numReaders;sub++) { |
| SortedDocValues dv = dvs[sub]; |
| Bits liveDocs = mergeState.liveDocs[sub]; |
| if (liveDocs == null) { |
| liveTerms[sub] = dv.termsEnum(); |
| weights[sub] = dv.getValueCount(); |
| } else { |
| LongBitSet bitset = new LongBitSet(dv.getValueCount()); |
| int docID; |
| while ((docID = dv.nextDoc()) != NO_MORE_DOCS) { |
| if (liveDocs.get(docID)) { |
| int ord = dv.ordValue(); |
| if (ord >= 0) { |
| bitset.set(ord); |
| } |
| } |
| } |
| liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); |
| weights[sub] = bitset.cardinality(); |
| } |
| } |
| |
| // step 2: create ordinal map (this conceptually does the "merging") |
| final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT); |
| |
| // step 3: add field |
| addSortedField(fieldInfo, |
| new EmptyDocValuesProducer() { |
| @Override |
| public SortedDocValues getSorted(FieldInfo fieldInfoIn) throws IOException { |
| if (fieldInfoIn != fieldInfo) { |
| throw new IllegalArgumentException("wrong FieldInfo"); |
| } |
| |
| // We must make new iterators + DocIDMerger for each iterator: |
| |
| List<SortedDocValuesSub> subs = new ArrayList<>(); |
| long cost = 0; |
| for (int i=0;i<mergeState.docValuesProducers.length;i++) { |
| SortedDocValues values = null; |
| DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i]; |
| if (docValuesProducer != null) { |
| FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(fieldInfo.name); |
| if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED) { |
| values = docValuesProducer.getSorted(readerFieldInfo); |
| } |
| } |
| if (values == null) { |
| values = DocValues.emptySorted(); |
| } |
| cost += values.cost(); |
| |
| subs.add(new SortedDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i))); |
| } |
| |
| final long finalCost = cost; |
| |
| final DocIDMerger<SortedDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); |
| |
| return new SortedDocValues() { |
| private int docID = -1; |
| private int ord; |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| SortedDocValuesSub sub = docIDMerger.next(); |
| if (sub == null) { |
| return docID = NO_MORE_DOCS; |
| } |
| int subOrd = sub.values.ordValue(); |
| assert subOrd != -1; |
| ord = (int) sub.map.get(subOrd); |
| docID = sub.mappedDocID; |
| return docID; |
| } |
| |
| @Override |
| public int ordValue() { |
| return ord; |
| } |
| |
| @Override |
| public int advance(int target) { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long cost() { |
| return finalCost; |
| } |
| |
| @Override |
| public int getValueCount() { |
| return (int) map.getValueCount(); |
| } |
| |
| @Override |
| public BytesRef lookupOrd(int ord) throws IOException { |
| int segmentNumber = map.getFirstSegmentNumber(ord); |
| int segmentOrd = (int) map.getFirstSegmentOrd(ord); |
| return dvs[segmentNumber].lookupOrd(segmentOrd); |
| } |
| |
| @Override |
| public TermsEnum termsEnum() throws IOException { |
| TermsEnum[] subs = new TermsEnum[toMerge.size()]; |
| for (int sub = 0; sub < subs.length; ++sub) { |
| subs[sub] = toMerge.get(sub).termsEnum(); |
| } |
| return new MergedTermsEnum(map, subs); |
| } |
| }; |
| } |
| }); |
| } |
| |
| /** Tracks state of one sorted set sub-reader that we are merging */ |
| private static class SortedSetDocValuesSub extends DocIDMerger.Sub { |
| |
| final SortedSetDocValues values; |
| final LongValues map; |
| |
| public SortedSetDocValuesSub(MergeState.DocMap docMap, SortedSetDocValues values, LongValues map) { |
| super(docMap); |
| this.values = values; |
| this.map = map; |
| assert values.docID() == -1; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| return values.nextDoc(); |
| } |
| |
| @Override |
| public String toString() { |
| return "SortedSetDocValuesSub(mappedDocID=" + mappedDocID + " values=" + values + ")"; |
| } |
| } |
| |
| /** |
| * Merges the sortedset docvalues from <code>toMerge</code>. |
| * <p> |
| * The default implementation calls {@link #addSortedSetField}, passing |
| * an Iterable that merges ordinals and values and filters deleted documents . |
| */ |
| public void mergeSortedSetField(FieldInfo mergeFieldInfo, final MergeState mergeState) throws IOException { |
| |
| List<SortedSetDocValues> toMerge = new ArrayList<>(); |
| for (int i=0;i<mergeState.docValuesProducers.length;i++) { |
| SortedSetDocValues values = null; |
| DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i]; |
| if (docValuesProducer != null) { |
| FieldInfo fieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name); |
| if (fieldInfo != null && fieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) { |
| values = docValuesProducer.getSortedSet(fieldInfo); |
| } |
| } |
| if (values == null) { |
| values = DocValues.emptySortedSet(); |
| } |
| toMerge.add(values); |
| } |
| |
| // step 1: iterate thru each sub and mark terms still in use |
| TermsEnum liveTerms[] = new TermsEnum[toMerge.size()]; |
| long[] weights = new long[liveTerms.length]; |
| for (int sub = 0; sub < liveTerms.length; sub++) { |
| SortedSetDocValues dv = toMerge.get(sub); |
| Bits liveDocs = mergeState.liveDocs[sub]; |
| if (liveDocs == null) { |
| liveTerms[sub] = dv.termsEnum(); |
| weights[sub] = dv.getValueCount(); |
| } else { |
| LongBitSet bitset = new LongBitSet(dv.getValueCount()); |
| int docID; |
| while ((docID = dv.nextDoc()) != NO_MORE_DOCS) { |
| if (liveDocs.get(docID)) { |
| long ord; |
| while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { |
| bitset.set(ord); |
| } |
| } |
| } |
| liveTerms[sub] = new BitsFilteredTermsEnum(dv.termsEnum(), bitset); |
| weights[sub] = bitset.cardinality(); |
| } |
| } |
| |
| // step 2: create ordinal map (this conceptually does the "merging") |
| final OrdinalMap map = OrdinalMap.build(null, liveTerms, weights, PackedInts.COMPACT); |
| |
| // step 3: add field |
| addSortedSetField(mergeFieldInfo, |
| new EmptyDocValuesProducer() { |
| @Override |
| public SortedSetDocValues getSortedSet(FieldInfo fieldInfo) throws IOException { |
| if (fieldInfo != mergeFieldInfo) { |
| throw new IllegalArgumentException("wrong FieldInfo"); |
| } |
| |
| // We must make new iterators + DocIDMerger for each iterator: |
| List<SortedSetDocValuesSub> subs = new ArrayList<>(); |
| |
| long cost = 0; |
| |
| for (int i=0;i<mergeState.docValuesProducers.length;i++) { |
| SortedSetDocValues values = null; |
| DocValuesProducer docValuesProducer = mergeState.docValuesProducers[i]; |
| if (docValuesProducer != null) { |
| FieldInfo readerFieldInfo = mergeState.fieldInfos[i].fieldInfo(mergeFieldInfo.name); |
| if (readerFieldInfo != null && readerFieldInfo.getDocValuesType() == DocValuesType.SORTED_SET) { |
| values = docValuesProducer.getSortedSet(readerFieldInfo); |
| } |
| } |
| if (values == null) { |
| values = DocValues.emptySortedSet(); |
| } |
| cost += values.cost(); |
| subs.add(new SortedSetDocValuesSub(mergeState.docMaps[i], values, map.getGlobalOrds(i))); |
| } |
| |
| final DocIDMerger<SortedSetDocValuesSub> docIDMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); |
| |
| final long finalCost = cost; |
| |
| return new SortedSetDocValues() { |
| private int docID = -1; |
| private SortedSetDocValuesSub currentSub; |
| |
| @Override |
| public int docID() { |
| return docID; |
| } |
| |
| @Override |
| public int nextDoc() throws IOException { |
| currentSub = docIDMerger.next(); |
| if (currentSub == null) { |
| docID = NO_MORE_DOCS; |
| } else { |
| docID = currentSub.mappedDocID; |
| } |
| |
| return docID; |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long nextOrd() throws IOException { |
| long subOrd = currentSub.values.nextOrd(); |
| if (subOrd == NO_MORE_ORDS) { |
| return NO_MORE_ORDS; |
| } |
| return currentSub.map.get(subOrd); |
| } |
| |
| @Override |
| public long cost() { |
| return finalCost; |
| } |
| |
| @Override |
| public BytesRef lookupOrd(long ord) throws IOException { |
| int segmentNumber = map.getFirstSegmentNumber(ord); |
| long segmentOrd = map.getFirstSegmentOrd(ord); |
| return toMerge.get(segmentNumber).lookupOrd(segmentOrd); |
| } |
| |
| @Override |
| public long getValueCount() { |
| return map.getValueCount(); |
| } |
| |
| @Override |
| public TermsEnum termsEnum() throws IOException { |
| TermsEnum[] subs = new TermsEnum[toMerge.size()]; |
| for (int sub = 0; sub < subs.length; ++sub) { |
| subs[sub] = toMerge.get(sub).termsEnum(); |
| } |
| return new MergedTermsEnum(map, subs); |
| } |
| }; |
| } |
| }); |
| } |
| |
| // TODO: seek-by-ord to nextSetBit |
| static class BitsFilteredTermsEnum extends FilteredTermsEnum { |
| final LongBitSet liveTerms; |
| |
| BitsFilteredTermsEnum(TermsEnum in, LongBitSet liveTerms) { |
| super(in, false); // <-- not passing false here wasted about 3 hours of my time!!!!!!!!!!!!! |
| assert liveTerms != null; |
| this.liveTerms = liveTerms; |
| } |
| |
| @Override |
| protected AcceptStatus accept(BytesRef term) throws IOException { |
| if (liveTerms.get(ord())) { |
| return AcceptStatus.YES; |
| } else { |
| return AcceptStatus.NO; |
| } |
| } |
| } |
| |
| /** Helper: returns true if the given docToValue count contains only at most one value */ |
| public static boolean isSingleValued(Iterable<Number> docToValueCount) { |
| for (Number count : docToValueCount) { |
| if (count.longValue() > 1) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** Helper: returns single-valued view, using {@code missingValue} when count is zero */ |
| public static Iterable<Number> singletonView(final Iterable<Number> docToValueCount, final Iterable<Number> values, final Number missingValue) { |
| assert isSingleValued(docToValueCount); |
| return new Iterable<Number>() { |
| |
| @Override |
| public Iterator<Number> iterator() { |
| final Iterator<Number> countIterator = docToValueCount.iterator(); |
| final Iterator<Number> valuesIterator = values.iterator(); |
| return new Iterator<Number>() { |
| |
| @Override |
| public boolean hasNext() { |
| return countIterator.hasNext(); |
| } |
| |
| @Override |
| public Number next() { |
| int count = countIterator.next().intValue(); |
| if (count == 0) { |
| return missingValue; |
| } else { |
| return valuesIterator.next(); |
| } |
| } |
| |
| @Override |
| public void remove() { |
| throw new UnsupportedOperationException(); |
| } |
| }; |
| } |
| }; |
| } |
| } |