| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.IOException; |
| import java.util.Collections; |
| import java.util.Set; |
| import java.util.concurrent.CopyOnWriteArraySet; |
| |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.DocValuesProducer; |
| import org.apache.lucene.codecs.FieldInfosFormat; |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.codecs.PointsReader; |
| import org.apache.lucene.codecs.StoredFieldsReader; |
| import org.apache.lucene.codecs.TermVectorsReader; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.IOUtils; |
| |
/**
 * IndexReader implementation over a single segment.
 * <p>
 * Instances pointing to the same segment (but with different deletes, etc)
 * may share the same core data.
 * @lucene.experimental
 */
public final class SegmentReader extends CodecReader {

  // Private, stable clone of the SegmentCommitInfo handed to the constructor;
  // all reads inside this reader go through this snapshot.
  private final SegmentCommitInfo si;
  // this is the original SI that IW uses internally but it's mutated behind the scenes
  // and we don't want this SI to be used for anything. Yet, IW needs this to do maintenance
  // and lookup pooled readers etc.
  private final SegmentCommitInfo originalSi;
  // Index-creation version, minimum segment version and index sort for this leaf.
  private final LeafMetaData metaData;
  // Bits of live (non-deleted) docs, or null when the segment has no deletions.
  private final Bits liveDocs;
  // Live docs ignoring soft deletes; identical to liveDocs in the non-NRT case
  // (see assertLiveDocs).
  private final Bits hardLiveDocs;

  // Normally set to si.maxDoc - si.delDocCount, unless we
  // were created as an NRT reader from IW, in which case IW
  // tells us the number of live docs:
  private final int numDocs;

  // Ref-counted state shared by all SegmentReaders over the same segment core.
  final SegmentCoreReaders core;
  final SegmentDocValues segDocValues;

  /** True if we are holding RAM only liveDocs or DV updates, i.e. the SegmentCommitInfo delGen doesn't match our liveDocs. */
  final boolean isNRT;

  // Doc values producer for the current field-infos generation (covers field
  // updates when present); null if no field has doc values.
  final DocValuesProducer docValuesProducer;
  // Most recent FieldInfos for this commit (reflects field updates when present).
  final FieldInfos fieldInfos;

  /**
   * Constructs a new SegmentReader with a new core, reading liveDocs and
   * doc-values updates from disk (non-NRT).
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  SegmentReader(SegmentCommitInfo si, int createdVersionMajor, IOContext context) throws IOException {
    // Snapshot the SI: IW mutates the original behind the scenes (see originalSi).
    this.si = si.clone();
    this.originalSi = si;
    this.metaData = new LeafMetaData(createdVersionMajor, si.info.getMinVersion(), si.info.getIndexSort());

    // We pull liveDocs/DV updates from disk:
    this.isNRT = false;

    core = new SegmentCoreReaders(si.info.dir, si, context);
    segDocValues = new SegmentDocValues();

    boolean success = false;
    final Codec codec = si.info.getCodec();
    try {
      if (si.hasDeletions()) {
        // NOTE: the bitvector is stored using the regular directory, not cfs
        hardLiveDocs = liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE);
      } else {
        assert si.getDelCount() == 0;
        hardLiveDocs = liveDocs = null;
      }
      numDocs = si.info.maxDoc() - si.getDelCount();

      fieldInfos = initFieldInfos();
      docValuesProducer = initDocValuesProducer();
      assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs);
      success = true;
    } finally {
      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above. In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        doClose();
      }
    }
  }

  /** Create new SegmentReader sharing core from a previous
   *  SegmentReader and using the provided liveDocs, and recording
   *  whether those liveDocs were carried in ram (isNRT=true). */
  SegmentReader(SegmentCommitInfo si, SegmentReader sr, Bits liveDocs, Bits hardLiveDocs, int numDocs, boolean isNRT) throws IOException {
    if (numDocs > si.info.maxDoc()) {
      throw new IllegalArgumentException("numDocs=" + numDocs + " but maxDoc=" + si.info.maxDoc());
    }
    if (liveDocs != null && liveDocs.length() != si.info.maxDoc()) {
      throw new IllegalArgumentException("maxDoc=" + si.info.maxDoc() + " but liveDocs.size()=" + liveDocs.length());
    }
    this.si = si.clone();
    this.originalSi = si;
    this.metaData = sr.getMetaData();
    this.liveDocs = liveDocs;
    this.hardLiveDocs = hardLiveDocs;
    assert assertLiveDocs(isNRT, hardLiveDocs, liveDocs);
    this.isNRT = isNRT;
    this.numDocs = numDocs;
    // Share the (ref-counted) core with the previous reader; balanced by
    // core.decRef() in doClose().
    this.core = sr.core;
    core.incRef();
    this.segDocValues = sr.segDocValues;

    boolean success = false;
    try {
      fieldInfos = initFieldInfos();
      docValuesProducer = initDocValuesProducer();
      success = true;
    } finally {
      // Release whatever we acquired (including the core ref above) on failure.
      if (!success) {
        doClose();
      }
    }
  }

  // Sanity invariant: in the non-NRT case hard and soft liveDocs are the same
  // object; in the NRT case non-null hardLiveDocs imply non-null liveDocs.
  // Always returns true so it can be used inside an assert.
  private static boolean assertLiveDocs(boolean isNRT, Bits hardLiveDocs, Bits liveDocs) {
    if (isNRT) {
      assert hardLiveDocs == null || liveDocs != null : " liveDocs must be non null if hardLiveDocs are non null";
    } else {
      assert hardLiveDocs == liveDocs : "non-nrt case must have identical liveDocs";
    }
    return true;
  }

  /**
   * init most recent DocValues for the current commit
   */
  private DocValuesProducer initDocValuesProducer() throws IOException {

    if (fieldInfos.hasDocValues() == false) {
      return null;
    } else {
      // Read through the compound-file reader when the segment uses CFS.
      Directory dir;
      if (core.cfsReader != null) {
        dir = core.cfsReader;
      } else {
        dir = si.info.dir;
      }
      if (si.hasFieldUpdates()) {
        // Field updates: a producer that stitches together multiple DV generations.
        return new SegmentDocValuesProducer(si, dir, core.coreFieldInfos, fieldInfos, segDocValues);
      } else {
        // simple case, no DocValues updates
        return segDocValues.getDocValuesProducer(-1L, si, dir, fieldInfos);
      }
    }
  }

  /**
   * init most recent FieldInfos for the current commit
   */
  private FieldInfos initFieldInfos() throws IOException {
    if (!si.hasFieldUpdates()) {
      return core.coreFieldInfos;
    } else {
      // updates always outside of CFS
      FieldInfosFormat fisFormat = si.info.getCodec().fieldInfosFormat();
      final String segmentSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
      return fisFormat.read(si.info.dir, si.info, segmentSuffix, IOContext.READONCE);
    }
  }

  @Override
  public Bits getLiveDocs() {
    ensureOpen();
    return liveDocs;
  }

  @Override
  protected void doClose() throws IOException {
    //System.out.println("SR.close seg=" + si);
    // Release the shared core first, then the doc-values generations we hold;
    // the finally block guarantees the DV refs are dropped even if decRef throws.
    try {
      core.decRef();
    } finally {
      if (docValuesProducer instanceof SegmentDocValuesProducer) {
        // Multi-generation producer (field updates): release every gen we acquired.
        segDocValues.decRef(((SegmentDocValuesProducer)docValuesProducer).dvGens);
      } else if (docValuesProducer != null) {
        // Single-generation producer acquired with gen -1 in initDocValuesProducer.
        segDocValues.decRef(Collections.singletonList(-1L));
      }
    }
  }

  @Override
  public FieldInfos getFieldInfos() {
    ensureOpen();
    return fieldInfos;
  }

  @Override
  public int numDocs() {
    // Don't call ensureOpen() here (it could affect performance)
    return numDocs;
  }

  @Override
  public int maxDoc() {
    // Don't call ensureOpen() here (it could affect performance)
    return si.info.maxDoc();
  }

  @Override
  public TermVectorsReader getTermVectorsReader() {
    ensureOpen();
    // Per-thread clone held by the shared core.
    return core.termVectorsLocal.get();
  }

  @Override
  public StoredFieldsReader getFieldsReader() {
    ensureOpen();
    // Per-thread clone held by the shared core.
    return core.fieldsReaderLocal.get();
  }

  @Override
  public PointsReader getPointsReader() {
    ensureOpen();
    return core.pointsReader;
  }

  @Override
  public NormsProducer getNormsReader() {
    ensureOpen();
    return core.normsProducer;
  }

  @Override
  public DocValuesProducer getDocValuesReader() {
    ensureOpen();
    return docValuesProducer;
  }

  @Override
  public FieldsProducer getPostingsReader() {
    ensureOpen();
    return core.fields;
  }

  @Override
  public String toString() {
    // SegmentInfo.toString takes dir and number of
    // *pending* deletions; so we reverse compute that here:
    return si.toString(si.info.maxDoc() - numDocs - si.getDelCount());
  }

  /**
   * Return the name of the segment this reader is reading.
   */
  public String getSegmentName() {
    return si.info.name;
  }

  /**
   * Return the SegmentInfoPerCommit of the segment this reader is reading.
   */
  public SegmentCommitInfo getSegmentInfo() {
    return si;
  }

  /** Returns the directory this index resides in. */
  public Directory directory() {
    // Don't ensureOpen here -- in certain cases, when a
    // cloned/reopened reader needs to commit, it may call
    // this method on the closed original reader
    return si.info.dir;
  }

  // Listeners notified when this reader (not the shared core) is closed.
  private final Set<ClosedListener> readerClosedListeners = new CopyOnWriteArraySet<>();

  @Override
  void notifyReaderClosedListeners() throws IOException {
    synchronized(readerClosedListeners) {
      // applyToAll invokes every listener and propagates any thrown exceptions
      // after all have run.
      IOUtils.applyToAll(readerClosedListeners, l -> l.onClose(readerCacheHelper.getKey()));
    }
  }

  // Cache helper keyed to this specific reader instance (deletes included),
  // as opposed to coreCacheHelper which is shared across NRT clones.
  private final IndexReader.CacheHelper readerCacheHelper = new IndexReader.CacheHelper() {
    private final IndexReader.CacheKey cacheKey = new IndexReader.CacheKey();

    @Override
    public CacheKey getKey() {
      return cacheKey;
    }

    @Override
    public void addClosedListener(ClosedListener listener) {
      ensureOpen();
      readerClosedListeners.add(listener);
    }
  };

  @Override
  public CacheHelper getReaderCacheHelper() {
    return readerCacheHelper;
  }

  /** Wrap the cache helper of the core to add ensureOpen() calls that make
   *  sure users do not register closed listeners on closed indices. */
  private final IndexReader.CacheHelper coreCacheHelper = new IndexReader.CacheHelper() {

    @Override
    public CacheKey getKey() {
      return core.getCacheHelper().getKey();
    }

    @Override
    public void addClosedListener(ClosedListener listener) {
      ensureOpen();
      core.getCacheHelper().addClosedListener(listener);
    }
  };

  @Override
  public CacheHelper getCoreCacheHelper() {
    return coreCacheHelper;
  }

  @Override
  public LeafMetaData getMetaData() {
    return metaData;
  }

  /**
   * Returns the original SegmentInfo passed to the segment reader on creation time.
   * {@link #getSegmentInfo()} returns a clone of this instance.
   */
  SegmentCommitInfo getOriginalSegmentInfo() {
    return originalSi;
  }

  /**
   * Returns the live docs that are not hard-deleted. This is an expert API to be used with
   * soft-deletes to filter out documents that are hard-deleted, for instance due to aborted
   * documents, or to distinguish soft- and hard-deleted documents, i.e. a rolled back tombstone.
   * @lucene.experimental
   */
  public Bits getHardLiveDocs() {
    return hardLiveDocs;
  }

  @Override
  public void checkIntegrity() throws IOException {
    super.checkIntegrity();
    // Also verify the compound file's own checksums when the segment uses CFS.
    if (core.cfsReader != null) {
      core.cfsReader.checkIntegrity();
    }
  }
}