| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.nio.file.NoSuchFileException; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.List; |
| |
| import org.apache.lucene.search.SearcherManager; // javadocs |
| import org.apache.lucene.store.Directory; |
| |
| /** DirectoryReader is an implementation of {@link CompositeReader} |
| that can read indexes in a {@link Directory}. |
| |
| <p>DirectoryReader instances are usually constructed with a call to |
| one of the static <code>open()</code> methods, e.g. {@link |
| #open(Directory)}. |
| |
| <p> For efficiency, in this API documents are often referred to via |
| <i>document numbers</i>, non-negative integers which each name a unique |
| document in the index. These document numbers are ephemeral -- they may change |
| as documents are added to and deleted from an index. Clients should thus not |
| rely on a given document having the same number between sessions. |
| |
| <p> |
| <a name="thread-safety"></a><p><b>NOTE</b>: {@link |
| IndexReader} instances are completely thread |
| safe, meaning multiple threads can call any of its methods, |
| concurrently. If your application requires external |
| synchronization, you should <b>not</b> synchronize on the |
| <code>IndexReader</code> instance; use your own |
| (non-Lucene) objects instead. |
| */ |
| public abstract class DirectoryReader extends BaseCompositeReader<LeafReader> { |
| |
| /** The index {@link Directory} this reader reads from; assigned once by the constructor. */ |
| protected final Directory directory; |
| |
| /** |
| * Opens a reader over the index stored in the given {@link Directory}. |
| * |
| * <p>No specific commit point and no leaf sorter are requested. |
| * |
| * @param directory the index directory |
| * @return a reader on the index in {@code directory} |
| * @throws IOException if there is a low-level IO error |
| */ |
| public static DirectoryReader open(final Directory directory) throws IOException { |
| final DirectoryReader reader = StandardDirectoryReader.open(directory, null, null); |
| return reader; |
| } |
| |
| /** |
| * Opens a reader over the index stored in the given {@link Directory}, ordering its |
| * leaves with the supplied comparator. |
| * |
| * @param directory the index directory |
| * @param leafSorter a comparator for sorting leaf readers. This is useful for indices that |
| * are expected to serve many queries with one particular sort criteria (e.g. a descending |
| * sort on timestamp for time-based indices); in that case {@code leafSorter} should order |
| * the leaves by the same criteria, which allows this type of sorted query to terminate |
| * early while iterating through segments and segments' documents. |
| * @return a reader on the index in {@code directory} |
| * @throws IOException if there is a low-level IO error |
| */ |
| public static DirectoryReader open(final Directory directory, Comparator<LeafReader> leafSorter) |
| throws IOException { |
| final DirectoryReader reader = StandardDirectoryReader.open(directory, null, leafSorter); |
| return reader; |
| } |
| |
| /** |
| * Opens a near real time reader from the given {@link org.apache.lucene.index.IndexWriter}. |
| * |
| * <p>Equivalent to {@link #open(IndexWriter,boolean,boolean) open(writer, true, false)}. |
| * |
| * @param writer The IndexWriter to open from |
| * @return The new IndexReader |
| * @throws CorruptIndexException if the index is corrupt |
| * @throws IOException if there is a low-level IO error |
| * |
| * @see #openIfChanged(DirectoryReader,IndexWriter,boolean) |
| * |
| * @lucene.experimental |
| */ |
| public static DirectoryReader open(final IndexWriter writer) throws IOException { |
| final boolean applyAllDeletes = true; |
| final boolean writeAllDeletes = false; |
| return open(writer, applyAllDeletes, writeAllDeletes); |
| } |
| |
| /** |
| * Expert: opens a near real time reader from the given |
| * {@link org.apache.lucene.index.IndexWriter}, letting the caller control whether past |
| * deletions should be applied. |
| * |
| * @param writer The IndexWriter to open from |
| * @param applyAllDeletes If true, all buffered deletes will be applied (made visible) in |
| * the returned reader. If false, the deletes are not applied but remain buffered (in |
| * IndexWriter) so that they will be applied in the future. Applying deletes can be |
| * costly, so an application that can tolerate deleted documents being returned might |
| * gain some performance by passing false. |
| * @param writeAllDeletes If true, new deletes will be written down to index files instead |
| * of being carried over from writer to reader directly in heap |
| * @return the reader obtained from {@code writer} |
| * @throws IOException if there is a low-level IO error |
| * |
| * @see #open(IndexWriter) |
| * |
| * @lucene.experimental |
| */ |
| public static DirectoryReader open(final IndexWriter writer, boolean applyAllDeletes, boolean writeAllDeletes) throws IOException { |
| final DirectoryReader nrtReader = writer.getReader(applyAllDeletes, writeAllDeletes); |
| return nrtReader; |
| } |
| |
| /** |
| * Expert: opens a reader over the index as of the given {@link IndexCommit}. |
| * |
| * <p>The directory to read from is taken from the commit itself; no leaf sorter is used. |
| * |
| * @param commit the commit point to open |
| * @return a reader on the given commit point |
| * @throws IOException if there is a low-level IO error |
| */ |
| public static DirectoryReader open(final IndexCommit commit) throws IOException { |
| final Directory commitDirectory = commit.getDirectory(); |
| return StandardDirectoryReader.open(commitDirectory, commit, null); |
| } |
| |
| /** |
| * Expert: opens a reader over the index as of the given {@link IndexCommit}, ordering its |
| * leaves with the supplied comparator. |
| * |
| * <p>The directory to read from is taken from the commit itself. |
| * |
| * @param commit the commit point to open |
| * @param leafSorter a comparator for sorting leaf readers. This is useful for indices that |
| * are expected to serve many queries with one particular sort criteria (e.g. a descending |
| * sort on timestamp for time-based indices); in that case {@code leafSorter} should order |
| * the leaves by the same criteria, which allows this type of sorted query to terminate |
| * early while iterating through segments and segments' documents. |
| * @return a reader on the given commit point |
| * @throws IOException if there is a low-level IO error |
| */ |
| public static DirectoryReader open(final IndexCommit commit, Comparator<LeafReader> leafSorter) throws IOException { |
| final Directory commitDirectory = commit.getDirectory(); |
| return StandardDirectoryReader.open(commitDirectory, commit, leafSorter); |
| } |
| |
| /** |
| * Opens and returns a new reader if the index has changed since the provided reader was |
| * opened; otherwise returns null. A non-null result is the same type of reader as the |
| * previous one, ie an NRT reader will open a new NRT reader, a MultiReader will open a |
| * new MultiReader, etc. |
| * |
| * <p>Calling this method is typically far less costly than opening a fully new |
| * <code>DirectoryReader</code>, because the new reader shares resources (for example |
| * sub-readers) with the provided <code>DirectoryReader</code> when possible. |
| * |
| * <p>The provided reader is not closed (you are responsible for doing so); if a new reader |
| * is returned you also must eventually close it. Be sure to never close a reader while |
| * other threads are still using it; see {@link SearcherManager} to simplify managing this. |
| * |
| * @param oldReader the reader to check for changes |
| * @return null if there are no changes; else, a new DirectoryReader instance which you |
| * must eventually close |
| * @throws CorruptIndexException if the index is corrupt |
| * @throws IOException if there is a low-level IO error |
| */ |
| public static DirectoryReader openIfChanged(DirectoryReader oldReader) throws IOException { |
| final DirectoryReader reopened = oldReader.doOpenIfChanged(); |
| // A reopen must hand back either null or a distinct instance, never the old reader itself. |
| assert reopened != oldReader; |
| return reopened; |
| } |
| |
| /** |
| * Opens and returns a new reader if the given IndexCommit differs from what the provided |
| * reader is searching; otherwise returns null. |
| * |
| * @param oldReader the reader to compare against |
| * @param commit the commit point the returned reader should search |
| * @return null if the commit does not differ; else, a new DirectoryReader instance |
| * @throws IOException if there is a low-level IO error |
| * |
| * @see #openIfChanged(DirectoryReader) |
| */ |
| public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexCommit commit) throws IOException { |
| final DirectoryReader reopened = oldReader.doOpenIfChanged(commit); |
| // A reopen must hand back either null or a distinct instance, never the old reader itself. |
| assert reopened != oldReader; |
| return reopened; |
| } |
| |
| /** |
| * Expert: If there are changes (committed or not) in the {@link IndexWriter} versus what |
| * the provided reader is searching, opens and returns a new IndexReader searching both |
| * committed and uncommitted changes from the writer; else, returns null (though, the |
| * current implementation never returns null). |
| * |
| * <p>This provides "near real-time" searching, in that changes made during an |
| * {@link IndexWriter} session can be quickly made available for searching without closing |
| * the writer nor calling {@link IndexWriter#commit}. |
| * |
| * <p>It's <i>near</i> real-time because there is no hard guarantee on how quickly you can |
| * get a new reader after making changes with IndexWriter. You'll have to experiment in |
| * your situation to determine if it's fast enough. As this is a new and experimental |
| * feature, please report back on your findings so we can learn, improve and iterate.</p> |
| * |
| * <p>The very first time this method is called, this writer instance will make every |
| * effort to pool the readers that it opens for doing merges, applying deletes, etc. This |
| * means additional resources (RAM, file descriptors, CPU time) will be consumed.</p> |
| * |
| * <p>For lower latency on reopening a reader, you should call |
| * {@link IndexWriterConfig#setMergedSegmentWarmer} to pre-warm a newly merged segment |
| * before it's committed to the index. This is important for minimizing index-to-search |
| * delay after a large merge. </p> |
| * |
| * <p>If an addIndexes* call is running in another thread, then this reader will only |
| * search those segments from the foreign index that have been successfully copied over, |
| * so far.</p> |
| * |
| * <p><b>NOTE</b>: Once the writer is closed, any outstanding readers may continue to be |
| * used. However, if you attempt to reopen any of those readers, you'll hit an |
| * {@link org.apache.lucene.store.AlreadyClosedException}.</p> |
| * |
| * <p>Equivalent to calling |
| * {@link #openIfChanged(DirectoryReader,IndexWriter,boolean)} with |
| * {@code applyAllDeletes} set to true. |
| * |
| * @param oldReader the reader to compare against the writer's state |
| * @param writer The IndexWriter to open from |
| * |
| * @return DirectoryReader that covers entire index plus all changes made so far by this |
| * IndexWriter instance, or null if there are no new changes |
| * |
| * @throws IOException if there is a low-level IO error |
| * |
| * @lucene.experimental |
| */ |
| public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexWriter writer) throws IOException { |
| final boolean applyAllDeletes = true; |
| return openIfChanged(oldReader, writer, applyAllDeletes); |
| } |
| |
| /** |
| * Expert: Opens a new reader from the writer, if there are any changes, letting the caller |
| * control whether past deletions should be applied. |
| * |
| * @see #openIfChanged(DirectoryReader,IndexWriter) |
| * |
| * @param oldReader the reader to compare against the writer's state |
| * |
| * @param writer The IndexWriter to open from |
| * |
| * @param applyAllDeletes If true, all buffered deletes will be applied (made visible) in |
| * the returned reader. If false, the deletes are not applied but remain buffered (in |
| * IndexWriter) so that they will be applied in the future. Applying deletes can be |
| * costly, so an application that can tolerate deleted documents being returned might |
| * gain some performance by passing false. |
| * |
| * @return null if there are no changes; else, a new DirectoryReader instance |
| * |
| * @throws IOException if there is a low-level IO error |
| * |
| * @lucene.experimental |
| */ |
| public static DirectoryReader openIfChanged(DirectoryReader oldReader, IndexWriter writer, boolean applyAllDeletes) throws IOException { |
| final DirectoryReader reopened = oldReader.doOpenIfChanged(writer, applyAllDeletes); |
| // A reopen must hand back either null or a distinct instance, never the old reader itself. |
| assert reopened != oldReader; |
| return reopened; |
| } |
| |
| /** Returns all commit points that exist in the Directory. |
| * Normally, because the default is {@link |
| * KeepOnlyLastCommitDeletionPolicy}, there would be only |
| * one commit point. But if you're using a custom {@link |
| * IndexDeletionPolicy} then there could be many commits. |
| * Once you have a given commit, you can open a reader on |
| * it by calling {@link DirectoryReader#open(IndexCommit)} |
| * There must be at least one commit in |
| * the Directory, else this method throws {@link |
| * IndexNotFoundException}. Note that if a commit is in |
| * progress while this method is running, that commit |
| * may or may not be returned. |
| * |
| * @return a sorted list of {@link IndexCommit}s, from oldest |
| * to latest. */ |
| public static List<IndexCommit> listCommits(Directory dir) throws IOException { |
| final String[] files = dir.listAll(); |
| |
| List<IndexCommit> commits = new ArrayList<>(); |
| |
| SegmentInfos latest = SegmentInfos.readLatestCommit(dir); |
| final long currentGen = latest.getGeneration(); |
| |
| commits.add(new StandardDirectoryReader.ReaderCommit(null, latest, dir)); |
| |
| for(int i=0;i<files.length;i++) { |
| |
| final String fileName = files[i]; |
| |
| if (fileName.startsWith(IndexFileNames.SEGMENTS) && |
| !fileName.equals(IndexFileNames.OLD_SEGMENTS_GEN) && |
| SegmentInfos.generationFromSegmentsFileName(fileName) < currentGen) { |
| |
| SegmentInfos sis = null; |
| try { |
| // IOException allowed to throw there, in case |
| // segments_N is corrupt |
| sis = SegmentInfos.readCommit(dir, fileName); |
| } catch (FileNotFoundException | NoSuchFileException fnfe) { |
| // LUCENE-948: on NFS (and maybe others), if |
| // you have writers switching back and forth |
| // between machines, it's very likely that the |
| // dir listing will be stale and will claim a |
| // file segments_X exists when in fact it |
| // doesn't. So, we catch this and handle it |
| // as if the file does not exist |
| } |
| |
| if (sis != null) { |
| commits.add(new StandardDirectoryReader.ReaderCommit(null, sis, dir)); |
| } |
| } |
| } |
| |
| // Ensure that the commit points are sorted in ascending order. |
| Collections.sort(commits); |
| |
| return commits; |
| } |
| |
| /** |
| * Returns <code>true</code> if an index likely exists at the specified directory. Note |
| * that this method may still return <code>true</code> if a corrupt index exists, or if an |
| * index is in the process of committing. |
| * |
| * @param directory the directory to check for an index |
| * @return <code>true</code> if an index exists; <code>false</code> otherwise |
| * @throws IOException if there is a low-level IO error |
| */ |
| public static boolean indexExists(Directory directory) throws IOException { |
| // LUCENE-2812, LUCENE-2727, LUCENE-4738: this logic will return true in cases |
| // that should arguably be false, such as only IW.prepareCommit has been called, |
| // or a corrupt first commit, but it's too deadly to make this logic "smarter" |
| // and risk accidentally returning false due to various cases like file |
| // description exhaustion, access denied, etc., because in that case IndexWriter |
| // may delete the entire index. It's safer to err towards "index exists" than |
| // try to be smart about detecting not-yet-fully-committed or corrupt indices. |
| // This means that IndexWriter will throw an exception on such indices and the |
| // app must resolve the situation manually: |
| final String[] names = directory.listAll(); |
| final String segmentsPrefix = IndexFileNames.SEGMENTS + "_"; |
| |
| // Stop scanning at the first file whose name carries the segments prefix. |
| boolean found = false; |
| for (int i = 0; i < names.length && !found; i++) { |
| found = names[i].startsWith(segmentsPrefix); |
| } |
| return found; |
| } |
| |
| /** |
| * Expert: Constructs a {@code DirectoryReader} on the given subReaders. |
| * @param directory the index directory; it is stored in this reader and returned by |
| * {@link #directory()}. |
| * @param segmentReaders the wrapped atomic index segment readers. This array is |
| * returned by {@link #getSequentialSubReaders} and used to resolve the correct |
| * subreader for docID-based methods. <b>Please note:</b> This array is <b>not</b> |
| * cloned and not protected for modification outside of this reader. |
| * Subclasses of {@code DirectoryReader} should take care to not allow |
| * modification of this internal array, e.g. {@link #doOpenIfChanged()}. |
| * @param leafSorter a comparator for sorting leaf readers. If not {@code null}, |
| * this comparator is used for sorting leaf readers. |
| * @throws IOException if the superclass constructor, which receives the sub-readers and |
| * the leaf sorter, throws it |
| */ |
| protected DirectoryReader( |
| Directory directory, LeafReader[] segmentReaders, Comparator<LeafReader> leafSorter) throws IOException { |
| super(segmentReaders, leafSorter); |
| this.directory = directory; |
| } |
| |
| /** |
| * Returns the directory this index resides in. |
| * |
| * @return the {@link Directory} this reader was constructed with |
| */ |
| public final Directory directory() { |
| // Deliberately no ensureOpen here -- in certain cases, when a cloned/reopened |
| // reader needs to commit, it may call this method on the closed original reader |
| return this.directory; |
| } |
| |
| /** |
| * Implement this method to support {@link #openIfChanged(DirectoryReader)}, which |
| * delegates to it. |
| * If this reader does not support reopen, return {@code null}, so |
| * client code is happy. This should be consistent with {@link #isCurrent} |
| * (should always return {@code true}) if reopen is not supported. |
| * The returned reader must never be this same instance: the caller asserts that. |
| * @throws IOException if there is a low-level IO error |
| * @return null if there are no changes; else, a new |
| * DirectoryReader instance. |
| */ |
| protected abstract DirectoryReader doOpenIfChanged() throws IOException; |
| |
| /** |
| * Implement this method to support {@link #openIfChanged(DirectoryReader,IndexCommit)}, |
| * which delegates to it. |
| * If this reader does not support reopen from a specific {@link IndexCommit}, |
| * throw {@link UnsupportedOperationException}. |
| * The returned reader must never be this same instance: the caller asserts that. |
| * @param commit the commit point the new reader should search |
| * @throws IOException if there is a low-level IO error |
| * @return null if there are no changes; else, a new |
| * DirectoryReader instance. |
| */ |
| protected abstract DirectoryReader doOpenIfChanged(final IndexCommit commit) throws IOException; |
| |
| /** |
| * Implement this method to support |
| * {@link #openIfChanged(DirectoryReader,IndexWriter,boolean)}, which delegates to it. |
| * If this reader does not support reopen from {@link IndexWriter}, |
| * throw {@link UnsupportedOperationException}. |
| * The returned reader must never be this same instance: the caller asserts that. |
| * @param writer the IndexWriter to open from |
| * @param applyAllDeletes whether buffered deletes should be applied (made visible) in |
| * the returned reader; see {@link #openIfChanged(DirectoryReader,IndexWriter,boolean)} |
| * @throws IOException if there is a low-level IO error |
| * @return null if there are no changes; else, a new |
| * DirectoryReader instance. |
| */ |
| protected abstract DirectoryReader doOpenIfChanged(IndexWriter writer, boolean applyAllDeletes) throws IOException; |
| |
| /** |
| * Version number when this IndexReader was opened. |
| * |
| * <p>This method |
| * returns the version recorded in the commit that the |
| * reader opened. This version is advanced every time |
| * a change is made with {@link IndexWriter}.</p> |
| * |
| * @return the version recorded in the commit that this reader opened |
| */ |
| public abstract long getVersion(); |
| |
| /** |
| * Check whether any new changes have occurred to the |
| * index since this reader was opened. |
| * |
| * <p>If this reader was created by calling {@link #open}, |
| * then this method checks if any further commits |
| * (see {@link IndexWriter#commit}) have occurred in the |
| * directory.</p> |
| * |
| * <p>If instead this reader is a near real-time reader |
| * (ie, obtained by a call to {@link |
| * DirectoryReader#open(IndexWriter)}, or by calling {@link #openIfChanged} |
| * on a near real-time reader), then this method checks if |
| * either a new commit has occurred, or any new |
| * uncommitted changes have taken place via the writer. |
| * Note that even if the writer has only performed |
| * merging, this method will still return false.</p> |
| * |
| * <p>In any event, if this returns false, you should call |
| * {@link #openIfChanged} to get a new reader that sees the |
| * changes.</p> |
| * |
| * @return {@code false} if changes have occurred since this reader was opened, in which |
| * case {@link #openIfChanged} should be used to see them |
| * @throws IOException if there is a low-level IO error |
| */ |
| public abstract boolean isCurrent() throws IOException; |
| |
| /** |
| * Expert: return the IndexCommit that this reader has opened. |
| * |
| * @return the {@link IndexCommit} that this reader has opened |
| * @throws IOException declared so that implementations may report a low-level IO error |
| * @lucene.experimental |
| */ |
| public abstract IndexCommit getIndexCommit() throws IOException; |
| |
| } |