| package org.apache.lucene.index; |
| |
| /* ==================================================================== |
| * The Apache Software License, Version 1.1 |
| * |
| * Copyright (c) 2001 The Apache Software Foundation. All rights |
| * reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in |
| * the documentation and/or other materials provided with the |
| * distribution. |
| * |
| * 3. The end-user documentation included with the redistribution, |
| * if any, must include the following acknowledgment: |
| * "This product includes software developed by the |
| * Apache Software Foundation (http://www.apache.org/)." |
| * Alternately, this acknowledgment may appear in the software itself, |
| * if and wherever such third-party acknowledgments normally appear. |
| * |
| * 4. The names "Apache" and "Apache Software Foundation" and |
| * "Apache Lucene" must not be used to endorse or promote products |
| * derived from this software without prior written permission. For |
| * written permission, please contact apache@apache.org. |
| * |
| * 5. Products derived from this software may not be called "Apache", |
| * "Apache Lucene", nor may "Apache" appear in their name, without |
| * prior written permission of the Apache Software Foundation. |
| * |
| * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
| * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
| * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
| * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| * ==================================================================== |
| * |
| * This software consists of voluntary contributions made by many |
| * individuals on behalf of the Apache Software Foundation. For more |
| * information on the Apache Software Foundation, please see |
| * <http://www.apache.org/>. |
| */ |
| |
| import java.io.IOException; |
| import java.io.File; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| import org.apache.lucene.store.Lock; |
| import org.apache.lucene.document.Document; |
| |
| /** IndexReader is an abstract class, providing an interface for accessing an |
| index. Search of an index is done entirely through this abstract interface, |
| so that any subclass which implements it is searchable. |
| |
| <p> Concrete subclasses of IndexReader are usually constructed with a call to |
| the static method {@link #open}. |
| |
| <p> For efficiency, in this API documents are often referred to via |
| <it>document numbers</it>, non-negative integers which each name a unique |
| document in the index. These document numbers are ephemeral--they may change |
| as documents are added to and deleted from an index. Clients should thus not |
| rely on a given document having the same number between sessions. */ |
| |
| abstract public class IndexReader { |
| protected IndexReader() {}; |
| |
| /** Returns an IndexReader reading the index in an FSDirectory in the named |
| path. */ |
| public static IndexReader open(String path) throws IOException { |
| return open(FSDirectory.getDirectory(path, false)); |
| } |
| |
| /** Returns an IndexReader reading the index in an FSDirectory in the named |
| path. */ |
| public static IndexReader open(File path) throws IOException { |
| return open(FSDirectory.getDirectory(path, false)); |
| } |
| |
| /** Returns an IndexReader reading the index in the given Directory. */ |
| public static IndexReader open(final Directory directory) throws IOException{ |
| synchronized (directory) { // in- & inter-process sync |
| return (IndexReader)new Lock.With(directory.makeLock("commit.lock")) { |
| public Object doBody() throws IOException { |
| SegmentInfos infos = new SegmentInfos(); |
| infos.read(directory); |
| if (infos.size() == 1) // index is optimized |
| return new SegmentReader(infos.info(0), true); |
| |
| SegmentReader[] readers = new SegmentReader[infos.size()]; |
| for (int i = 0; i < infos.size(); i++) |
| readers[i] = new SegmentReader(infos.info(i), i==infos.size()-1); |
| return new SegmentsReader(readers); |
| } |
| }.run(); |
| } |
| } |
| |
| /** Returns the time the index in the named directory was last modified. */ |
| public static long lastModified(String directory) throws IOException { |
| return lastModified(new File(directory)); |
| } |
| |
| /** Returns the time the index in the named directory was last modified. */ |
| public static long lastModified(File directory) throws IOException { |
| return FSDirectory.fileModified(directory, "segments"); |
| } |
| |
| /** Returns the time the index in this directory was last modified. */ |
| public static long lastModified(Directory directory) throws IOException { |
| return directory.fileModified("segments"); |
| } |
| |
| /** Returns the number of documents in this index. */ |
| abstract public int numDocs(); |
| /** Returns one greater than the largest possible document number. |
| This may be used to, e.g., determine how big to allocate an array which |
| will have an element for every document number in an index. |
| */ |
| abstract public int maxDoc(); |
| /** Returns the stored fields of the <code>n</code><sup>th</sup> |
| <code>Document</code> in this index. */ |
| abstract public Document document(int n) throws IOException; |
| |
| /** Returns true if document <i>n</i> has been deleted */ |
| abstract public boolean isDeleted(int n); |
| |
| /** Returns the byte-encoded normalization factor for the named field of |
| every document. This is used by the search code to score documents. |
| @see org.apache.lucene.search.Similarity#norm |
| */ |
| abstract public byte[] norms(String field) throws IOException; |
| |
| /** Returns an enumeration of all the terms in the index. |
| The enumeration is ordered by Term.compareTo(). Each term |
| is greater than all that precede it in the enumeration. |
| */ |
| abstract public TermEnum terms() throws IOException; |
| /** Returns an enumeration of all terms after a given term. |
| The enumeration is ordered by Term.compareTo(). Each term |
| is greater than all that precede it in the enumeration. |
| */ |
| abstract public TermEnum terms(Term t) throws IOException; |
| |
| /** Returns the number of documents containing the term <code>t</code>. */ |
| abstract public int docFreq(Term t) throws IOException; |
| |
| /** Returns an enumeration of all the documents which contain |
| <code>Term</code>. For each document, the document number, the frequency of |
| the term in that document is also provided, for use in search scoring. |
| Thus, this method implements the mapping: |
| <p><ul> |
| Term => <docNum, freq><sup>*</sup> |
| </ul> |
| <p>The enumeration is ordered by document number. Each document number |
| is greater than all that precede it in the enumeration. */ |
| abstract public TermDocs termDocs(Term t) throws IOException; |
| |
| /** Returns an enumeration of all the documents which contain |
| <code>Term</code>. For each document, in addition to the document number |
| and frequency of the term in that document, a list of all of the ordinal |
| positions of the term in the document is available. Thus, this method |
| implements the mapping: |
| |
| <p><ul> |
| Term => <docNum, freq, |
| <pos<sub>1</sub>, pos<sub>2</sub>, ... |
| pos<sub>freq-1</sub>> |
| ><sup>*</sup> |
| </ul> |
| <p> This positional information faciliates phrase and proximity searching. |
| <p>The enumeration is ordered by document number. Each document number is |
| greater than all that precede it in the enumeration. */ |
| abstract public TermPositions termPositions(Term t) throws IOException; |
| |
| /** Deletes the document numbered <code>docNum</code>. Once a document is |
| deleted it will not appear in TermDocs or TermPostitions enumerations. |
| Attempts to read its field with the {@link #document} |
| method will result in an error. The presence of this document may still be |
| reflected in the {@link #docFreq} statistic, though |
| this will be corrected eventually as the index is further modified. */ |
| abstract public void delete(int docNum) throws IOException; |
| |
| /** Deletes all documents containing <code>term</code>. |
| This is useful if one uses a document field to hold a unique ID string for |
| the document. Then to delete such a document, one merely constructs a |
| term with the appropriate field and the unique ID string as its text and |
| passes it to this method. Returns the number of documents deleted. */ |
| public final int delete(Term term) throws IOException { |
| TermDocs docs = termDocs(term); |
| if ( docs == null ) return 0; |
| int n = 0; |
| try { |
| while (docs.next()) { |
| delete(docs.doc()); |
| n++; |
| } |
| } finally { |
| docs.close(); |
| } |
| return n; |
| } |
| |
| /** Closes files associated with this index. |
| Also saves any new deletions to disk. |
| No other methods should be called after this has been called. */ |
| abstract public void close() throws IOException; |
| } |