lucene/src/java/org/apache/lucene/index/Terms.java - lucene-solr - Git at Google

 package org.apache.lucene.index;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.Comparator;

 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.automaton.CompiledAutomaton;

 /**
  * Access to the terms in a specific field.  See {@link Fields}.
  * @lucene.experimental
  */

 public abstract class Terms {

   // Privately cache a TermsEnum per-thread for looking up
   // docFreq and getting a private DocsEnum
   private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>();

   /** Returns an iterator that will step through all
    *  terms. This method will not return null.  If you have
    *  a previous TermsEnum, for example from a different
    *  field, you can pass it for possible reuse if the
    *  implementation can do so. */
   public abstract TermsEnum iterator(TermsEnum reuse) throws IOException;

   /** Returns a TermsEnum that iterates over all terms that
    *  are accepted by the provided {@link
    *  CompiledAutomaton}.  If the <code>startTerm</code> is
    *  provided then the returned enum will only accept terms
    *  > <code>startTerm</code>, but you still must call
    *  next() first to get to the first term.  Note that the
    *  provided <code>startTerm</code> must be accepted by
    *  the automaton.
    *
    * <p><b>NOTE</b>: the returned TermsEnum cannot
    * seek</p>. */
   public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
     // TODO: eventually we could support seekCeil/Exact on
     // the returned enum, instead of only being able to seek
     // at the start
     if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
       throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
     }
     if (startTerm == null) {
       return new AutomatonTermsEnum(iterator(null), compiled);
     } else {
       return new AutomatonTermsEnum(iterator(null), compiled) {
         @Override
         protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
           if (term == null) {
             term = startTerm;
           }
           return super.nextSeekTerm(term);
         }
       };
     }
   }

   /** Return the BytesRef Comparator used to sort terms
    *  provided by the iterator.  This method may return null
    *  if there are no terms.  This method may be invoked
    *  many times; it's best to cache a single instance &
    *  reuse it. */
   public abstract Comparator<BytesRef> getComparator() throws IOException;

   /** Returns the number of documents containing the
    *  specified term text.  Returns 0 if the term does not
    *  exist. */
   public int docFreq(BytesRef text) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
     if (termsEnum.seekExact(text, true)) {
       return termsEnum.docFreq();
     } else {
       return 0;
     }
   }

   /** Returns the total number of occurrences of this term
    *  across all documents (the sum of the freq() for each
    *  doc that has this term).  This will be -1 if the
    *  codec doesn't support this measure.  Note that, like
    *  other term measures, this measure does not take
    *  deleted documents into account. */
   public long totalTermFreq(BytesRef text) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
     if (termsEnum.seekExact(text, true)) {
       return termsEnum.totalTermFreq();
     } else {
       return 0;
     }
   }

   /** Get {@link DocsEnum} for the specified term.  This
    *  method may return null if the term does not exist. */
   public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
     if (termsEnum.seekExact(text, true)) {
       return termsEnum.docs(liveDocs, reuse);
     } else {
       return null;
     }
   }

   /** Get {@link DocsEnum} for the specified term.  This
    *  method will may return null if the term does not
    *  exists, or positions were not indexed. */
   public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
     if (termsEnum.seekExact(text, true)) {
       return termsEnum.docsAndPositions(liveDocs, reuse);
     } else {
       return null;
     }
   }

   /**
    * Expert: Get {@link DocsEnum} for the specified {@link TermState}.
    * This method may return <code>null</code> if the term does not exist.
    *
    * @see TermsEnum#termState()
    * @see TermsEnum#seekExact(BytesRef, TermState) */
   public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
     termsEnum.seekExact(term, termState);
     return termsEnum.docs(liveDocs, reuse);
   }

   /**
    * Get {@link DocsEnum} for the specified {@link TermState}. This
    * method will may return <code>null</code> if the term does not exists, or positions were
    * not indexed.
    *
    * @see TermsEnum#termState()
    * @see TermsEnum#seekExact(BytesRef, TermState) */
   public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
     final TermsEnum termsEnum = getThreadTermsEnum();
     termsEnum.seekExact(term, termState);
     return termsEnum.docsAndPositions(liveDocs, reuse);
   }

   /** Returns the number of terms for this field, or -1 if this
    *  measure isn't stored by the codec. Note that, just like
    *  other term measures, this measure does not take deleted
    *  documents into account. */
   public abstract long getUniqueTermCount() throws IOException;

   /** Returns the sum of {@link TermsEnum#totalTermFreq} for
    *  all terms in this field, or -1 if this measure isn't
    *  stored by the codec (or if this fields omits term freq
    *  and positions).  Note that, just like other term
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract long getSumTotalTermFreq() throws IOException;

   /** Returns the sum of {@link #docFreq(BytesRef)} for
    *  all terms in this field, or -1 if this measure isn't
    *  stored by the codec.  Note that, just like other term
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract long getSumDocFreq() throws IOException;

   /** Returns the number of documents that have at least one
    *  term for this field, or -1 if this measure isn't
    *  stored by the codec.  Note that, just like other term
    *  measures, this measure does not take deleted documents
    *  into account. */
   public abstract int getDocCount() throws IOException;

   /**
    * Returns a thread-private {@link TermsEnum} instance. Obtaining
    * {@link TermsEnum} from this method might be more efficient than using
    * {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a
    * new {@link TermsEnum} instance.
    * <p>
    * NOTE: {@link TermsEnum} instances obtained from this method must not be
    * shared across threads. The enum should only be used within a local context
    * where other threads can't access it.
    *
    * @return a thread-private {@link TermsEnum} instance
    * @throws IOException
    *           if an IOException occurs
    * @lucene.internal
    */
   public TermsEnum getThreadTermsEnum() throws IOException {
     TermsEnum termsEnum = threadEnums.get();
     if (termsEnum == null) {
       termsEnum = iterator(null);
       threadEnums.set(termsEnum);
     }
     return termsEnum;
   }

   // subclass must close when done:
   protected void close() {
     threadEnums.close();
   }
   public final static Terms[] EMPTY_ARRAY = new Terms[0];
 }
	package org.apache.lucene.index;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.Comparator;

	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.CloseableThreadLocal;
	import org.apache.lucene.util.automaton.CompiledAutomaton;

	/**
	* Access to the terms in a specific field. See {@link Fields}.
	* @lucene.experimental
	*/

	public abstract class Terms {

	// Privately cache a TermsEnum per-thread for looking up
	// docFreq and getting a private DocsEnum
	private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>();

	/** Returns an iterator that will step through all
	* terms. This method will not return null. If you have
	* a previous TermsEnum, for example from a different
	* field, you can pass it for possible reuse if the
	* implementation can do so. */
	public abstract TermsEnum iterator(TermsEnum reuse) throws IOException;

	/** Returns a TermsEnum that iterates over all terms that
	* are accepted by the provided {@link
	* CompiledAutomaton}. If the <code>startTerm</code> is
	* provided then the returned enum will only accept terms
	* > <code>startTerm</code>, but you still must call
	* next() first to get to the first term. Note that the
	* provided <code>startTerm</code> must be accepted by
	* the automaton.
	*
	* <p><b>NOTE</b>: the returned TermsEnum cannot
	* seek</p>. */
	public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
	// TODO: eventually we could support seekCeil/Exact on
	// the returned enum, instead of only being able to seek
	// at the start
	if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
	throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
	}
	if (startTerm == null) {
	return new AutomatonTermsEnum(iterator(null), compiled);
	} else {
	return new AutomatonTermsEnum(iterator(null), compiled) {
	@Override
	protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
	if (term == null) {
	term = startTerm;
	}
	return super.nextSeekTerm(term);
	}
	};
	}
	}

	/** Return the BytesRef Comparator used to sort terms
	* provided by the iterator. This method may return null
	* if there are no terms. This method may be invoked
	* many times; it's best to cache a single instance &
	* reuse it. */
	public abstract Comparator<BytesRef> getComparator() throws IOException;

	/** Returns the number of documents containing the
	* specified term text. Returns 0 if the term does not
	* exist. */
	public int docFreq(BytesRef text) throws IOException {
	final TermsEnum termsEnum = getThreadTermsEnum();
	if (termsEnum.seekExact(text, true)) {
	return termsEnum.docFreq();
	} else {
	return 0;
	}
	}

	/** Returns the total number of occurrences of this term
	* across all documents (the sum of the freq() for each
	* doc that has this term). This will be -1 if the
	* codec doesn't support this measure. Note that, like
	* other term measures, this measure does not take
	* deleted documents into account. */
	public long totalTermFreq(BytesRef text) throws IOException {
	final TermsEnum termsEnum = getThreadTermsEnum();
	if (termsEnum.seekExact(text, true)) {
	return termsEnum.totalTermFreq();
	} else {
	return 0;
	}
	}

	/** Get {@link DocsEnum} for the specified term. This
	* method may return null if the term does not exist. */
	public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
	final TermsEnum termsEnum = getThreadTermsEnum();
	if (termsEnum.seekExact(text, true)) {
	return termsEnum.docs(liveDocs, reuse);
	} else {
	return null;
	}
	}

	/** Get {@link DocsEnum} for the specified term. This
	* method will may return null if the term does not
	* exists, or positions were not indexed. */
	public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
	final TermsEnum termsEnum = getThreadTermsEnum();
	if (termsEnum.seekExact(text, true)) {
	return termsEnum.docsAndPositions(liveDocs, reuse);
	} else {
	return null;
	}
	}

	/**
	* Expert: Get {@link DocsEnum} for the specified {@link TermState}.
	* This method may return <code>null</code> if the term does not exist.
	*
	* @see TermsEnum#termState()
	* @see TermsEnum#seekExact(BytesRef, TermState) */
	public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
	final TermsEnum termsEnum = getThreadTermsEnum();
	termsEnum.seekExact(term, termState);
	return termsEnum.docs(liveDocs, reuse);
	}

	/**
	* Get {@link DocsEnum} for the specified {@link TermState}. This
	* method will may return <code>null</code> if the term does not exists, or positions were
	* not indexed.
	*
	* @see TermsEnum#termState()
	* @see TermsEnum#seekExact(BytesRef, TermState) */
	public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
	final TermsEnum termsEnum = getThreadTermsEnum();
	termsEnum.seekExact(term, termState);
	return termsEnum.docsAndPositions(liveDocs, reuse);
	}

	/** Returns the number of terms for this field, or -1 if this
	* measure isn't stored by the codec. Note that, just like
	* other term measures, this measure does not take deleted
	* documents into account. */
	public abstract long getUniqueTermCount() throws IOException;

	/** Returns the sum of {@link TermsEnum#totalTermFreq} for
	* all terms in this field, or -1 if this measure isn't
	* stored by the codec (or if this fields omits term freq
	* and positions). Note that, just like other term
	* measures, this measure does not take deleted documents
	* into account. */
	public abstract long getSumTotalTermFreq() throws IOException;

	/** Returns the sum of {@link #docFreq(BytesRef)} for
	* all terms in this field, or -1 if this measure isn't
	* stored by the codec. Note that, just like other term
	* measures, this measure does not take deleted documents
	* into account. */
	public abstract long getSumDocFreq() throws IOException;

	/** Returns the number of documents that have at least one
	* term for this field, or -1 if this measure isn't
	* stored by the codec. Note that, just like other term
	* measures, this measure does not take deleted documents
	* into account. */
	public abstract int getDocCount() throws IOException;

	/**
	* Returns a thread-private {@link TermsEnum} instance. Obtaining
	* {@link TermsEnum} from this method might be more efficient than using
	* {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a
	* new {@link TermsEnum} instance.
	* <p>
	* NOTE: {@link TermsEnum} instances obtained from this method must not be
	* shared across threads. The enum should only be used within a local context
	* where other threads can't access it.
	*
	* @return a thread-private {@link TermsEnum} instance
	* @throws IOException
	* if an IOException occurs
	* @lucene.internal
	*/
	public TermsEnum getThreadTermsEnum() throws IOException {
	TermsEnum termsEnum = threadEnums.get();
	if (termsEnum == null) {
	termsEnum = iterator(null);
	threadEnums.set(termsEnum);
	}
	return termsEnum;
	}

	// subclass must close when done:
	protected void close() {
	threadEnums.close();
	}
	public final static Terms[] EMPTY_ARRAY = new Terms[0];
	}