package org.apache.lucene.index.codecs;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.Writer;
import org.apache.lucene.util.Bits;
/**
* Abstract API that consumes {@link PerDocFieldValues}.
* A {@link DocValuesConsumer} is always associated with a specific field and
* segment. Concrete implementations of this API write the given
* {@link PerDocFieldValues} into an implementation-specific format depending on
* the field's meta-data.
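* <p>
* A minimal usage sketch; <tt>consumer</tt>, <tt>values</tt> and
* <tt>maxDoc</tt> are placeholder names, not part of this API:
*
* <pre>
* consumer.add(0, values); // values must be added in increasing docID order
* consumer.add(3, values); // docIDs 1 and 2 simply have no value for this field
* consumer.finish(maxDoc); // no more values; all tracked bytes are released
* </pre>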
*
* @lucene.experimental
*/
public abstract class DocValuesConsumer {
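/** tracks the number of bytes internally allocated by this consumer */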
protected final AtomicLong bytesUsed;
/**
* Creates a new {@link DocValuesConsumer}.
*
* @param bytesUsed
* bytes-usage tracking reference used by implementations to track
* internally allocated memory. All tracked bytes must be released
* once {@link #finish(int)} has been called.
*/
protected DocValuesConsumer(AtomicLong bytesUsed) {
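// if no shared counter is provided, fall back to a private one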
this.bytesUsed = bytesUsed == null ? new AtomicLong(0) : bytesUsed;
}
/**
* Adds the given {@link PerDocFieldValues} instance to this
* {@link DocValuesConsumer}.
*
* @param docID
* the document ID to add the value for. Each docID must be greater
* than the previously added docID, or <tt>0</tt> if this is the first
* call to this method.
* @param docValues
* the values to add
* @throws IOException
* if an {@link IOException} occurs
*/
public abstract void add(int docID, PerDocFieldValues docValues)
throws IOException;
/**
* Called once the consumer of this API is done adding
* {@link PerDocFieldValues} to this {@link DocValuesConsumer}.
*
* @param docCount
* the total number of documents in this {@link DocValuesConsumer}.
* Must be greater than or equal to the last docID given to
* {@link #add(int, PerDocFieldValues)}.
* @throws IOException
* if an {@link IOException} occurs
*/
public abstract void finish(int docCount) throws IOException;
/**
* Gathers the files associated with this {@link DocValuesConsumer}.
*
* @param files
* the collection of files to add this consumer's files to.
*/
public abstract void files(Collection<String> files) throws IOException;
/**
* Merges the given {@link org.apache.lucene.index.codecs.MergeState} into
* this {@link DocValuesConsumer}.
*
* @param mergeState
* the state to merge
* @param values
* the docValues to merge in
* @throws IOException
* if an {@link IOException} occurs
*/
public void merge(org.apache.lucene.index.codecs.MergeState mergeState,
IndexDocValues values) throws IOException {
assert mergeState != null;
// TODO we need some kind of compatibility notation for values such
// that two slightly different segments can be merged, e.g. fixed vs.
// variable byte length or float32 vs. float64
boolean merged = false;
/*
* We ignore the given DocValues here and merge from the subReaders directly
* to support bulk copies on the DocValues Writer level. If this gets merged
* with MultiDocValues, the writer cannot optimize for bulk-copyable data.
*/
for (int readerIDX = 0; readerIDX < mergeState.readers.size(); readerIDX++) {
final org.apache.lucene.index.codecs.MergeState.IndexReaderAndLiveDocs reader = mergeState.readers.get(readerIDX);
final IndexDocValues r = reader.reader.docValues(mergeState.fieldInfo.name);
if (r != null) {
merged = true;
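// bulk-merge this sub-reader's values, remapped by its docBase within the merged segment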
merge(new Writer.MergeState(r, mergeState.docBase[readerIDX], reader.reader.maxDoc(),
reader.liveDocs));
}
}
if (merged) {
finish(mergeState.mergedDocCount);
}
}
/**
* Merges the given {@link MergeState} into this {@link DocValuesConsumer}.
* {@link MergeState#docBase} must always be increasing. Merging segments out
* of order is not supported.
*
* @param mergeState
* the {@link MergeState} to merge
* @throws IOException
* if an {@link IOException} occurs
*/
protected abstract void merge(MergeState mergeState) throws IOException;
/**
* A specialized, auxiliary MergeState is necessary since we don't want to
* expose writer internals up to the codec's consumer. An instance of this
* class is created for each low-level {@link IndexReader} being merged, to
* support low-level bulk copies.
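* <p>
* For example, the default
* {@link DocValuesConsumer#merge(org.apache.lucene.index.codecs.MergeState, IndexDocValues)}
* implementation creates one instance per merged sub-reader, roughly
* (<tt>subReaderValues</tt>, <tt>docBase</tt> and <tt>liveDocs</tt> are
* illustrative names):
*
* <pre>
* merge(new MergeState(subReaderValues, docBase, subReader.maxDoc(), liveDocs));
* </pre>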
*/
public static class MergeState {
/**
* the source reader for this MergeState - merged values should be read from
* this instance
*/
public final IndexDocValues reader;
/** the absolute docBase for this MergeState within the resulting segment */
public final int docBase;
/** the number of documents in this MergeState */
public final int docCount;
/** the live (not deleted) document bits for this MergeState */
public final Bits liveDocs;
public MergeState(IndexDocValues reader, int docBase, int docCount, Bits liveDocs) {
assert reader != null;
this.reader = reader;
this.docBase = docBase;
this.docCount = docCount;
this.liveDocs = liveDocs;
}
}
}