| package org.apache.lucene.index; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.util.Collection; |
| import java.util.Map; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.HashSet; |
| import java.util.Arrays; |
| import java.io.IOException; |
| |
| import org.apache.lucene.util.ArrayUtil; |
| |
| /** This class implements {@link InvertedDocConsumer}, which |
| * is passed each token produced by the analyzer on each |
| * field. It stores these tokens in a hash table, and |
| * allocates separate byte streams per token. Consumers of |
 * this class, e.g. {@link FreqProxTermsWriter} and {@link
| * TermVectorsTermsWriter}, write their own byte streams |
| * under each term. |
| */ |
| |
| final class TermsHash extends InvertedDocConsumer { |
| |
| final TermsHashConsumer consumer; |
| final TermsHash nextTermsHash; |
| final int bytesPerPosting; |
| final int postingsFreeChunk; |
| final DocumentsWriter docWriter; |
| |
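  // Recycled postings: postingsFreeList holds
  // postingsFreeCount RawPostingList instances available for
  // reuse, out of postingsAllocCount allocated in total so far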
| private RawPostingList[] postingsFreeList = new RawPostingList[1]; |
| private int postingsFreeCount; |
| private int postingsAllocCount; |
  final boolean trackAllocations;
| |
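  // Wires this TermsHash to its consumer and, optionally, a
  // chained TermsHash, and precomputes the per-posting byte
  // count used for memory accounting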
| public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) { |
| this.docWriter = docWriter; |
| this.consumer = consumer; |
| this.nextTermsHash = nextTermsHash; |
| this.trackAllocations = trackAllocations; |
| |
| // Why + 4*POINTER_NUM_BYTE below? |
| // +1: Posting is referenced by postingsFreeList array |
| // +3: Posting is referenced by hash, which |
| // targets 25-50% fill factor; approximate this |
| // as 3X # pointers |
| bytesPerPosting = consumer.bytesPerPosting() + 4*DocumentsWriter.POINTER_NUM_BYTE; |
    postingsFreeChunk = DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting;
| } |
| |
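  // Creates per-thread state with this TermsHash acting as the
  // primary (no existing primary per-thread to link to)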
| InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { |
| return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null); |
| } |
| |
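  // Creates per-thread state for a chained (secondary)
  // TermsHash, linked to the given primary per-thread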
| TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) { |
| return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread); |
| } |
| |
| void setFieldInfos(FieldInfos fieldInfos) { |
| this.fieldInfos = fieldInfos; |
| consumer.setFieldInfos(fieldInfos); |
| } |
| |
  public synchronized void abort() {
| consumer.abort(); |
| if (nextTermsHash != null) |
| nextTermsHash.abort(); |
| } |
| |
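  // After a flush, trim the postings free list if it has grown
  // well beyond the number of postings still allocated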
| void shrinkFreePostings(Map threadsAndFields, DocumentsWriter.FlushState state) { |
| |
| assert postingsFreeCount == postingsAllocCount: Thread.currentThread().getName() + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer; |
| |
| final int newSize = ArrayUtil.getShrinkSize(postingsFreeList.length, postingsAllocCount); |
| if (newSize != postingsFreeList.length) { |
| RawPostingList[] newArray = new RawPostingList[newSize]; |
| System.arraycopy(postingsFreeList, 0, newArray, 0, postingsFreeCount); |
| postingsFreeList = newArray; |
| } |
| } |
| |
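  // Propagates closeDocStore down the consumer and the chained
  // TermsHash, if present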
| synchronized void closeDocStore(DocumentsWriter.FlushState state) throws IOException { |
| consumer.closeDocStore(state); |
| if (nextTermsHash != null) |
| nextTermsHash.closeDocStore(state); |
| } |
| |
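  // Flushes a segment: remaps each per-thread's fields onto the
  // consumer's equivalents (and the chained TermsHash's, if
  // present), then delegates the actual writing to the consumer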
| synchronized void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException { |
| Map childThreadsAndFields = new HashMap(); |
    Map nextThreadsAndFields = nextTermsHash != null ? new HashMap() : null;
| |
| Iterator it = threadsAndFields.entrySet().iterator(); |
| while(it.hasNext()) { |
| |
| Map.Entry entry = (Map.Entry) it.next(); |
| |
| TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey(); |
| |
| Collection fields = (Collection) entry.getValue(); |
| |
| Iterator fieldsIt = fields.iterator(); |
| Collection childFields = new HashSet(); |
      Collection nextChildFields = nextTermsHash != null ? new HashSet() : null;
| |
| while(fieldsIt.hasNext()) { |
| TermsHashPerField perField = (TermsHashPerField) fieldsIt.next(); |
| childFields.add(perField.consumer); |
| if (nextTermsHash != null) |
| nextChildFields.add(perField.nextPerField); |
| } |
| |
| childThreadsAndFields.put(perThread.consumer, childFields); |
| if (nextTermsHash != null) |
| nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields); |
| } |
| |
| consumer.flush(childThreadsAndFields, state); |
| |
| shrinkFreePostings(threadsAndFields, state); |
| |
| if (nextTermsHash != null) |
| nextTermsHash.flush(nextThreadsAndFields, state); |
| } |
| |
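  // Frees up to one chunk of recycled postings from the free
  // list so they can be GC'd, then asks the chained TermsHash to
  // do the same; returns true if any memory was freed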
  public synchronized boolean freeRAM() {
| |
| if (!trackAllocations) |
| return false; |
| |
    final int numToFree = Math.min(postingsFreeChunk, postingsFreeCount);
    boolean any = numToFree > 0;
    if (any) {
      Arrays.fill(postingsFreeList, postingsFreeCount-numToFree, postingsFreeCount, null);
      postingsFreeCount -= numToFree;
      postingsAllocCount -= numToFree;
      docWriter.bytesAllocated(-numToFree * bytesPerPosting);
    }
| |
| if (nextTermsHash != null) |
| any |= nextTermsHash.freeRAM(); |
| |
| return any; |
| } |
| |
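  // Returns postings to the free list so a later getPostings
  // call can reuse them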
  public synchronized void recyclePostings(final RawPostingList[] postings, final int numPostings) {
| |
| assert postings.length >= numPostings; |
| |
| // Move all Postings from this ThreadState back to our |
| // free list. We pre-allocated this array while we were |
| // creating Postings to make sure it's large enough |
| assert postingsFreeCount + numPostings <= postingsFreeList.length; |
| System.arraycopy(postings, 0, postingsFreeList, postingsFreeCount, numPostings); |
| postingsFreeCount += numPostings; |
| } |
| |
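  // Fills the given array with postings, drawing from the free
  // list first and allocating any remainder through the consumer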
  public synchronized void getPostings(final RawPostingList[] postings) {
| |
| assert docWriter.writer.testPoint("TermsHash.getPostings start"); |
| |
| assert postingsFreeCount <= postingsFreeList.length; |
| assert postingsFreeCount <= postingsAllocCount: "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount; |
| |
    final int numToCopy = Math.min(postingsFreeCount, postings.length);
| final int start = postingsFreeCount-numToCopy; |
| assert start >= 0; |
| assert start + numToCopy <= postingsFreeList.length; |
| assert numToCopy <= postings.length; |
| System.arraycopy(postingsFreeList, start, |
| postings, 0, numToCopy); |
| |
| // Directly allocate the remainder if any |
| if (numToCopy != postings.length) { |
| final int extra = postings.length - numToCopy; |
| final int newPostingsAllocCount = postingsAllocCount + extra; |
| |
| consumer.createPostings(postings, numToCopy, extra); |
| assert docWriter.writer.testPoint("TermsHash.getPostings after create"); |
| postingsAllocCount += extra; |
| |
| if (trackAllocations) |
| docWriter.bytesAllocated(extra * bytesPerPosting); |
| |
| if (newPostingsAllocCount > postingsFreeList.length) |
| // Pre-allocate the postingsFreeList so it's large |
| // enough to hold all postings we've given out |
| postingsFreeList = new RawPostingList[ArrayUtil.getNextSize(newPostingsAllocCount)]; |
| } |
| |
| postingsFreeCount -= numToCopy; |
| |
| if (trackAllocations) |
| docWriter.bytesUsed(postings.length * bytesPerPosting); |
| } |
| } |