| package org.apache.lucene.index; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| |
| import org.apache.lucene.document.Fieldable; |
| import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.util.UnicodeUtil; |
| |
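| /** Collects the postings for one field into an in-memory hash keyed by |
| * term text. Term text is interned into the shared char pool, per-term |
| * state lives in RawPostingList instances pulled from a free list, and |
| * the per-term byte streams written by the consumer go into slices of |
| * the shared int/byte block pools. If a secondary TermsHash is |
| * configured, the same tokens are forwarded to nextPerField, which |
| * hashes by the already-interned textStart rather than by term text. */ |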
| final class TermsHashPerField extends InvertedDocConsumerPerField { |
| |
| final TermsHashConsumerPerField consumer; |
| final TermsHashPerField nextPerField; |
| final TermsHashPerThread perThread; |
| final DocumentsWriter.DocState docState; |
| final DocInverter.FieldInvertState fieldState; |
| |
| // Copied from our perThread |
| final CharBlockPool charPool; |
| final IntBlockPool intPool; |
| final ByteBlockPool bytePool; |
| |
| final int streamCount; |
| final int numPostingInt; |
| |
| final FieldInfo fieldInfo; |
| |
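| // Open-addressed hash of the postings for this field. The size is |
| // always a power of 2 so hash codes can be masked instead of modded. |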
| boolean postingsCompacted; |
| int numPostings; |
| private int postingsHashSize = 4; |
| private int postingsHashHalfSize = postingsHashSize/2; |
| private int postingsHashMask = postingsHashSize-1; |
| private RawPostingList[] postingsHash = new RawPostingList[postingsHashSize]; |
| private RawPostingList p; |
| |
| public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) { |
| this.perThread = perThread; |
| intPool = perThread.intPool; |
| charPool = perThread.charPool; |
| bytePool = perThread.bytePool; |
| docState = perThread.docState; |
| fieldState = docInverterPerField.fieldState; |
| this.consumer = perThread.consumer.addField(this, fieldInfo); |
| streamCount = consumer.getStreamCount(); |
| numPostingInt = 2*streamCount; |
| this.fieldInfo = fieldInfo; |
| if (nextPerThread != null) |
| nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo); |
| else |
| nextPerField = null; |
| } |
| |
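| /** Frees excess hash capacity after a flush: the table is halved while |
| * it stays a power of 2, remains at least 8, and is still more than |
| * four times targetSize; a fresh empty array is allocated at the new |
| * size. */ |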
| void shrinkHash(int targetSize) { |
| assert postingsCompacted || numPostings == 0; |
| |
| // Cannot use ArrayUtil.shrink because we require power |
| // of 2: |
| int newSize = postingsHash.length; |
| while(newSize >= 8 && newSize/4 > targetSize) { |
| newSize /= 2; |
| } |
| |
| if (newSize != postingsHash.length) { |
| postingsHash = new RawPostingList[newSize]; |
| postingsHashSize = newSize; |
| postingsHashHalfSize = newSize/2; |
| postingsHashMask = newSize-1; |
| } |
| } |
| |
| public void reset() { |
| if (!postingsCompacted) |
| compactPostings(); |
| assert numPostings <= postingsHash.length; |
| if (numPostings > 0) { |
| perThread.termsHash.recyclePostings(postingsHash, numPostings); |
| Arrays.fill(postingsHash, 0, numPostings, null); |
| numPostings = 0; |
| } |
| postingsCompacted = false; |
| if (nextPerField != null) |
| nextPerField.reset(); |
| } |
| |
| public synchronized void abort() { |
| reset(); |
| if (nextPerField != null) |
| nextPerField.abort(); |
| } |
| |
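| /** Positions reader over the byte slices written to the given stream of |
| * posting p, from the stream's first slice up to its current write |
| * address (stored in the per-term int block). */ |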
| public void initReader(ByteSliceReader reader, RawPostingList p, int stream) { |
| assert stream < streamCount; |
| final int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; |
| final int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK; |
| reader.init(bytePool, |
| p.byteStart+stream*ByteBlockPool.FIRST_LEVEL_SIZE, |
| ints[upto+stream]); |
| } |
| |
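| // Moves all non-null entries to the front of postingsHash so that |
| // [0, numPostings) is dense; the table cannot be probed again until |
| // it is reset. |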
| private synchronized void compactPostings() { |
| int upto = 0; |
| for(int i=0;i<postingsHashSize;i++) { |
| if (postingsHash[i] != null) { |
| if (upto < i) { |
| postingsHash[upto] = postingsHash[i]; |
| postingsHash[i] = null; |
| } |
| upto++; |
| } |
| } |
| |
| assert upto == numPostings; |
| postingsCompacted = true; |
| } |
| |
| /** Collapse the hash table & sort in-place. */ |
| public RawPostingList[] sortPostings() { |
| compactPostings(); |
| quickSort(postingsHash, 0, numPostings-1); |
| return postingsHash; |
| } |
| |
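| // In-place recursive quicksort of the compacted postings by term text, |
| // using median-of-three pivot selection. |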
| void quickSort(RawPostingList[] postings, int lo, int hi) { |
| if (lo >= hi) |
| return; |
| else if (hi == 1+lo) { |
| if (comparePostings(postings[lo], postings[hi]) > 0) { |
| final RawPostingList tmp = postings[lo]; |
| postings[lo] = postings[hi]; |
| postings[hi] = tmp; |
| } |
| return; |
| } |
| |
| int mid = (lo + hi) >>> 1; |
| |
| if (comparePostings(postings[lo], postings[mid]) > 0) { |
| RawPostingList tmp = postings[lo]; |
| postings[lo] = postings[mid]; |
| postings[mid] = tmp; |
| } |
| |
| if (comparePostings(postings[mid], postings[hi]) > 0) { |
| RawPostingList tmp = postings[mid]; |
| postings[mid] = postings[hi]; |
| postings[hi] = tmp; |
| |
| if (comparePostings(postings[lo], postings[mid]) > 0) { |
| RawPostingList tmp2 = postings[lo]; |
| postings[lo] = postings[mid]; |
| postings[mid] = tmp2; |
| } |
| } |
| |
| int left = lo + 1; |
| int right = hi - 1; |
| |
| if (left >= right) |
| return; |
| |
| RawPostingList partition = postings[mid]; |
| |
| while (true) { |
| while (comparePostings(postings[right], partition) > 0) |
| --right; |
| |
| while (left < right && comparePostings(postings[left], partition) <= 0) |
| ++left; |
| |
| if (left < right) { |
| RawPostingList tmp = postings[left]; |
| postings[left] = postings[right]; |
| postings[right] = tmp; |
| --right; |
| } else { |
| break; |
| } |
| } |
| |
| quickSort(postings, lo, left); |
| quickSort(postings, left + 1, hi); |
| } |
| |
| /** Compares the term text for two RawPostingList instances and |
| * returns -1 if p1 < p2; 1 if p1 > p2; else 0. */ |
| int comparePostings(RawPostingList p1, RawPostingList p2) { |
| |
| if (p1 == p2) |
| return 0; |
| |
| final char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; |
| int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK; |
| final char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; |
| int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK; |
| |
| assert text1 != text2 || pos1 != pos2; |
| |
| while(true) { |
| final char c1 = text1[pos1++]; |
| final char c2 = text2[pos2++]; |
| if (c1 != c2) { |
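| // 0xffff terminates each term in the char pool. A term that is a |
| // prefix of the other must sort first, so the sentinel is checked |
| // explicitly instead of being compared as an ordinary char. |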
| if (0xffff == c2) |
| return 1; |
| else if (0xffff == c1) |
| return -1; |
| else |
| return c1-c2; |
| } else |
| // This method should never compare equal postings |
| // unless p1==p2 |
| assert c1 != 0xffff; |
| } |
| } |
| |
| /** Tests whether the text for the current RawPostingList p equals the |
| * current tokenText. */ |
| private boolean postingEquals(final char[] tokenText, final int tokenTextLen) { |
| |
| final char[] text = perThread.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; |
| assert text != null; |
| int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK; |
| |
| int tokenPos = 0; |
| for(;tokenPos<tokenTextLen;pos++,tokenPos++) |
| if (tokenText[tokenPos] != text[pos]) |
| return false; |
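| // The token matched up to its length; the postings are equal only if |
| // the stored term also ends here (0xffff sentinel). |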
| return 0xffff == text[pos]; |
| } |
| |
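| // Whether our consumer (doCall) and the next TermsHash in the chain |
| // (doNextCall) want to see the tokens for the current fields; set by |
| // start(). |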
| private boolean doCall; |
| private boolean doNextCall; |
| |
| boolean start(Fieldable[] fields, int count) throws IOException { |
| doCall = consumer.start(fields, count); |
| if (nextPerField != null) |
| doNextCall = nextPerField.start(fields, count); |
| return doCall || doNextCall; |
| } |
| |
| // Secondary entry point (for the 2nd and subsequent TermsHash in the |
| // chain): the token text has already been "interned" into textStart, |
| // so we hash by textStart. |
| public void add(Token token, int textStart) throws IOException { |
| |
| int code = textStart; |
| |
| int hashPos = code & postingsHashMask; |
| |
| assert !postingsCompacted; |
| |
| // Locate RawPostingList in hash |
| p = postingsHash[hashPos]; |
| |
| if (p != null && p.textStart != textStart) { |
| // Conflict: keep searching different locations in |
| // the hash table. |
| final int inc = ((code>>8)+code)|1; |
| do { |
| code += inc; |
| hashPos = code & postingsHashMask; |
| p = postingsHash[hashPos]; |
| } while (p != null && p.textStart != textStart); |
| } |
| |
| if (p == null) { |
| |
| // First time we are seeing this token since we last |
| // flushed the hash. |
| |
| // Refill? |
| if (0 == perThread.freePostingsCount) |
| perThread.morePostings(); |
| |
| // Pull next free RawPostingList from free list |
| p = perThread.freePostings[--perThread.freePostingsCount]; |
| assert p != null; |
| |
| p.textStart = textStart; |
| |
| assert postingsHash[hashPos] == null; |
| postingsHash[hashPos] = p; |
| numPostings++; |
| |
| if (numPostings == postingsHashHalfSize) |
| rehashPostings(2*postingsHashSize); |
| |
| // Init stream slices |
| if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) |
| intPool.nextBuffer(); |
| |
| if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) |
| bytePool.nextBuffer(); |
| |
| intUptos = intPool.buffer; |
| intUptoStart = intPool.intUpto; |
| intPool.intUpto += streamCount; |
| |
| p.intStart = intUptoStart + intPool.intOffset; |
| |
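| // For each stream, allocate a first-level byte slice and record its |
| // absolute address (slice start + byte pool offset) in the int block; |
| // each of these ints always points at the next write position for its |
| // stream. |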
| for(int i=0;i<streamCount;i++) { |
| final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE); |
| intUptos[intUptoStart+i] = upto + bytePool.byteOffset; |
| } |
| p.byteStart = intUptos[intUptoStart]; |
| |
| consumer.newTerm(token, p); |
| |
| } else { |
| intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; |
| intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK; |
| consumer.addTerm(token, p); |
| } |
| } |
| |
| // Primary entry point (for first TermsHash) |
| void add(Token token) throws IOException { |
| |
| assert !postingsCompacted; |
| |
| // We are first in the chain, so we must "intern" the |
| // term text into a textStart address. |
| |
| // Get the text of this term. |
| final char[] tokenText = token.termBuffer(); |
| final int tokenTextLen = token.termLength(); |
| |
| // Compute the hash code and replace any invalid UTF-16 sequences |
| int downto = tokenTextLen; |
| int code = 0; |
| while (downto > 0) { |
| char ch = tokenText[--downto]; |
| |
| if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) { |
| if (0 == downto) { |
| // Unpaired |
| ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; |
| } else { |
| final char ch2 = tokenText[downto-1]; |
| if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END) { |
| // OK: high followed by low. This is a valid |
| // surrogate pair. |
| code = ((code*31) + ch)*31+ch2; |
| downto--; |
| continue; |
| } else { |
| // Unpaired |
| ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; |
| } |
| } |
| } else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END) |
| // Unpaired |
| ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; |
| |
| code = (code*31) + ch; |
| } |
| |
| int hashPos = code & postingsHashMask; |
| |
| // Locate RawPostingList in hash |
| p = postingsHash[hashPos]; |
| |
| if (p != null && !postingEquals(tokenText, tokenTextLen)) { |
| // Conflict: keep searching different locations in |
| // the hash table. |
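| // The increment is forced odd so that, with a power-of-2 table size, |
| // the probe sequence eventually visits every slot. |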
| final int inc = ((code>>8)+code)|1; |
| do { |
| code += inc; |
| hashPos = code & postingsHashMask; |
| p = postingsHash[hashPos]; |
| } while (p != null && !postingEquals(tokenText, tokenTextLen)); |
| } |
| |
| if (p == null) { |
| |
| // First time we are seeing this token since we last |
| // flushed the hash. |
| final int textLen1 = 1+tokenTextLen; |
| if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE) { |
| if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE) { |
| // Just skip this term, to remain as robust as |
| // possible during indexing. A TokenFilter |
| // can be inserted into the analyzer chain if |
| // other behavior is wanted (pruning the term |
| // to a prefix, throwing an exception, etc). |
| |
| if (docState.maxTermPrefix == null) |
| docState.maxTermPrefix = new String(tokenText, 0, 30); |
| |
| consumer.skippingLongTerm(token); |
| return; |
| } |
| charPool.nextBuffer(); |
| } |
| |
| // Refill? |
| if (0 == perThread.freePostingsCount) |
| perThread.morePostings(); |
| |
| // Pull next free RawPostingList from free list |
| p = perThread.freePostings[--perThread.freePostingsCount]; |
| assert p != null; |
| |
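| // Intern the term text into the char pool, terminated by a 0xffff |
| // sentinel that marks its end for postingEquals/comparePostings. |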
| final char[] text = charPool.buffer; |
| final int textUpto = charPool.charUpto; |
| p.textStart = textUpto + charPool.charOffset; |
| charPool.charUpto += textLen1; |
| System.arraycopy(tokenText, 0, text, textUpto, tokenTextLen); |
| text[textUpto+tokenTextLen] = 0xffff; |
| |
| assert postingsHash[hashPos] == null; |
| postingsHash[hashPos] = p; |
| numPostings++; |
| |
| if (numPostings == postingsHashHalfSize) |
| rehashPostings(2*postingsHashSize); |
| |
| // Init stream slices |
| if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) |
| intPool.nextBuffer(); |
| |
| if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) |
| bytePool.nextBuffer(); |
| |
| intUptos = intPool.buffer; |
| intUptoStart = intPool.intUpto; |
| intPool.intUpto += streamCount; |
| |
| p.intStart = intUptoStart + intPool.intOffset; |
| |
| for(int i=0;i<streamCount;i++) { |
| final int upto = bytePool.newSlice(ByteBlockPool.FIRST_LEVEL_SIZE); |
| intUptos[intUptoStart+i] = upto + bytePool.byteOffset; |
| } |
| p.byteStart = intUptos[intUptoStart]; |
| |
| consumer.newTerm(token, p); |
| |
| } else { |
| intUptos = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; |
| intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK; |
| consumer.addTerm(token, p); |
| } |
| |
| if (doNextCall) |
| nextPerField.add(token, p.textStart); |
| } |
| |
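| // Write pointers for the current term: intUptos is the int pool buffer |
| // holding them and intUptoStart the offset of the first stream's |
| // pointer within that buffer. |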
| int[] intUptos; |
| int intUptoStart; |
| |
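| /** Appends one byte to the given stream of the current term. A non-zero |
| * byte at the write position means the current slice is exhausted (the |
| * byte pool keeps buffers zero-filled and ends each slice with a |
| * non-zero sentinel), so a larger slice is allocated and the stream's |
| * write address is updated. */ |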
| void writeByte(int stream, byte b) { |
| int upto = intUptos[intUptoStart+stream]; |
| byte[] bytes = bytePool.buffers[upto >> DocumentsWriter.BYTE_BLOCK_SHIFT]; |
| assert bytes != null; |
| int offset = upto & DocumentsWriter.BYTE_BLOCK_MASK; |
| if (bytes[offset] != 0) { |
| // End of slice; allocate a new one |
| offset = bytePool.allocSlice(bytes, offset); |
| bytes = bytePool.buffer; |
| intUptos[intUptoStart+stream] = offset + bytePool.byteOffset; |
| } |
| bytes[offset] = b; |
| (intUptos[intUptoStart+stream])++; |
| } |
| |
| public void writeBytes(int stream, byte[] b, int offset, int len) { |
| // TODO: optimize |
| final int end = offset + len; |
| for(int i=offset;i<end;i++) |
| writeByte(stream, b[i]); |
| } |
| |
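| // Standard VInt encoding: 7 bits per byte, least-significant bits |
| // first, with the high bit set on all but the final byte. |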
| void writeVInt(int stream, int i) { |
| assert stream < streamCount; |
| while ((i & ~0x7F) != 0) { |
| writeByte(stream, (byte)((i & 0x7f) | 0x80)); |
| i >>>= 7; |
| } |
| writeByte(stream, (byte) i); |
| } |
| |
| void finish() throws IOException { |
| consumer.finish(); |
| if (nextPerField != null) |
| nextPerField.finish(); |
| } |
| |
| /** Called when the postings hash is too small for its content (> 50% |
| * occupied) or too large (< 20% occupied); rebuilds the hash with |
| * newSize slots. */ |
| void rehashPostings(final int newSize) { |
| |
| final int newMask = newSize-1; |
| |
| RawPostingList[] newHash = new RawPostingList[newSize]; |
| for(int i=0;i<postingsHashSize;i++) { |
| RawPostingList p0 = postingsHash[i]; |
| if (p0 != null) { |
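| // The primary TermsHash hashes by term text, recomputed from the char |
| // pool; a downstream TermsHash hashes by the textStart address. |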
| int code; |
| if (perThread.primary) { |
| final int start = p0.textStart & DocumentsWriter.CHAR_BLOCK_MASK; |
| final char[] text = charPool.buffers[p0.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; |
| int pos = start; |
| while(text[pos] != 0xffff) |
| pos++; |
| code = 0; |
| while (pos > start) |
| code = (code*31) + text[--pos]; |
| } else |
| code = p0.textStart; |
| |
| int hashPos = code & newMask; |
| assert hashPos >= 0; |
| if (newHash[hashPos] != null) { |
| final int inc = ((code>>8)+code)|1; |
| do { |
| code += inc; |
| hashPos = code & newMask; |
| } while (newHash[hashPos] != null); |
| } |
| newHash[hashPos] = p0; |
| } |
| } |
| |
| postingsHashMask = newMask; |
| postingsHash = newHash; |
| postingsHashSize = newSize; |
| postingsHashHalfSize = newSize >> 1; |
| } |
| } |