package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestPayloads extends LuceneTestCase {

  // Simple tests for the Payload class
  public void testPayload() throws Exception {
    byte[] testData = "This is a test!".getBytes();
    Payload payload = new Payload(testData);
    assertEquals("Wrong payload length.", testData.length, payload.length());

    // test copyTo(): the target array is too small, so copyTo() must fail
    byte[] target = new byte[testData.length - 1];
    try {
      payload.copyTo(target, 0);
      fail("Expected exception not thrown");
    } catch (Exception expected) {
      // expected exception
    }

    target = new byte[testData.length + 3];
    payload.copyTo(target, 3);
    for (int i = 0; i < testData.length; i++) {
      assertEquals(testData[i], target[i + 3]);
    }

    // test toByteArray()
    target = payload.toByteArray();
    assertByteArrayEquals(testData, target);

    // test byteAt()
    for (int i = 0; i < testData.length; i++) {
      assertEquals(payload.byteAt(i), testData[i]);
    }

    try {
      payload.byteAt(testData.length + 1);
      fail("Expected exception not thrown");
    } catch (Exception expected) {
      // expected exception
    }

    Payload clone = (Payload) payload.clone();
    assertEquals(payload.length(), clone.length());
    for (int i = 0; i < payload.length(); i++) {
      assertEquals(payload.byteAt(i), clone.byteAt(i));
    }
  }
  // Tests whether the DocumentWriter and SegmentMerger correctly enable the
  // payload bit in the FieldInfo
  public void testPayloadFieldBit() throws Exception {
    Directory ram = newDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();
    // this field won't have any payloads
    d.add(newField("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
    // this field will have payloads in all docs, however not at all term positions,
    // so this field is used to check that the DocumentWriter correctly enables the payloads bit
    // even if only some term positions have payloads
    d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    // this field is used to verify that the SegmentMerger enables payloads for a field if it has payloads
    // enabled in only some documents
    d.add(newField("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
    // only add payload data for field f2, skipping its first instance
    analyzer.setPayloadData("f2", 1, "somedata".getBytes(), 0, 1);
    writer.addDocument(d);
    // flush
    writer.close();

    SegmentReader reader = SegmentReader.getOnlySegmentReader(ram);
    FieldInfos fi = reader.fieldInfos();
    assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").storePayloads);
    assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").storePayloads);
    assertFalse("Payload field bit should not be set.", fi.fieldInfo("f3").storePayloads);
    reader.close();

    // now we add another document which has payloads for field f3 and verify that the
    // SegmentMerger enables payloads for that field
    writer = new IndexWriter(ram, newIndexWriterConfig(TEST_VERSION_CURRENT,
        analyzer).setOpenMode(OpenMode.CREATE));
    d = new Document();
    d.add(newField("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
    d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.add(newField("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
    // add payload data for fields f2 and f3
    analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
    analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3);
    writer.addDocument(d);
    // force merge
    writer.optimize();
    // flush
    writer.close();

    reader = SegmentReader.getOnlySegmentReader(ram);
    fi = reader.fieldInfos();
    assertFalse("Payload field bit should not be set.", fi.fieldInfo("f1").storePayloads);
    assertTrue("Payload field bit should be set.", fi.fieldInfo("f2").storePayloads);
    assertTrue("Payload field bit should be set.", fi.fieldInfo("f3").storePayloads);
    reader.close();
    ram.close();
  }
  // Tests if payloads are correctly stored and loaded using both RAMDirectory and FSDirectory
  public void testPayloadsEncoding() throws Exception {
    // first perform the test using a RAMDirectory
    Directory dir = newDirectory();
    performTest(dir);
    dir.close();
    // now use an FSDirectory and repeat the same test
    File dirName = _TestUtil.getTempDir("test_payloads");
    dir = newFSDirectory(dirName);
    performTest(dir);
    _TestUtil.rmDir(dirName);
    dir.close();
  }
  // builds an index with payloads in the given Directory and performs
  // different tests to verify the payload encoding
  private void performTest(Directory dir) throws Exception {
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, analyzer)
        .setOpenMode(OpenMode.CREATE)
        .setMergePolicy(newLogMergePolicy()));

    // must be kept in sync with the skipInterval used by TermInfosWriter
    final int skipInterval = 16;
    final int numTerms = 5;
    final String fieldName = "f1";
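    // use more documents than the skip interval, so the skipping tests
    // further down are guaranteed to cross skip points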
    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = generateTerms(fieldName, numTerms);
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < terms.length; i++) {
      sb.append(terms[i].text);
      sb.append(" ");
    }
    String content = sb.toString();
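
    // total payload bytes consumed below: the first loop adds 2 * numDocs docs,
    // each with a 1-byte payload at every one of its numTerms positions
    // (numTerms * numDocs * 2 bytes); the second loop adds numDocs docs where
    // doc i carries i-byte payloads (numTerms * (0 + 1 + ... + (numDocs - 1))
    // = numTerms * numDocs * (numDocs - 1) / 2 bytes)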
    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    byte[] payloadData = generateRandomData(payloadDataLength);
    Document d = new Document();
    d.add(newField(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++) {
      analyzer.setPayloadData(fieldName, payloadData, offset, 1);
      offset += numTerms;
      writer.addDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer.commit();

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++) {
      analyzer.setPayloadData(fieldName, payloadData, offset, i);
      offset += i * numTerms;
      writer.addDocument(d);
    }

    writer.optimize();
    // flush
    writer.close();

    /*
     * Verify the index
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = IndexReader.open(dir, true);
    byte[] verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    TermPositions[] tps = new TermPositions[numTerms];
    for (int i = 0; i < numTerms; i++) {
      tps[i] = reader.termPositions(terms[i]);
    }
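
    // advance all TermPositions in lockstep and concatenate every payload in
    // document order, so the result can be compared byte-for-byte against
    // the generated payloadData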
    while (tps[0].next()) {
      for (int i = 1; i < numTerms; i++) {
        tps[i].next();
      }
      int freq = tps[0].freq();

      for (int i = 0; i < freq; i++) {
        for (int j = 0; j < numTerms; j++) {
          tps[j].nextPosition();
          if (tps[j].isPayloadAvailable()) {
            tps[j].getPayload(verifyPayloadData, offset);
            offset += tps[j].getPayloadLength();
          }
        }
      }
    }

    for (int i = 0; i < numTerms; i++) {
      tps[i].close();
    }

    assertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    TermPositions tp = reader.termPositions(terms[0]);
    tp.next();
    tp.nextPosition();
    // now we don't read this payload
    tp.nextPosition();
    assertEquals("Wrong payload length.", 1, tp.getPayloadLength());
    byte[] payload = tp.getPayload(null, 0);
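    // every document in the first batch consumes numTerms payload bytes,
    // so the first payload byte of the second document is payloadData[numTerms]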
    assertEquals(payload[0], payloadData[numTerms]);
    tp.nextPosition();

    // we don't read this payload and skip to a different document
    tp.skipTo(5);
    tp.nextPosition();
    assertEquals("Wrong payload length.", 1, tp.getPayloadLength());
    payload = tp.getPayload(null, 0);
    assertEquals(payload[0], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp.seek(terms[1]);
    tp.next();
    tp.nextPosition();
    assertEquals("Wrong payload length.", 1, tp.getPayloadLength());
    tp.skipTo(skipInterval - 1);
    tp.nextPosition();
    assertEquals("Wrong payload length.", 1, tp.getPayloadLength());
    tp.skipTo(2 * skipInterval - 1);
    tp.nextPosition();
    assertEquals("Wrong payload length.", 1, tp.getPayloadLength());
    tp.skipTo(3 * skipInterval - 1);
    tp.nextPosition();
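    // doc 3 * skipInterval - 1 falls into the second batch, where doc number
    // 2 * numDocs + i carries an i-byte payload, hence the expected length
    // (3 * skipInterval - 1) - 2 * numDocs = 3 * skipInterval - 2 * numDocs - 1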
assertEquals("Wrong payload length.", 3 * skipInterval - 2 * numDocs - 1, tp.getPayloadLength());
/*
* Test multiple call of getPayload()
*/
tp.getPayload(null, 0);
try {
// it is forbidden to call getPayload() more than once
// without calling nextPosition()
tp.getPayload(null, 0);
fail("Expected exception not thrown");
} catch (Exception expected) {
// expected exception
}
reader.close();
// test long payload
analyzer = new PayloadAnalyzer();
writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT,
analyzer).setOpenMode(OpenMode.CREATE));
String singleTerm = "lucene";
d = new Document();
d.add(newField(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
// add a payload whose length is greater than the buffer size of BufferedIndexOutput
payloadData = generateRandomData(2000);
analyzer.setPayloadData(fieldName, payloadData, 100, 1500);
writer.addDocument(d);
writer.optimize();
// flush
writer.close();
reader = IndexReader.open(dir, true);
tp = reader.termPositions(new Term(fieldName, singleTerm));
tp.next();
tp.nextPosition();
verifyPayloadData = new byte[tp.getPayloadLength()];
tp.getPayload(verifyPayloadData, 0);
byte[] portion = new byte[1500];
System.arraycopy(payloadData, 100, portion, 0, 1500);
assertByteArrayEquals(portion, verifyPayloadData);
reader.close();
}

  private void generateRandomData(byte[] data) {
    // this test needs the random data to be valid Unicode
    String s = _TestUtil.randomFixedByteLengthUnicodeString(random, data.length);
    byte b[];
    try {
      b = s.getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
    assert b.length == data.length;
    System.arraycopy(b, 0, data, 0, b.length);
  }

  private byte[] generateRandomData(int n) {
    byte[] data = new byte[n];
    generateRandomData(data);
    return data;
  }
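
  // generates term texts "t0", "t1", ... zero-padded to a common width so that
  // lexicographic term order matches numeric order; note that Math.log(0) makes
  // the computed padding negative for i == 0, so the first term is never padded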
  private Term[] generateTerms(String fieldName, int n) {
    int maxDigits = (int) (Math.log(n) / Math.log(10));
    Term[] terms = new Term[n];
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < n; i++) {
      sb.setLength(0);
      sb.append("t");
      int zeros = maxDigits - (int) (Math.log(i) / Math.log(10));
      for (int j = 0; j < zeros; j++) {
        sb.append("0");
      }
      sb.append(i);
      terms[i] = new Term(fieldName, sb.toString());
    }
    return terms;
  }

  void assertByteArrayEquals(byte[] b1, byte[] b2) {
    if (b1.length != b2.length) {
      fail("Byte arrays have different lengths: " + b1.length + ", " + b2.length);
    }
    for (int i = 0; i < b1.length; i++) {
      if (b1[i] != b2[i]) {
        fail("Byte arrays different at index " + i + ": " + b1[i] + ", " + b2[i]);
      }
    }
  }

  /**
   * This Analyzer uses a WhitespaceTokenizer and a PayloadFilter.
   */
  private static class PayloadAnalyzer extends Analyzer {
    Map<String,PayloadData> fieldToData = new HashMap<String,PayloadData>();

    void setPayloadData(String field, byte[] data, int offset, int length) {
      fieldToData.put(field, new PayloadData(0, data, offset, length));
    }

    void setPayloadData(String field, int numFieldInstancesToSkip, byte[] data, int offset, int length) {
      fieldToData.put(field, new PayloadData(numFieldInstancesToSkip, data, offset, length));
    }

    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      PayloadData payload = fieldToData.get(fieldName);
      TokenStream ts = new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader);
      if (payload != null) {
        if (payload.numFieldInstancesToSkip == 0) {
          ts = new PayloadFilter(ts, payload.data, payload.offset, payload.length);
        } else {
          payload.numFieldInstancesToSkip--;
        }
      }
      return ts;
    }

    private static class PayloadData {
      byte[] data;
      int offset;
      int length;
      int numFieldInstancesToSkip;

      PayloadData(int skip, byte[] data, int offset, int length) {
        numFieldInstancesToSkip = skip;
        this.data = data;
        this.offset = offset;
        this.length = length;
      }
    }
  }

  /**
   * This Filter adds payloads to the tokens.
   */
  private static class PayloadFilter extends TokenFilter {
    private byte[] data;
    private int length;
    private int offset;
    private int startOffset;
    PayloadAttribute payloadAtt;

    public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
      super(in);
      this.data = data;
      this.length = length;
      this.offset = offset;
      this.startOffset = offset;
      payloadAtt = addAttribute(PayloadAttribute.class);
    }

    @Override
    public boolean incrementToken() throws IOException {
      boolean hasNext = input.incrementToken();
      if (hasNext) {
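        // attach the next length-byte slice of data as this token's payload;
        // once the data array is exhausted, emit tokens without payloads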
        if (offset + length <= data.length) {
          Payload p = new Payload();
          payloadAtt.setPayload(p);
          p.setData(data, offset, length);
          offset += length;
        } else {
          payloadAtt.setPayload(null);
        }
      }
      return hasNext;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      this.offset = startOffset;
    }
  }

  public void testThreadSafety() throws Exception {
    final int numThreads = 5;
    final int numDocs = atLeast(50);
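    // the pool hands each ingesting thread an exclusive 5-byte buffer that is
    // returned when its token stream is closed, so concurrently indexed
    // payloads never share state; the final assert checks every buffer came back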
    final ByteArrayPool pool = new ByteArrayPool(numThreads, 5);

    Directory dir = newDirectory();
    final IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
        TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    final String field = "test";

    Thread[] ingesters = new Thread[numThreads];
    for (int i = 0; i < numThreads; i++) {
      ingesters[i] = new Thread() {
        @Override
        public void run() {
          try {
            for (int j = 0; j < numDocs; j++) {
              Document d = new Document();
              d.add(new Field(field, new PoolingPayloadTokenStream(pool)));
              writer.addDocument(d);
            }
          } catch (Exception e) {
            e.printStackTrace();
            fail(e.toString());
          }
        }
      };
      ingesters[i].start();
    }

    for (int i = 0; i < numThreads; i++) {
      ingesters[i].join();
    }
    writer.close();

    IndexReader reader = IndexReader.open(dir, true);
    TermEnum terms = reader.terms();
    while (terms.next()) {
      TermPositions tp = reader.termPositions(terms.term());
      while (tp.next()) {
        int freq = tp.freq();
        for (int i = 0; i < freq; i++) {
          tp.nextPosition();
          byte payload[] = new byte[5];
          tp.getPayload(payload, 0);
          assertEquals(terms.term().text, new String(payload, 0, payload.length, "UTF-8"));
        }
      }
      tp.close();
    }
    terms.close();
    reader.close();
    dir.close();
    assertEquals(pool.size(), numThreads);
  }
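
  /**
   * Emits a single token whose term text and payload are the same random
   * UTF-8 bytes, drawn from (and on close() returned to) the shared pool.
   */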
  private class PoolingPayloadTokenStream extends TokenStream {
    private byte[] payload;
    private boolean first;
    private ByteArrayPool pool;
    private String term;

    CharTermAttribute termAtt;
    PayloadAttribute payloadAtt;

    PoolingPayloadTokenStream(ByteArrayPool pool) {
      this.pool = pool;
      payload = pool.get();
      generateRandomData(payload);
      try {
        term = new String(payload, 0, payload.length, "UTF-8");
      } catch (UnsupportedEncodingException e) {
        throw new RuntimeException(e);
      }
      first = true;
      payloadAtt = addAttribute(PayloadAttribute.class);
      termAtt = addAttribute(CharTermAttribute.class);
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (!first) return false;
      first = false;
      clearAttributes();
      termAtt.append(term);
      payloadAtt.setPayload(new Payload(payload));
      return true;
    }

    @Override
    public void close() throws IOException {
      pool.release(payload);
    }
  }

  private static class ByteArrayPool {
    private List<byte[]> pool;

    ByteArrayPool(int capacity, int size) {
      pool = new ArrayList<byte[]>();
      for (int i = 0; i < capacity; i++) {
        pool.add(new byte[size]);
      }
    }

    synchronized byte[] get() {
      return pool.remove(0);
    }

    synchronized void release(byte[] b) {
      pool.add(b);
    }

    synchronized int size() {
      return pool.size();
    }
  }
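
  // indexes two different fields, each of which may or may not receive
  // payloads (the MockAnalyzer used here can inject them at random), in
  // separate writer sessions, then force merges so the merge must handle
  // both the payload and the no-payload case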
  public void testAcrossFields() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir,
        new MockAnalyzer(random, MockTokenizer.WHITESPACE, true));
    Document doc = new Document();
    doc.add(new Field("hasMaybepayload", "here we go", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    writer = new RandomIndexWriter(random, dir,
        new MockAnalyzer(random, MockTokenizer.WHITESPACE, true));
    doc = new Document();
    doc.add(new Field("hasMaybepayload2", "here we go", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.addDocument(doc);
    writer.optimize();
    writer.close();

    dir.close();
  }
}