package org.apache.lucene.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Map;

import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.store.FlushInfo;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;

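/** Collects the term vectors for each indexed document and streams them to
 *  the codec's {@link TermVectorsWriter} as documents are finished. */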
final class TermVectorsConsumer extends TermsHash {

  TermVectorsWriter writer;

  /** Scratch term used by TermVectorsConsumerPerField.finishDocument. */
  final BytesRef flushTerm = new BytesRef();

  final DocumentsWriterPerThread docWriter;

  /** Used by TermVectorsConsumerPerField when serializing
   *  the term vectors. */
  final ByteSliceReader vectorSliceReaderPos = new ByteSliceReader();
  final ByteSliceReader vectorSliceReaderOff = new ByteSliceReader();

  /** Set as soon as any field with term vectors is seen; cleared on flush/abort. */
  boolean hasVectors;

  /** Number of fields with term vectors pending for the current document. */
  int numVectorFields;

  /** ID of the next document to be written to the term vectors file. */
  int lastDocID;

  /** Per-field state for the fields with term vectors in the current document. */
  private TermVectorsConsumerPerField[] perFields = new TermVectorsConsumerPerField[1];

  public TermVectorsConsumer(DocumentsWriterPerThread docWriter) {
    super(docWriter, false, null);
    this.docWriter = docWriter;
  }

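  /** Flush-time hook: if any document in this segment had term vectors,
   *  pads out trailing documents that had none, finishes the vectors file,
   *  and closes the writer. */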
  @Override
  void flush(Map<String, TermsHashPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
    if (writer != null) {
      int numDocs = state.segmentInfo.getDocCount();
      assert numDocs > 0;
      // At least one doc in this run had term vectors enabled
      try {
        fill(numDocs);
        assert state.segmentInfo != null;
        writer.finish(state.fieldInfos, numDocs);
      } finally {
        IOUtils.close(writer);
        writer = null;
        lastDocID = 0;
        hasVectors = false;
      }
    }
  }

  /** Fills in no-term-vectors for all docs we haven't seen
   *  since the last doc that had term vectors. */
  void fill(int docID) throws IOException {
    while (lastDocID < docID) {
      writer.startDocument(0);
      writer.finishDocument();
      lastDocID++;
    }
  }

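  /** Lazily creates the codec's term vectors writer the first time a
   *  document with vectors is finished. */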
  private void initTermVectorsWriter() throws IOException {
    if (writer == null) {
      IOContext context = new IOContext(new FlushInfo(docWriter.getNumDocsInRAM(), docWriter.bytesUsed()));
      writer = docWriter.codec.termVectorsFormat().vectorsWriter(docWriter.directory, docWriter.getSegmentInfo(), context);
      lastDocID = 0;
    }
  }

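  /** Called once per document: if the document had any term vectors, sorts
   *  the vector fields, pads any preceding docs without vectors, and appends
   *  this document's vectors to the writer. */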
  @Override
  void finishDocument() throws IOException {
    assert docWriter.testPoint("TermVectorsTermsWriter.finishDocument start");

    if (!hasVectors) {
      return;
    }

    // Fields in term vectors are UTF16 sorted:
    ArrayUtil.introSort(perFields, 0, numVectorFields);

    initTermVectorsWriter();

    // Write empty vectors for any docs skipped since the last doc that had vectors:
    fill(docState.docID);

    // Append term vectors to the real outputs:
    writer.startDocument(numVectorFields);
    for (int i = 0; i < numVectorFields; i++) {
      perFields[i].finishDocument();
    }
    writer.finishDocument();

    assert lastDocID == docState.docID : "lastDocID=" + lastDocID + " docState.docID=" + docState.docID;

    lastDocID++;

    super.reset();
    resetFields();
    assert docWriter.testPoint("TermVectorsTermsWriter.finishDocument end");
  }

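  /** Aborts the in-progress segment: discards all buffered vector state and
   *  any partially written vectors output. */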
  @Override
  public void abort() {
    hasVectors = false;

    try {
      super.abort();
    } finally {
      if (writer != null) {
        writer.abort();
        writer = null;
      }
      lastDocID = 0;
      reset();
    }
  }

  void resetFields() {
    Arrays.fill(perFields, null); // don't hang onto stuff from previous doc
    numVectorFields = 0;
  }

  @Override
  public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo) {
    return new TermVectorsConsumerPerField(invertState, this, fieldInfo);
  }

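  /** Queues a field whose term vectors must be written when the current
   *  document is finished, growing the pending array as needed. */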
  void addFieldToFlush(TermVectorsConsumerPerField fieldToFlush) {
    if (numVectorFields == perFields.length) {
      int newSize = ArrayUtil.oversize(numVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
      TermVectorsConsumerPerField[] newArray = new TermVectorsConsumerPerField[newSize];
      System.arraycopy(perFields, 0, newArray, 0, numVectorFields);
      perFields = newArray;
    }
    perFields[numVectorFields++] = fieldToFlush;
  }

  @Override
  void startDocument() {
    resetFields(); // also clears numVectorFields
  }
}