lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockWriter.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.codecs.uniformsplit.sharedterms;

 import java.io.IOException;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import org.apache.lucene.codecs.uniformsplit.BlockEncoder;
 import org.apache.lucene.codecs.uniformsplit.BlockLine;
 import org.apache.lucene.codecs.uniformsplit.BlockWriter;
 import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
 import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
 import org.apache.lucene.codecs.uniformsplit.TermBytes;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;

 /**
  * Block writer for the Shared Terms format.
  *
  * <p>As defined in {@link STUniformSplitTermsWriter}, the terms of all the fields are shared in one
  * dictionary. A block line therefore holds a term together with the {@link
  * org.apache.lucene.index.TermState}s of all the fields for that term.
  *
  * @lucene.experimental
  */
 public class STBlockWriter extends BlockWriter {

   /**
    * Fields gathered from the lines written by {@link #writeBlockLine}; emptied again by {@link
    * #updateFieldMetadata(long)}.
    */
   protected final Set<FieldMetadata> fieldsInBlock = new HashSet<>();

   /**
    * Creates a shared-terms block writer. All the arguments are handed unchanged to the {@link
    * BlockWriter} constructor.
    */
   public STBlockWriter(
       IndexOutput blockOutput,
       int targetNumBlockLines,
       int deltaNumLines,
       BlockEncoder blockEncoder) {
     super(blockOutput, targetNumBlockLines, deltaNumLines, blockEncoder);
   }

   /**
    * Appends a {@link BlockLine} for the given term and its per-field term states.
    *
    * <p>The new term either belongs to the current block or to the next one. In the second case a
    * new block is started (it includes one or more of the most recently added lines), the current
    * block is written to the block file, and its block key is added to the {@link
    * org.apache.lucene.codecs.uniformsplit.IndexDictionary.Builder}.
    *
    * <p>Nothing happens when {@code termStates} is empty.
    *
    * @param term The term of the block line. This {@link BytesRef} instance is kept as is, so the
    *     caller must make a deep copy beforehand if needed. The pending lines are retained in a list
    *     until the current block is written, and each of them needs its own term instance.
    * @param termStates The block line details, for all the fields in the line.
    * @param dictionaryBuilder The builder receiving the block keys.
    */
   public void addLine(
       BytesRef term,
       List<FieldMetadataTermState> termStates,
       IndexDictionary.Builder dictionaryBuilder)
       throws IOException {
     if (!termStates.isEmpty()) {
       // The MDP length is computed against the previously added term.
       TermBytes termBytes = new TermBytes(TermBytes.computeMdpLength(lastTerm, term), term);
       blockLines.add(new STBlockLine(termBytes, termStates));
       lastTerm = term;
       boolean enoughLinesToSplit = blockLines.size() >= targetNumBlockLines + deltaNumLines;
       if (enoughLinesToSplit) {
         splitAndWriteBlock(dictionaryBuilder);
       }
     }
   }

   /** Delegates to the parent implementation; overridden only to be accessible to this package. */
   @Override
   protected void finishLastBlock(IndexDictionary.Builder dictionaryBuilder) throws IOException {
     super.finishLastBlock(dictionaryBuilder);
   }

   /** Supplies the shared-terms flavor of the block line serializer. */
   @Override
   protected BlockLine.Serializer createBlockLineSerializer() {
     return new STBlockLine.Serializer();
   }

   /**
    * Writes one line to the block lines buffer, then its term states to the term states buffer, and
    * finally records the fields of the line in {@link #fieldsInBlock}.
    */
   @Override
   protected void writeBlockLine(
       boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine)
       throws IOException {
     // Size of the term states buffer before the term states of this line are appended to it.
     int termStatesBufferSize = Math.toIntExact(termStatesWriteBuffer.size());
     blockLineWriter.writeLine(
         blockLinesWriteBuffer, line, previousLine, termStatesBufferSize, isIncrementalEncodingSeed);

     // Casts are done only after the line is written, serializer first and line second.
     STBlockLine.Serializer sharedTermsSerializer = (STBlockLine.Serializer) blockLineWriter;
     STBlockLine sharedTermsLine = (STBlockLine) line;
     sharedTermsSerializer.writeLineTermStates(
         termStatesWriteBuffer, sharedTermsLine, termStateSerializer);
     sharedTermsLine.collectFields(fieldsInBlock);
   }

   /**
    * Records {@code blockStartFP} as the last block start of every field in {@link #fieldsInBlock},
    * and also as the first block start for the fields that have none yet (value -1). The set of
    * fields is cleared afterwards.
    */
   @Override
   protected void updateFieldMetadata(long blockStartFP) {
     assert !fieldsInBlock.isEmpty();
     for (FieldMetadata field : fieldsInBlock) {
       boolean firstBlockNotSet = field.getFirstBlockStartFP() == -1;
       if (firstBlockNotSet) {
         field.setFirstBlockStartFP(blockStartFP);
       }
       field.setLastBlockStartFP(blockStartFP);
     }
     fieldsInBlock.clear();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.codecs.uniformsplit.sharedterms;

	import java.io.IOException;
	import java.util.HashSet;
	import java.util.List;
	import java.util.Set;
	import org.apache.lucene.codecs.uniformsplit.BlockEncoder;
	import org.apache.lucene.codecs.uniformsplit.BlockLine;
	import org.apache.lucene.codecs.uniformsplit.BlockWriter;
	import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
	import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
	import org.apache.lucene.codecs.uniformsplit.TermBytes;
	import org.apache.lucene.store.IndexOutput;
	import org.apache.lucene.util.BytesRef;

	/**
	 * Terms block writer implementing the Shared Terms format.
	 *
	 * <p>As defined in {@link STUniformSplitTermsWriter}, a single dictionary is shared by the terms
	 * of all the fields. Each block line carries one term and, for that term, the {@link
	 * org.apache.lucene.index.TermState}s of all the fields.
	 *
	 * @lucene.experimental
	 */
	public class STBlockWriter extends BlockWriter {

	  /**
	   * Accumulates the fields of the lines passed to {@link #writeBlockLine}; reset by {@link
	   * #updateFieldMetadata(long)}.
	   */
	  protected final Set<FieldMetadata> fieldsInBlock = new HashSet<>();

	  /**
	   * Builds the writer by passing every argument, unchanged, to the {@link BlockWriter}
	   * constructor.
	   */
	  public STBlockWriter(
	      IndexOutput blockOutput,
	      int targetNumBlockLines,
	      int deltaNumLines,
	      BlockEncoder blockEncoder) {
	    super(blockOutput, targetNumBlockLines, deltaNumLines, blockEncoder);
	  }

	  /**
	   * Adds a new {@link BlockLine} made of a term and its term states for all the fields.
	   *
	   * <p>This decides whether the new term stays in the current block or goes to the next block.
	   * When it goes to the next block, that block is started (with one or more of the latest added
	   * lines), the current block is written to the block file, and the key of the current block is
	   * added to the {@link org.apache.lucene.codecs.uniformsplit.IndexDictionary.Builder}.
	   *
	   * <p>An empty {@code termStates} list makes this call a no-op.
	   *
	   * @param term The block line term. The {@link BytesRef} instance is used directly; making a
	   *     deep copy, if needed, is up to the caller. Block lines are kept in a list until the
	   *     current block gets written, and every line must have a distinct term instance.
	   * @param termStates Details of the block line for all the fields in the line.
	   * @param dictionaryBuilder Destination of the block keys.
	   */
	  public void addLine(
	      BytesRef term,
	      List<FieldMetadataTermState> termStates,
	      IndexDictionary.Builder dictionaryBuilder)
	      throws IOException {
	    if (!termStates.isEmpty()) {
	      // The MDP length of the new term is relative to the last term added.
	      TermBytes lineTermBytes = new TermBytes(TermBytes.computeMdpLength(lastTerm, term), term);
	      blockLines.add(new STBlockLine(lineTermBytes, termStates));
	      lastTerm = term;
	      boolean splitThresholdReached = blockLines.size() >= targetNumBlockLines + deltaNumLines;
	      if (splitThresholdReached) {
	        splitAndWriteBlock(dictionaryBuilder);
	      }
	    }
	  }

	  /** Calls the parent implementation; redeclared only to make it accessible to this package. */
	  @Override
	  protected void finishLastBlock(IndexDictionary.Builder dictionaryBuilder) throws IOException {
	    super.finishLastBlock(dictionaryBuilder);
	  }

	  /** Returns a new {@link STBlockLine.Serializer}. */
	  @Override
	  protected BlockLine.Serializer createBlockLineSerializer() {
	    return new STBlockLine.Serializer();
	  }

	  /**
	   * Writes the line to the block lines buffer, writes its term states to the term states buffer,
	   * then adds the fields of the line to {@link #fieldsInBlock}.
	   */
	  @Override
	  protected void writeBlockLine(
	      boolean isIncrementalEncodingSeed, BlockLine line, BlockLine previousLine)
	      throws IOException {
	    // Current size of the term states buffer, taken before this line adds its term states.
	    int currentTermStatesSize = Math.toIntExact(termStatesWriteBuffer.size());
	    blockLineWriter.writeLine(
	        blockLinesWriteBuffer,
	        line,
	        previousLine,
	        currentTermStatesSize,
	        isIncrementalEncodingSeed);

	    // Both casts happen after the line is written: the serializer first, then the line.
	    STBlockLine.Serializer stSerializer = (STBlockLine.Serializer) blockLineWriter;
	    STBlockLine stLine = (STBlockLine) line;
	    stSerializer.writeLineTermStates(termStatesWriteBuffer, stLine, termStateSerializer);
	    stLine.collectFields(fieldsInBlock);
	  }

	  /**
	   * Sets {@code blockStartFP} as the last block start of each field in {@link #fieldsInBlock}. A
	   * field whose first block start is still -1 also gets it as first block start. The set is
	   * cleared once all the fields are updated.
	   */
	  @Override
	  protected void updateFieldMetadata(long blockStartFP) {
	    assert !fieldsInBlock.isEmpty();
	    for (FieldMetadata metadata : fieldsInBlock) {
	      boolean hasNoFirstBlock = metadata.getFirstBlockStartFP() == -1;
	      if (hasNoFirstBlock) {
	        metadata.setFirstBlockStartFP(blockStartFP);
	      }
	      metadata.setLastBlockStartFP(blockStartFP);
	    }
	    fieldsInBlock.clear();
	  }
	}