lucene/codecs/src/java/org/apache/lucene/codecs/uniformsplit/sharedterms/STBlockLine.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.lucene.codecs.uniformsplit.sharedterms;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.uniformsplit.BlockHeader;
 import org.apache.lucene.codecs.uniformsplit.BlockLine;
 import org.apache.lucene.codecs.uniformsplit.DeltaBaseTermStateSerializer;
 import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
 import org.apache.lucene.codecs.uniformsplit.TermBytes;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;

 /**
  * Represents a term and its details stored in the {@link BlockTermState}. It is an extension of
  * {@link BlockLine} for the Shared Terms format. This means the line contains a term and all its
  * fields {@link org.apache.lucene.index.TermState}s.
  *
  * @lucene.experimental
  */
 public class STBlockLine extends BlockLine {

   /** Fields, each paired with its TermState, for this block line. Only used for writing. */
   protected final List<FieldMetadataTermState> termStates;

   /**
    * Creates a block line for a term that occurs in one or more fields.
    *
    * @param termBytes Term of the line; handed to the {@link BlockLine} constructor together with a
    *     null second argument.
    * @param termStates Fields and their term states for the term. Must not be empty (asserted). The
    *     list is copied, so later changes to the argument are not reflected in this line.
    */
   public STBlockLine(TermBytes termBytes, List<FieldMetadataTermState> termStates) {
     super(termBytes, null);
     assert termStates.isEmpty() == false;
     this.termStates = new ArrayList<>(termStates);
   }

   /**
    * Adds the {@link FieldMetadata} of every field of this line to the provided collection, in the
    * order the fields are stored in the line.
    *
    * @param collector Receives the collected {@link FieldMetadata}.
    */
   public void collectFields(Collection<FieldMetadata> collector) {
     termStates.forEach(entry -> collector.add(entry.fieldMetadata));
   }

   /**
    * Serializer for shared-terms block lines: on top of what {@link
    * org.apache.lucene.codecs.uniformsplit.BlockLine.Serializer} provides, it writes and reads the
    * term states of all the fields attached to the term of a line.
    *
    * <p>As an extension of {@link org.apache.lucene.codecs.uniformsplit.BlockLine.Serializer}, it
    * keeps a state of the previous term read to decode the next term.
    */
   public static class Serializer extends BlockLine.Serializer {

     /**
      * Writes the term states of every field of the provided {@link STBlockLine} to the given
      * output.
      *
      * <p>Layout: a line with exactly one field is written as the negated field number (ZInt)
      * followed by the field term state. Otherwise the number of fields is written (ZInt), then
      * all the field numbers (VInt each), then all the term states in the same order.
      */
     public void writeLineTermStates(
         DataOutput termStatesOutput, STBlockLine line, DeltaBaseTermStateSerializer encoder)
         throws IOException {

       List<FieldMetadataTermState> entries = line.termStates;
       int size = entries.size();
       assert size > 0 : "not valid block line with :" + size + " lines.";

       if (size == 1) {
         // Single field: its number is stored negated, which is how the readers tell this form
         // apart from a (strictly positive) field count.
         FieldMetadataTermState sole = entries.get(0);
         termStatesOutput.writeZInt(-sole.fieldMetadata.getFieldInfo().number);
         encoder.writeTermState(termStatesOutput, sole.fieldMetadata.getFieldInfo(), sole.state);
         return;
       }

       termStatesOutput.writeZInt(size);
       // First pass: the field numbers.
       for (FieldMetadataTermState entry : entries) {
         termStatesOutput.writeVInt(entry.fieldMetadata.getFieldInfo().number);
       }
       // Second pass: the term state of each field, in the same order as the numbers.
       for (FieldMetadataTermState entry : entries) {
         encoder.writeTermState(
             termStatesOutput, entry.fieldMetadata.getFieldInfo(), entry.state);
       }
     }

     /**
      * Reads the {@link BlockTermState} of one field from the current block line of the provided
      * input.
      *
      * @param fieldId Number of the field to look for; must not be negative (asserted).
      * @param termStatesInput Data input to read the {@link BlockTermState} from.
      * @param termStateSerializer Decodes each term state.
      * @param blockHeader Current block header; provides the base file pointers.
      * @param fieldInfos Used to resolve field numbers read from the input.
      * @param reuse A previous {@link BlockTermState} to reuse; or null to create a new one.
      * @return The {@link BlockTermState} corresponding to the provided field id; or null if the
      *     field does not occur in the line.
      */
     public BlockTermState readTermStateForField(
         int fieldId,
         DataInput termStatesInput,
         DeltaBaseTermStateSerializer termStateSerializer,
         BlockHeader blockHeader,
         FieldInfos fieldInfos,
         BlockTermState reuse)
         throws IOException {
       assert fieldId >= 0;
       final int header = termStatesInput.readZInt();
       if (header <= 0) {
         // Single-field line: the header is the negated field number.
         final int soleFieldId = -header;
         if (soleFieldId != fieldId) {
           return null;
         }
         return decodeFieldTermState(
             termStateSerializer, blockHeader, termStatesInput, fieldInfos, soleFieldId, reuse);
       }

       // Multi-field line: the header is the number of fields. The field numbers come first and
       // must all be read before the term states, since there is no offset to jump over them.
       // Likewise the term states have to be decoded one after the other until the wanted one.
       final int[] lineFieldIds = new int[header];
       boolean found = false;
       for (int i = 0; i < header; i++) {
         final int candidate = termStatesInput.readVInt();
         if (candidate == fieldId) {
           found = true;
         } else if (!found && candidate > fieldId) {
           // The field numbers are expected in ascending order, so once a greater number shows
           // up before any match, the wanted field cannot be in this line.
           return null;
         }
         lineFieldIds[i] = candidate;
       }
       if (!found) {
         return null;
       }

       for (int i = 0; i < header; i++) {
         final int currentFieldId = lineFieldIds[i];
         final BlockTermState decoded =
             decodeFieldTermState(
                 termStateSerializer,
                 blockHeader,
                 termStatesInput,
                 fieldInfos,
                 currentFieldId,
                 reuse);
         if (currentFieldId == fieldId) {
           return decoded;
         }
       }
       return null;
     }

     /**
      * Reads the {@link BlockTermState} of every field in the current block line of the provided
      * input.
      *
      * @param fieldTermStatesMap Cleared first, then filled with one entry per field of the line,
      *     keyed by field name. A new term state is created for each entry.
      * @see #readTermStateForField
      */
     public void readFieldTermStatesMap(
         DataInput termStatesInput,
         DeltaBaseTermStateSerializer termStateSerializer,
         BlockHeader blockHeader,
         FieldInfos fieldInfos,
         Map<String, BlockTermState> fieldTermStatesMap)
         throws IOException {
       fieldTermStatesMap.clear();
       final int header = termStatesInput.readZInt();

       if (header > 0) {
         // Multi-field line: the header is the number of fields.
         for (int fieldId : readFieldIds(termStatesInput, header)) {
           // The name is resolved before the term state is read.
           final String fieldName = fieldInfos.fieldInfo(fieldId).name;
           final BlockTermState state =
               decodeFieldTermState(
                   termStateSerializer, blockHeader, termStatesInput, fieldInfos, fieldId, null);
           fieldTermStatesMap.put(fieldName, state);
         }
         return;
       }

       // Single-field line: the header is the negated field number.
       final int soleFieldId = -header;
       final String soleFieldName = fieldInfos.fieldInfo(soleFieldId).name;
       final BlockTermState soleState =
           decodeFieldTermState(
               termStateSerializer, blockHeader, termStatesInput, fieldInfos, soleFieldId, null);
       fieldTermStatesMap.put(soleFieldName, soleState);
     }

     /** Reads {@code numFields} field ids (VInt each) from the current block line of the input. */
     public int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
       final int[] fieldIds = new int[numFields];
       for (int slot = 0; slot < fieldIds.length; slot++) {
         fieldIds[slot] = termStatesInput.readVInt();
       }
       return fieldIds;
     }

     /**
      * Decodes the next term state of the input for the field with the given number, using the
      * base file pointers of the block header.
      */
     private static BlockTermState decodeFieldTermState(
         DeltaBaseTermStateSerializer termStateSerializer,
         BlockHeader blockHeader,
         DataInput termStatesInput,
         FieldInfos fieldInfos,
         int fieldId,
         BlockTermState reuse)
         throws IOException {
       return termStateSerializer.readTermState(
           blockHeader.getBaseDocsFP(),
           blockHeader.getBasePositionsFP(),
           blockHeader.getBasePayloadsFP(),
           termStatesInput,
           fieldInfos.fieldInfo(fieldId),
           reuse);
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.lucene.codecs.uniformsplit.sharedterms;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.List;
	import java.util.Map;
	import org.apache.lucene.codecs.BlockTermState;
	import org.apache.lucene.codecs.uniformsplit.BlockHeader;
	import org.apache.lucene.codecs.uniformsplit.BlockLine;
	import org.apache.lucene.codecs.uniformsplit.DeltaBaseTermStateSerializer;
	import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
	import org.apache.lucene.codecs.uniformsplit.TermBytes;
	import org.apache.lucene.index.FieldInfos;
	import org.apache.lucene.store.DataInput;
	import org.apache.lucene.store.DataOutput;

	/**
	* Represents a term and its details stored in the {@link BlockTermState}. It is an extension of
	* {@link BlockLine} for the Shared Terms format. This means the line contains a term and all its
	* fields {@link org.apache.lucene.index.TermState}s.
	*
	* @lucene.experimental
	*/
	public class STBlockLine extends BlockLine {

	/** List of the fields ond their TermStates for this block line. Only used for writing. */
	protected final List<FieldMetadataTermState> termStates;

	public STBlockLine(TermBytes termBytes, List<FieldMetadataTermState> termStates) {
	super(termBytes, null);
	assert !termStates.isEmpty();
	this.termStates = new ArrayList<>(termStates);
	}

	/**
	* Collects the {@link FieldMetadata} of all fields listed in this line.
	*
	* @param collector Receives the collected {@link FieldMetadata}.
	*/
	public void collectFields(Collection<FieldMetadata> collector) {
	for (FieldMetadataTermState fieldTermState : termStates) {
	collector.add(fieldTermState.fieldMetadata);
	}
	}

	/**
	* Reads block lines encoded incrementally, with all fields corresponding to the term of the line.
	*
	* <p>This class extends {@link org.apache.lucene.codecs.uniformsplit.BlockLine.Serializer}, so it
	* keeps a state of the previous term read to decode the next term.
	*/
	public static class Serializer extends BlockLine.Serializer {

	/**
	* Writes all the {@link BlockTermState} of the provided {@link STBlockLine} to the given
	* output.
	*/
	public void writeLineTermStates(
	DataOutput termStatesOutput, STBlockLine line, DeltaBaseTermStateSerializer encoder)
	throws IOException {

	FieldMetadataTermState fieldMetadataTermState;
	int size = line.termStates.size();
	assert size > 0 : "not valid block line with :" + size + " lines.";
	if (size == 1) {
	// When there is only 1 field, write its id as negative, followed by the field TermState.
	int fieldID = line.termStates.get(0).fieldMetadata.getFieldInfo().number;
	termStatesOutput.writeZInt(-fieldID);
	fieldMetadataTermState = line.termStates.get(0);
	encoder.writeTermState(
	termStatesOutput,
	fieldMetadataTermState.fieldMetadata.getFieldInfo(),
	fieldMetadataTermState.state);
	return;
	}

	termStatesOutput.writeZInt(size);
	// First iteration writes the fields ids.
	for (int i = 0; i < size; i++) {
	fieldMetadataTermState = line.termStates.get(i);
	termStatesOutput.writeVInt(fieldMetadataTermState.fieldMetadata.getFieldInfo().number);
	}
	// Second iteration writes the corresponding field TermStates.
	for (int i = 0; i < size; i++) {
	fieldMetadataTermState = line.termStates.get(i);
	encoder.writeTermState(
	termStatesOutput,
	fieldMetadataTermState.fieldMetadata.getFieldInfo(),
	fieldMetadataTermState.state);
	}
	}

	/**
	* Reads a single {@link BlockTermState} for the provided field in the current block line of the
	* provided input.
	*
	* @param termStatesInput Data input to read the {@link BlockTermState} from.
	* @param blockHeader Current block header.
	* @param reuse A previous {@link BlockTermState} to reuse; or null to create a new one.
	* @return The {@link BlockTermState} corresponding to the provided field id; or null if the
	* field does not occur in the line.
	*/
	public BlockTermState readTermStateForField(
	int fieldId,
	DataInput termStatesInput,
	DeltaBaseTermStateSerializer termStateSerializer,
	BlockHeader blockHeader,
	FieldInfos fieldInfos,
	BlockTermState reuse)
	throws IOException {
	assert fieldId >= 0;
	int numFields = termStatesInput.readZInt();
	if (numFields <= 0) {
	int readFieldId = -numFields;
	if (fieldId == readFieldId) {
	return termStateSerializer.readTermState(
	blockHeader.getBaseDocsFP(),
	blockHeader.getBasePositionsFP(),
	blockHeader.getBasePayloadsFP(),
	termStatesInput,
	fieldInfos.fieldInfo(readFieldId),
	reuse);
	}
	return null;
	}

	// There are multiple fields for the term.
	// We have to read all the field ids (aka field numbers) sequentially.
	// Then if the required field is in the list, we have to read all the TermState
	// sequentially. This could be optimized with a jump-to-middle offset
	// for example, but we don't need that currently.

	boolean isFieldInList = false;
	int[] readFieldIds = new int[numFields];
	for (int i = 0; i < numFields; i++) {
	int readFieldId = termStatesInput.readVInt();
	if (!isFieldInList && readFieldId > fieldId) {
	// As the list of fieldIds is sorted we can return early if we find fieldId greater than
	// the seeked one.
	// But if we found the seeked one, we have to read all the list to get to the term state
	// part afterward (there is no jump offset).
	return null;
	}
	isFieldInList \|= readFieldId == fieldId;
	readFieldIds[i] = readFieldId;
	}
	if (isFieldInList) {
	for (int readFieldId : readFieldIds) {
	BlockTermState termState =
	termStateSerializer.readTermState(
	blockHeader.getBaseDocsFP(),
	blockHeader.getBasePositionsFP(),
	blockHeader.getBasePayloadsFP(),
	termStatesInput,
	fieldInfos.fieldInfo(readFieldId),
	reuse);
	if (fieldId == readFieldId) {
	return termState;
	}
	}
	}
	return null;
	}

	/**
	* Reads all the {@link BlockTermState} of all the field in the current block line of the
	* provided input.
	*
	* @param fieldTermStatesMap Map filled with the term states for each field. It is cleared
	* first.
	* @see #readTermStateForField
	*/
	public void readFieldTermStatesMap(
	DataInput termStatesInput,
	DeltaBaseTermStateSerializer termStateSerializer,
	BlockHeader blockHeader,
	FieldInfos fieldInfos,
	Map<String, BlockTermState> fieldTermStatesMap)
	throws IOException {
	fieldTermStatesMap.clear();
	int numFields = termStatesInput.readZInt();
	if (numFields <= 0) {
	int fieldId = -numFields;
	fieldTermStatesMap.put(
	fieldInfos.fieldInfo(fieldId).name,
	termStateSerializer.readTermState(
	blockHeader.getBaseDocsFP(),
	blockHeader.getBasePositionsFP(),
	blockHeader.getBasePayloadsFP(),
	termStatesInput,
	fieldInfos.fieldInfo(fieldId),
	null));
	return;
	}
	for (int fieldId : readFieldIds(termStatesInput, numFields)) {
	fieldTermStatesMap.put(
	fieldInfos.fieldInfo(fieldId).name,
	termStateSerializer.readTermState(
	blockHeader.getBaseDocsFP(),
	blockHeader.getBasePositionsFP(),
	blockHeader.getBasePayloadsFP(),
	termStatesInput,
	fieldInfos.fieldInfo(fieldId),
	null));
	}
	}

	/** Reads all the field ids in the current block line of the provided input. */
	public int[] readFieldIds(DataInput termStatesInput, int numFields) throws IOException {
	int[] fieldIds = new int[numFields];
	for (int i = 0; i < numFields; i++) {
	fieldIds[i] = termStatesInput.readVInt();
	}
	return fieldIds;
	}
	}
	}