processing/src/main/java/org/apache/druid/segment/column/TypeStrategy.java - druid - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.druid.segment.column;

 import it.unimi.dsi.fastutil.Hash;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.error.DruidException;

 import java.nio.ByteBuffer;
 import java.util.Comparator;

 /**
  * TypeStrategy provides value comparison and binary serialization for Druid types. This can be obtained for ANY Druid
  * type via {@link TypeSignature#getStrategy()}.
  *
  * IMPORTANT!!! DO NOT USE THIS FOR WRITING COLUMNS, THERE ARE VERY LIKELY FAR BETTER WAYS TO DO THIS. However, if you
  * need to store a single value or small number of values, continue reading.
  *
  * ALSO IMPORTANT!!! This is primarily intended for writing ephemeral values within a single process, and is not
  * especially well suited (by itself) for persistent storage of data or cross process transfer. The support typically
  * necessary for such more persistent storage, such as tracking version of a format or endianness of the values, should
  * be handled externally to support these use cases.
  *
  * All implementations of this mechanism support reading and writing ONLY non-null values. To handle nulls inline with
  * your values, consider {@link NullableTypeStrategy}, which might be acceptable to use if you need to read and write
  * nullable values, AND, you have enough memory to burn a full byte for every value you want to store. It will store
  * values with a leading byte containing either {@link NullHandling#IS_NULL_BYTE} or
  * {@link NullHandling#IS_NOT_NULL_BYTE} as appropriate. If you have a lot of values to write and a lot of nulls,
  * consider alternative approaches to tracking your nulls instead.
  *
  * This mechanism supports both sequential access, where methods use the natural {@link ByteBuffer#position()} and
  * modify the underlying position as they operate, and random access at specific offsets, which does not modify the
  * underlying position. If a method accepts an offset parameter, it does not modify the position; if not, it does.
  *
  * The only methods implementors are required to provide are {@link #read(ByteBuffer)},
  * {@link #write(ByteBuffer, Object, int)} and {@link #estimateSizeBytes(Object)}, default implementations are provided
  * to set and reset buffer positions as appropriate for the offset based methods, but may be overridden if a more
  * optimized implementation is needed.
  *
  * Implementations of this interface should be thread safe, but may not use {@link ByteBuffer} in a thread safe manner,
  * potentially modifying positions and limits, either temporarily or permanently depending on which set of methods is
  * called.
  *
  * This interface extends {@code Comparator<Object>} instead of {@code Comparator<T>} because trying to specialize the
  * type of the comparison method can run into issues for comparators of objects that can sometimes be of a different
  * java class type.  For example, {@code Comparator<Long>} cannot accept Integer objects in its comparison method
  * and there is no easy way for this interface definition to allow {@code TypeStrategy<Long>} to actually be a
  * {@code Comparator<Number>}.  So, we fall back to effectively erasing the generic type and having them all be
  * {@code Comparator<Object>}.
  */
 public interface TypeStrategy<T> extends Comparator<Object>, Hash.Strategy<T>
 {
   /**
    * Estimates the number of bytes needed to write the given value to memory. The estimate is not required to be
    * exact, although many implementations will be. When exact sizing is not efficient, implementations should prefer
    * to over-estimate.
    *
    * Typical uses include estimating the heap memory usage of an aggregator, or sizing the buffer that must be
    * allocated before a value is passed to {@link #write}.
    */
   int estimateSizeBytes(T value);

   /**
    * Reads a non-null value from the {@link ByteBuffer}, starting at the current {@link ByteBuffer#position()}. The
    * underlying position is advanced by the size of the value that was read.
    *
    * The returned value may hold on to a reference to the supplied {@link ByteBuffer}; when that can happen,
    * {@link #readRetainsBufferReference()} returns true.
    */
   T read(ByteBuffer buffer);

   /**
    * Indicates whether objects returned by the {@link #read} methods may retain a reference to the supplied
    * {@link ByteBuffer}. Returns true if a reference is retained at least some of the time, and false if, and only
    * if, a reference is *never* retained.
    */
   boolean readRetainsBufferReference();

   /**
    * Writes a non-null value to the {@link ByteBuffer}, starting at the current {@link ByteBuffer#position()}. The
    * underlying position is advanced by the size of the value that was written.
    *
    * The return value is the number of bytes written. If the value needs more than 'maxSizeBytes', a negative value
    * is returned instead, indicating how many additional bytes would be required to write the value in full. In that
    * case partial results may have been written to the buffer, and the position may be left wherever the
    * implementation ran out of space. Callers that need to 'rewind' after a partial write should save the starting
    * position beforehand.
    *
    * Callers MUST check the return value: a positive value indicates a successful write, while a negative value
    * indicates a partial write.
    *
    * @return number of bytes written, or a negative value if 'maxSizeBytes' was not enough
    */
   int write(ByteBuffer buffer, T value, int maxSizeBytes);

   /**
    * Reads a non-null value from the {@link ByteBuffer} at the requested offset. The underlying
    * {@link ByteBuffer#position()} is not permanently moved, but it may be temporarily changed while the read is in
    * progress, so this cannot be considered thread safe usage of the buffer.
    *
    * The contract of this method is that any value it returns MUST be completely detached from the underlying
    * {@link ByteBuffer}, because the value might outlive the memory location that was allocated to hold it. In other
    * words, an object that is memory mapped must be copied on heap, or relocated with {@link #write} to another
    * memory location that is owned by the caller.
    */
   default T read(ByteBuffer buffer, int offset)
   {
     // Remember where the caller left the buffer so the position can be put back once the read completes or fails.
     final int callerPosition = buffer.position();
     try {
       buffer.position(offset);
       final T result = read(buffer);
       return result;
     }
     finally {
       buffer.position(callerPosition);
     }
   }

   /**
    * Writes a non-null value to the {@link ByteBuffer} at the requested offset. The underlying
    * {@link ByteBuffer#position()} is not permanently moved, but it may be temporarily changed while the write is in
    * progress, so this cannot be considered thread safe usage of the buffer.
    *
    * The return value is the number of bytes written. If the value needs more than 'maxSizeBytes', a negative value
    * is returned instead, indicating how many additional bytes would be required to write the value in full. In that
    * case partial results may have been written to the buffer, but the underlying buffer position is unaffected
    * regardless of whether the write was successful or not.
    *
    * Callers MUST check the return value: a positive value indicates a successful write, while a negative value
    * indicates a partial write.
    *
    * @return number of bytes written, or a negative value if 'maxSizeBytes' was not enough
    */
   default int write(ByteBuffer buffer, int offset, T value, int maxSizeBytes)
   {
     // Remember where the caller left the buffer so the position can be put back once the write completes or fails.
     final int callerPosition = buffer.position();
     try {
       buffer.position(offset);
       final int written = write(buffer, value, maxSizeBytes);
       return written;
     }
     finally {
       buffer.position(callerPosition);
     }
   }

   /**
    * Converts a raw byte array into a value. This is mainly useful for self contained values that have been
    * serialized into byte arrays, as happens with 'COMPLEX' types which serialize to base64 strings in JSON
    * responses.
    *
    * 'COMPLEX' types should implement this method in order to participate in the expression system's built-in
    * function for deserializing base64 encoded values,
    * {@link org.apache.druid.math.expr.BuiltInExprMacros.ComplexDecodeBase64ExprMacro}.
    *
    * The default implementation always throws {@link IllegalStateException}.
    */
   default T fromBytes(byte[] value)
   {
     throw new IllegalStateException("Not supported");
   }

   /**
    * Whether the type can be grouped on. This always holds for the primitive types, arrays, and nested arrays, so the
    * SQL and native layers might ignore this flag for those types. For complex types the flag may be true or false,
    * depending on whether the semantics and implementation of the type naturally lend themselves to grouping. For
    * example, grouping on JSON columns makes sense, whereas there is little sense in grouping sketches (before
    * finalizing).
    *
    * A type that is groupable MUST implement {@link #hashCode} and {@link #equals} correctly.
    *
    * The default implementation returns false.
    */
   default boolean groupable()
   {
     return false;
   }

   /**
    * Hash function used when grouping on values of this type. The default implementation does not compute a hash; it
    * always throws the exception built by {@link DruidException#defensive}, so groupable types must override it.
    */
   @Override
   default int hashCode(T o)
   {
     throw DruidException.defensive("Not implemented. Check groupable() first");
   }

   /**
    * Equality check used when grouping on values of this type. The default implementation does not compare anything;
    * it always throws the exception built by {@link DruidException#defensive}, so groupable types must override it.
    */
   @Override
   default boolean equals(T a, T b)
   {
     throw DruidException.defensive("Not implemented. Check groupable() first");
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.druid.segment.column;

	import it.unimi.dsi.fastutil.Hash;
	import org.apache.druid.common.config.NullHandling;
	import org.apache.druid.error.DruidException;

	import java.nio.ByteBuffer;
	import java.util.Comparator;

	/**
	* TypeStrategy provides value comparison and binary serialization for Druid types. This can be obtained for ANY Druid
	* type via {@link TypeSignature#getStrategy()}.
	*
	* IMPORTANT!!! DO NOT USE THIS FOR WRITING COLUMNS, THERE ARE VERY LIKELY FAR BETTER WAYS TO DO THIS. However, if you
	* need to store a single value or small number of values, continue reading.
	*
	* ALSO IMPORTANT!!! This is primarily intended for writing ephemeral values within a single process, and is not
	* especially well suited (by itself) for persistent storage of data or cross process transfer. The support typically
	* necessary for such more persistent storage, such as tracking version of a format or endianness of the values, should
	* be handled externally to support these use cases.
	*
	* All implementations of this mechanism support reading and writing ONLY non-null values. To handle nulls inline with
	* your values, consider {@link NullableTypeStrategy}, which might be acceptable to use if you need to read and write
	* nullable values, AND, you have enough memory to burn a full byte for every value you want to store. It will store
	* values with a leading byte containing either {@link NullHandling#IS_NULL_BYTE} or
	* {@link NullHandling#IS_NOT_NULL_BYTE} as appropriate. If you have a lot of values to write and a lot of nulls,
	* consider alternative approaches to tracking your nulls instead.
	*
	* This mechanism supports both sequential access, where methods use the natural {@link ByteBuffer#position()} and
	* modify the underlying position as they operate, and random access at specific offsets, which does not modify the
	* underlying position. If a method accepts an offset parameter, it does not modify the position; if not, it does.
	*
	* The only methods implementors are required to provide are {@link #read(ByteBuffer)},
	* {@link #write(ByteBuffer, Object, int)} and {@link #estimateSizeBytes(Object)}, default implementations are provided
	* to set and reset buffer positions as appropriate for the offset based methods, but may be overridden if a more
	* optimized implementation is needed.
	*
	* Implementations of this interface should be thread safe, but may not use {@link ByteBuffer} in a thread safe manner,
	* potentially modifying positions and limits, either temporarily or permanently depending on which set of methods is
	* called.
	*
	* This interface extends {@code Comparator<Object>} instead of {@code Comparator<T>} because trying to specialize the
	* type of the comparison method can run into issues for comparators of objects that can sometimes be of a different
	* java class type. For example, {@code Comparator<Long>} cannot accept Integer objects in its comparison method
	* and there is no easy way for this interface definition to allow {@code TypeStrategy<Long>} to actually be a
	* {@code Comparator<Number>}. So, we fall back to effectively erasing the generic type and having them all be
	* {@code Comparator<Object>}.
	*/
	public interface TypeStrategy<T> extends Comparator<Object>, Hash.Strategy<T>
	{
	  /**
	   * Estimates the number of bytes needed to write the given value to memory. The estimate is not required to be
	   * exact, although many implementations will be. When exact sizing is not efficient, implementations should
	   * prefer to over-estimate.
	   *
	   * Typical uses include estimating the heap memory usage of an aggregator, or sizing the buffer that must be
	   * allocated before a value is passed to {@link #write}.
	   */
	  int estimateSizeBytes(T value);

	  /**
	   * Reads a non-null value from the {@link ByteBuffer}, starting at the current {@link ByteBuffer#position()}.
	   * The underlying position is advanced by the size of the value that was read.
	   *
	   * The returned value may hold on to a reference to the supplied {@link ByteBuffer}; when that can happen,
	   * {@link #readRetainsBufferReference()} returns true.
	   */
	  T read(ByteBuffer buffer);

	  /**
	   * Indicates whether objects returned by the {@link #read} methods may retain a reference to the supplied
	   * {@link ByteBuffer}. Returns true if a reference is retained at least some of the time, and false if, and
	   * only if, a reference is never retained.
	   */
	  boolean readRetainsBufferReference();

	  /**
	   * Writes a non-null value to the {@link ByteBuffer}, starting at the current {@link ByteBuffer#position()}.
	   * The underlying position is advanced by the size of the value that was written.
	   *
	   * The return value is the number of bytes written. If the value needs more than 'maxSizeBytes', a negative
	   * value is returned instead, indicating how many additional bytes would be required to write the value in
	   * full. In that case partial results may have been written to the buffer, and the position may be left
	   * wherever the implementation ran out of space. Callers that need to 'rewind' after a partial write should
	   * save the starting position beforehand.
	   *
	   * Callers MUST check the return value: a positive value indicates a successful write, while a negative value
	   * indicates a partial write.
	   *
	   * @return number of bytes written, or a negative value if 'maxSizeBytes' was not enough
	   */
	  int write(ByteBuffer buffer, T value, int maxSizeBytes);

	  /**
	   * Reads a non-null value from the {@link ByteBuffer} at the requested offset. The underlying
	   * {@link ByteBuffer#position()} is not permanently moved, but it may be temporarily changed while the read is
	   * in progress, so this cannot be considered thread safe usage of the buffer.
	   *
	   * The contract of this method is that any value it returns MUST be completely detached from the underlying
	   * {@link ByteBuffer}, because the value might outlive the memory location that was allocated to hold it. In
	   * other words, an object that is memory mapped must be copied on heap, or relocated with {@link #write} to
	   * another memory location that is owned by the caller.
	   */
	  default T read(ByteBuffer buffer, int offset)
	  {
	    // Remember where the caller left the buffer so the position can be put back once the read completes or fails.
	    final int callerPosition = buffer.position();
	    try {
	      buffer.position(offset);
	      final T result = read(buffer);
	      return result;
	    }
	    finally {
	      buffer.position(callerPosition);
	    }
	  }

	  /**
	   * Writes a non-null value to the {@link ByteBuffer} at the requested offset. The underlying
	   * {@link ByteBuffer#position()} is not permanently moved, but it may be temporarily changed while the write is
	   * in progress, so this cannot be considered thread safe usage of the buffer.
	   *
	   * The return value is the number of bytes written. If the value needs more than 'maxSizeBytes', a negative
	   * value is returned instead, indicating how many additional bytes would be required to write the value in
	   * full. In that case partial results may have been written to the buffer, but the underlying buffer position
	   * is unaffected regardless of whether the write was successful or not.
	   *
	   * Callers MUST check the return value: a positive value indicates a successful write, while a negative value
	   * indicates a partial write.
	   *
	   * @return number of bytes written, or a negative value if 'maxSizeBytes' was not enough
	   */
	  default int write(ByteBuffer buffer, int offset, T value, int maxSizeBytes)
	  {
	    // Remember where the caller left the buffer so the position can be put back once the write completes or fails.
	    final int callerPosition = buffer.position();
	    try {
	      buffer.position(offset);
	      final int written = write(buffer, value, maxSizeBytes);
	      return written;
	    }
	    finally {
	      buffer.position(callerPosition);
	    }
	  }

	  /**
	   * Converts a raw byte array into a value. This is mainly useful for self contained values that have been
	   * serialized into byte arrays, as happens with 'COMPLEX' types which serialize to base64 strings in JSON
	   * responses.
	   *
	   * 'COMPLEX' types should implement this method in order to participate in the expression system's built-in
	   * function for deserializing base64 encoded values,
	   * {@link org.apache.druid.math.expr.BuiltInExprMacros.ComplexDecodeBase64ExprMacro}.
	   *
	   * The default implementation always throws {@link IllegalStateException}.
	   */
	  default T fromBytes(byte[] value)
	  {
	    throw new IllegalStateException("Not supported");
	  }

	  /**
	   * Whether the type can be grouped on. This always holds for the primitive types, arrays, and nested arrays, so
	   * the SQL and native layers might ignore this flag for those types. For complex types the flag may be true or
	   * false, depending on whether the semantics and implementation of the type naturally lend themselves to
	   * grouping. For example, grouping on JSON columns makes sense, whereas there is little sense in grouping
	   * sketches (before finalizing).
	   *
	   * A type that is groupable MUST implement {@link #hashCode} and {@link #equals} correctly.
	   *
	   * The default implementation returns false.
	   */
	  default boolean groupable()
	  {
	    return false;
	  }

	  /**
	   * Hash function used when grouping on values of this type. The default implementation does not compute a hash;
	   * it always throws the exception built by {@link DruidException#defensive}, so groupable types must override it.
	   */
	  @Override
	  default int hashCode(T o)
	  {
	    throw DruidException.defensive("Not implemented. Check groupable() first");
	  }

	  /**
	   * Equality check used when grouping on values of this type. The default implementation does not compare
	   * anything; it always throws the exception built by {@link DruidException#defensive}, so groupable types must
	   * override it.
	   */
	  @Override
	  default boolean equals(T a, T b)
	  {
	    throw DruidException.defensive("Not implemented. Check groupable() first");
	  }
	}