blob: ae4145338ca58bde37301a95f9b12e5f92bed082 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <glog/logging.h>
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <algorithm>
#include <ostream>
#include <string>
#include <type_traits>
#include "absl/strings/substitute.h"
#include "common/status.h"
#include "gutil/endian.h"
#include "olap/decimal12.h"
#include "olap/olap_common.h"
#include "olap/types.h"
#include "util/slice.h"
#include "vec/core/extended_types.h"
#include "vec/core/types.h"
namespace doris {
using FullEncodeAscendingFunc = void (*)(const void* value, std::string* buf);
using EncodeAscendingFunc = void (*)(const void* value, size_t index_size, std::string* buf);
using DecodeAscendingFunc = Status (*)(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr);
// Order-preserving binary encoding for values of a particular type so that
// those values can be compared by memcpy their encoded bytes.
//
// To obtain instance of this class, use the `get_key_coder(FieldType)` method.
class KeyCoder {
public:
template <typename TraitsType>
KeyCoder(TraitsType traits);
// encode the provided `value` into `buf`.
void full_encode_ascending(const void* value, std::string* buf) const {
_full_encode_ascending(value, buf);
}
// similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value.
// only applicable to string type
void encode_ascending(const void* value, size_t index_size, std::string* buf) const {
_encode_ascending(value, index_size, buf);
}
// Only used for test, should delete it in the future
Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) const {
return _decode_ascending(encoded_key, index_size, cell_ptr);
}
private:
FullEncodeAscendingFunc _full_encode_ascending;
EncodeAscendingFunc _encode_ascending;
DecodeAscendingFunc _decode_ascending;
};
extern const KeyCoder* get_key_coder(FieldType type);
template <FieldType field_type, typename Enable = void>
class KeyCoderTraits {};
template <FieldType field_type>
class KeyCoderTraits<
field_type,
typename std::enable_if<
IsIntegral<typename CppTypeTraits<field_type>::CppType>::value ||
vectorized::IsDecimalNumber<typename CppTypeTraits<field_type>::CppType>>::type> {
public:
using CppType = typename CppTypeTraits<field_type>::CppType;
using UnsignedCppType = typename CppTypeTraits<field_type>::UnsignedCppType;
static void full_encode_ascending(const void* value, std::string* buf) {
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, value, sizeof(unsigned_val));
// swap MSB to encode integer
if (IsSigned<CppType>::value) {
unsigned_val ^=
(static_cast<UnsignedCppType>(1) << (sizeof(UnsignedCppType) * CHAR_BIT - 1));
}
// make it bigendian
unsigned_val = to_endian<std::endian::big>(unsigned_val);
buf->append((char*)&unsigned_val, sizeof(unsigned_val));
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
full_encode_ascending(value, buf);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
// decode_ascending only used in orinal index page, maybe should remove it in the future.
// currently, we reduce the usage of this method.
if (encoded_key->size < sizeof(UnsignedCppType)) {
return Status::InvalidArgument("Key too short, need={} vs real={}",
sizeof(UnsignedCppType), encoded_key->size);
}
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType));
unsigned_val = to_endian<std::endian::big>(unsigned_val);
if (IsSigned<CppType>::value) {
unsigned_val ^=
(static_cast<UnsignedCppType>(1) << (sizeof(UnsignedCppType) * CHAR_BIT - 1));
}
memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType));
encoded_key->remove_prefix(sizeof(UnsignedCppType));
return Status::OK();
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_DATE> {
public:
using CppType = typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::CppType;
using UnsignedCppType =
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATE>::UnsignedCppType;
public:
static void full_encode_ascending(const void* value, std::string* buf) {
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, value, sizeof(unsigned_val));
// make it bigendian
unsigned_val = to_endian<std::endian::big>(unsigned_val);
buf->append((char*)&unsigned_val, sizeof(unsigned_val));
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
full_encode_ascending(value, buf);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
if (encoded_key->size < sizeof(UnsignedCppType)) {
return Status::InvalidArgument("Key too short, need={} vs real={}",
sizeof(UnsignedCppType), encoded_key->size);
}
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType));
unsigned_val = to_endian<std::endian::big>(unsigned_val);
memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType));
encoded_key->remove_prefix(sizeof(UnsignedCppType));
return Status::OK();
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_DATEV2> {
public:
using CppType = typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>::CppType;
using UnsignedCppType =
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATEV2>::UnsignedCppType;
public:
static void full_encode_ascending(const void* value, std::string* buf) {
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, value, sizeof(unsigned_val));
// make it bigendian
unsigned_val = to_endian<std::endian::big>(unsigned_val);
buf->append((char*)&unsigned_val, sizeof(unsigned_val));
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
full_encode_ascending(value, buf);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
if (encoded_key->size < sizeof(UnsignedCppType)) {
return Status::InvalidArgument(absl::Substitute("Key too short, need=$0 vs real=$1",
sizeof(UnsignedCppType),
encoded_key->size));
}
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType));
unsigned_val = to_endian<std::endian::big>(unsigned_val);
memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType));
encoded_key->remove_prefix(sizeof(UnsignedCppType));
return Status::OK();
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2> {
public:
using CppType = typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::CppType;
using UnsignedCppType =
typename CppTypeTraits<FieldType::OLAP_FIELD_TYPE_DATETIMEV2>::UnsignedCppType;
public:
static void full_encode_ascending(const void* value, std::string* buf) {
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, value, sizeof(unsigned_val));
// make it bigendian
unsigned_val = to_endian<std::endian::big>(unsigned_val);
buf->append((char*)&unsigned_val, sizeof(unsigned_val));
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
full_encode_ascending(value, buf);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
if (encoded_key->size < sizeof(UnsignedCppType)) {
return Status::InvalidArgument(absl::Substitute("Key too short, need=$0 vs real=$1",
sizeof(UnsignedCppType),
encoded_key->size));
}
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType));
unsigned_val = to_endian<std::endian::big>(unsigned_val);
memcpy(cell_ptr, &unsigned_val, sizeof(UnsignedCppType));
encoded_key->remove_prefix(sizeof(UnsignedCppType));
return Status::OK();
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_DECIMAL> {
public:
static void full_encode_ascending(const void* value, std::string* buf) {
decimal12_t decimal_val;
memcpy(&decimal_val, value, sizeof(decimal12_t));
KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_BIGINT>::full_encode_ascending(
&decimal_val.integer, buf);
KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_INT>::full_encode_ascending(&decimal_val.fraction,
buf);
} // namespace doris
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
full_encode_ascending(value, buf);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
decimal12_t decimal_val = {0, 0};
RETURN_IF_ERROR(KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_BIGINT>::decode_ascending(
encoded_key, sizeof(decimal_val.integer), (uint8_t*)&decimal_val.integer));
RETURN_IF_ERROR(KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_INT>::decode_ascending(
encoded_key, sizeof(decimal_val.fraction), (uint8_t*)&decimal_val.fraction));
memcpy(cell_ptr, &decimal_val, sizeof(decimal12_t));
return Status::OK();
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_CHAR> {
public:
static void full_encode_ascending(const void* value, std::string* buf) {
auto slice = reinterpret_cast<const Slice*>(value);
buf->append(slice->get_data(), slice->get_size());
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
const Slice* slice = (const Slice*)value;
CHECK(index_size <= slice->size)
<< "index size is larger than char size, index=" << index_size
<< ", char=" << slice->size;
buf->append(slice->data, index_size);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
throw Exception(Status::FatalError("decode_ascending is not implemented"));
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_VARCHAR> {
public:
static void full_encode_ascending(const void* value, std::string* buf) {
auto slice = reinterpret_cast<const Slice*>(value);
buf->append(slice->get_data(), slice->get_size());
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
const Slice* slice = (const Slice*)value;
size_t copy_size = std::min(index_size, slice->size);
buf->append(slice->data, copy_size);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
throw Exception(Status::FatalError("decode_ascending is not implemented"));
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_STRING> {
public:
static void full_encode_ascending(const void* value, std::string* buf) {
auto slice = reinterpret_cast<const Slice*>(value);
buf->append(slice->get_data(), slice->get_size());
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
const Slice* slice = (const Slice*)value;
size_t copy_size = std::min(index_size, slice->size);
buf->append(slice->data, copy_size);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
throw Exception(Status::FatalError("decode_ascending is not implemented"));
}
};
template <FieldType field_type>
class KeyCoderTraitsForFloat {
public:
using CppType = typename CppTypeTraits<field_type>::CppType;
using UnsignedCppType = typename CppTypeTraits<field_type>::UnsignedCppType;
static UnsignedCppType encode_float(UnsignedCppType val) {
constexpr UnsignedCppType sign_bit = (UnsignedCppType)1
<< (sizeof(UnsignedCppType) * 8 - 1);
if (val & sign_bit) {
return ~val;
} else {
return val ^ sign_bit;
}
}
static UnsignedCppType decode_float(UnsignedCppType val) {
constexpr UnsignedCppType sign_bit = (UnsignedCppType)1
<< (sizeof(UnsignedCppType) * 8 - 1);
if (val & sign_bit) {
return val ^ sign_bit;
} else {
return ~val;
}
}
static void full_encode_ascending(const void* value, std::string* buf) {
CppType val;
memcpy(&val, value, sizeof(CppType));
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, &val, sizeof(UnsignedCppType));
unsigned_val = encode_float(unsigned_val);
unsigned_val = to_endian<std::endian::big>(unsigned_val);
buf->append((char*)&unsigned_val, sizeof(UnsignedCppType));
}
static void encode_ascending(const void* value, size_t index_size, std::string* buf) {
full_encode_ascending(value, buf);
}
static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) {
if (encoded_key->size < sizeof(UnsignedCppType)) {
return Status::InvalidArgument(absl::Substitute("Key too short, need=$0 vs real=$1",
sizeof(UnsignedCppType),
encoded_key->size));
}
UnsignedCppType unsigned_val;
memcpy(&unsigned_val, encoded_key->data, sizeof(UnsignedCppType));
unsigned_val = to_endian<std::endian::big>(unsigned_val);
unsigned_val = decode_float(unsigned_val);
CppType val;
memcpy(&val, &unsigned_val, sizeof(CppType));
memcpy(cell_ptr, &val, sizeof(CppType));
encoded_key->remove_prefix(sizeof(UnsignedCppType));
return Status::OK();
}
};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_FLOAT>
: public KeyCoderTraitsForFloat<FieldType::OLAP_FIELD_TYPE_FLOAT> {};
template <>
class KeyCoderTraits<FieldType::OLAP_FIELD_TYPE_DOUBLE>
: public KeyCoderTraitsForFloat<FieldType::OLAP_FIELD_TYPE_DOUBLE> {};
} // namespace doris