blob: 5c3e5b6d4449920474a7c975abcb8b909ea4c4dc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
/// Declaration of the error code thrown by this file. The constant is only declared `extern` here,
/// so its definition has to come from another translation unit.
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
}
using namespace DB;
namespace local_engine
{
namespace
{
/// Spark-flavoured counterpart of reinterpretAsString. It differs from reinterpretAsString in two ways:
/// 1. Trailing zero bytes are kept, they are not cut.
/// 2. For non-String arguments the bytes of every value are written in reverse order, so a little-endian
///    integer is emitted big-endian. E.g. input: 0x1234 (4-byte integer), output: [0x00 0x00 0x12 0x34].
class FunctionReinterpretAsStringSpark : public IFunction
{
public:
    static constexpr auto name = "reinterpretAsStringSpark";

    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionReinterpretAsStringSpark>(); }

    String getName() const override { return name; }

    /// The function takes exactly one argument: the value to reinterpret.
    size_t getNumberOfArguments() const override { return 1; }

    bool useDefaultImplementationForConstants() const override { return true; }

    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }

    /// Returns the String data type.
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT when the argument type reports that its values are not
    /// unambiguously represented in a contiguous memory region.
    DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
    {
        const DataTypePtr & from_type = arguments[0].type;
        if (!from_type->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
            throw Exception(
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Cannot reinterpret {} as String because it is not contiguous in memory",
                from_type->getName());

        return std::make_shared<DataTypeString>();
    }

    /// Builds the result column: one output string per source row, each followed by a zero byte.
    /// The row count is taken from the source column itself; the input_rows_count argument is unused.
    ColumnPtr
    executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
    {
        /// Loop-invariant: decides whether the bytes of each value get reversed.
        /// NOTE(review): every type for which isString() is false is reversed, not only integers —
        /// confirm this is the intended behaviour for other contiguous types.
        const bool is_string_type = isString(arguments[0].type);
        const IColumn & src = *arguments[0].column;

        MutableColumnPtr dst = result_type->createColumn();
        auto * dst_concrete = assert_cast<ColumnString *>(dst.get());
        ColumnString::Chars & data_to = dst_concrete->getChars();
        ColumnString::Offsets & offsets_to = dst_concrete->getOffsets();

        const size_t rows = src.size();
        offsets_to.resize_exact(rows);

        ColumnString::Offset offset = 0;
        for (size_t i = 0; i < rows; ++i)
        {
            /// Copy the raw bytes directly from the source column into the destination buffer;
            /// no temporary String is materialised per row.
            const auto value = src.getDataAt(i);
            const size_t value_size = value.size;

            /// Room for the value plus its terminating zero byte.
            data_to.resize(offset + value_size + 1);
            auto * out = &data_to[offset];
            memcpy(out, value.data, value_size);

            /// Transform little-endian in input to big-endian in output.
            /// NOTE: the transform is not needed for the String type.
            if (!is_string_type)
                std::reverse(out, out + value_size);

            offset += value_size;
            data_to[offset] = 0;
            ++offset;

            /// Offsets are cumulative end positions and include the terminating zero byte.
            offsets_to[i] = offset;
        }

        return dst;
    }
};
}
/// Registers FunctionReinterpretAsStringSpark with the function factory passed in as `factory`.
/// NOTE(review): presumably it becomes callable under its static `name` ("reinterpretAsStringSpark");
/// confirm against the REGISTER_FUNCTION macro and FunctionFactory::registerFunction.
REGISTER_FUNCTION(ReinterpretAsStringSpark)
{
factory.registerFunction<FunctionReinterpretAsStringSpark>();
}
}