blob: 3a5aad35c04ee5bd831f3eb10559c36693899bf7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "ExcelBoolReader.h"
#include <Columns/ColumnsNumber.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadHelpers.h>
namespace DB
{
/// Error codes used in this file. They are only declared here (`extern`);
/// their definitions are provided by another translation unit.
namespace ErrorCodes
{
/// Raised when the input cannot be read as a boolean value.
extern const int CANNOT_PARSE_BOOL;
/// Raised when the destination column is not a UInt8 column.
extern const int ILLEGAL_COLUMN;
}
}
namespace local_engine
{
using namespace DB;
/// Returns `column` as a pointer to ColumnUInt8, the only column type a bool
/// value can be deserialized into here.
/// Throws ILLEGAL_COLUMN (with the column name appended to the message) for any other column type.
DB::ColumnUInt8 * checkAndGetDeserializeColumnType(IColumn & column)
{
    /// Validate the type first, cast afterwards.
    if (!checkAndGetColumn<DB::ColumnUInt8>(&column))
        throw Exception(DB::ErrorCodes::ILLEGAL_COLUMN, "Bool type can only deserialize columns of type UInt8.{}", column.getName());

    return typeid_cast<DB::ColumnUInt8 *>(&column);
}
/// Tries to read one of the built-in, case-insensitive boolean spellings from `istr`:
/// 1/0, T/F, True/False, Y/N, Yes/No, On/Off, Enable(d)/Disable(d).
///
/// On a match the corresponding value is appended to `column` and true is returned.
/// Otherwise false is returned and nothing is appended; characters may already have
/// been consumed from `istr` in that case.
/// This function does not check what follows the matched spelling.
bool tryDeserializeAllVariants(ColumnUInt8 * column, ReadBuffer & istr)
{
    /// '1'
    if (checkCharCaseInsensitive('1', istr))
    {
        column->insert(true);
        return true;
    }

    /// '0'
    if (checkCharCaseInsensitive('0', istr))
    {
        column->insert(false);
        return true;
    }

    /// 'T' or 'True': once an 'r' follows the 't', the rest of the word is mandatory.
    if (checkCharCaseInsensitive('t', istr))
    {
        const bool long_form = checkCharCaseInsensitive('r', istr);
        if (long_form && !checkStringCaseInsensitive("ue", istr))
            return false;
        column->insert(true);
        return true;
    }

    /// 'F' or 'False': once an 'a' follows the 'f', the rest of the word is mandatory.
    if (checkCharCaseInsensitive('f', istr))
    {
        const bool long_form = checkCharCaseInsensitive('a', istr);
        if (long_form && !checkStringCaseInsensitive("lse", istr))
            return false;
        column->insert(false);
        return true;
    }

    /// 'Y' or 'Yes': once an 'e' follows the 'y', the trailing 's' is mandatory.
    if (checkCharCaseInsensitive('y', istr))
    {
        const bool long_form = checkCharCaseInsensitive('e', istr);
        if (long_form && !checkCharCaseInsensitive('s', istr))
            return false;
        column->insert(true);
        return true;
    }

    /// 'N' or 'No': the 'o' is optional, so the result of the check is not needed.
    if (checkCharCaseInsensitive('n', istr))
    {
        checkCharCaseInsensitive('o', istr);
        column->insert(false);
        return true;
    }

    /// 'On' or 'Off': a lone 'o' is not a valid value.
    if (checkCharCaseInsensitive('o', istr))
    {
        if (checkCharCaseInsensitive('n', istr))
        {
            column->insert(true);
            return true;
        }
        if (!checkStringCaseInsensitive("ff", istr))
            return false;
        column->insert(false);
        return true;
    }

    /// 'Enable' or 'Enabled': the trailing 'd' is optional.
    if (checkStringCaseInsensitive("enable", istr))
    {
        checkCharCaseInsensitive('d', istr);
        column->insert(true);
        return true;
    }

    /// 'Disable' or 'Disabled': the trailing 'd' is optional.
    if (checkStringCaseInsensitive("disable", istr))
    {
        checkCharCaseInsensitive('d', istr);
        column->insert(false);
        return true;
    }

    /// None of the known spellings matched.
    return false;
}
/// Reads one boolean value from `istr` and appends it to `column`.
///
/// The following spellings are tried in order, each of which must be followed by
/// a position where `check_end_of_value` returns true:
///   1. settings.bool_true_representation
///   2. settings.bool_false_representation
///   3. the built-in variants accepted by tryDeserializeAllVariants
/// Between attempts the input is rewound to the checkpoint taken at the start.
///
/// Throws ILLEGAL_COLUMN (from checkAndGetDeserializeColumnType) if `column` is not a ColumnUInt8.
/// Throws CANNOT_PARSE_BOOL if no spelling matches, or if a spelling matched after a rewind
/// but the peekable buffer still reports unread data once the checkpoint is dropped.
/// When this function itself throws CANNOT_PARSE_BOOL, no row is left appended to `column`.
void deserializeImpl(
    IColumn & column, ReadBuffer & istr, const FormatSettings & settings, std::function<bool(ReadBuffer &)> check_end_of_value)
{
    DB::ColumnUInt8 * col = checkAndGetDeserializeColumnType(column);
    /// Remember the size so that a row appended by a failed attempt can be removed again.
    const size_t rows_before = col->size();

    DB::PeekableReadBuffer buf(istr);
    buf.setCheckpoint();

    /// Single place for the "data would be lost" error shared by the attempts made after a rewind.
    auto throw_data_loss = []
    {
        throw Exception(
            ErrorCodes::CANNOT_PARSE_BOOL,
            "Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if "
            "bool_true_representation or bool_false_representation contains some delimiters of input format");
    };

    /// Removes whatever a failed attempt appended, so the column is unchanged when we throw.
    auto discard_partial_row = [&]
    {
        if (col->size() > rows_before)
            col->popBack(col->size() - rows_before);
    };

    /// Attempt 1: user-configured "true" spelling.
    if (checkString(settings.bool_true_representation, buf) && check_end_of_value(buf))
    {
        col->insert(true);
        return;
    }

    /// Attempt 2: user-configured "false" spelling.
    buf.rollbackToCheckpoint();
    if (checkString(settings.bool_false_representation, buf) && check_end_of_value(buf))
    {
        /// Validate the buffer state before inserting, so a failure does not leave a row behind.
        buf.dropCheckpoint();
        if (buf.hasUnreadData())
            throw_data_loss();
        col->insert(false);
        return;
    }

    /// Attempt 3: built-in spellings. tryDeserializeAllVariants appends the row itself,
    /// before the end of the value has been validated.
    buf.rollbackToCheckpoint();
    if (tryDeserializeAllVariants(col, buf) && check_end_of_value(buf))
    {
        buf.dropCheckpoint();
        if (buf.hasUnreadData())
        {
            discard_partial_row();
            throw_data_loss();
        }
        return;
    }
    /// A spelling may have matched a prefix only (e.g. the value continues with other characters).
    discard_partial_row();

    /// Nothing matched: rewind so that the error message can show the beginning of the offending input.
    buf.makeContinuousMemoryFromCheckpointToPos();
    buf.rollbackToCheckpoint();
    throw Exception(
        ErrorCodes::CANNOT_PARSE_BOOL,
        "Cannot parse boolean value here: '{}', should be '{}' or '{}' controlled by setting bool_true_representation and "
        "bool_false_representation or one of "
        "True/False/T/F/Y/N/Yes/No/On/Off/Enable/Disable/Enabled/Disabled/1/0",
        String(buf.position(), std::min<size_t>(10, buf.available())),
        settings.bool_true_representation,
        settings.bool_false_representation);
}
/// Reads one boolean CSV field from `istr` and appends it to `column`.
///
/// Throws CANNOT_PARSE_BOOL immediately if `istr` is already at EOF; otherwise the
/// parsing is delegated to deserializeImpl with a CSV-specific end-of-value check.
void deserializeExcelBoolTextCSV(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings)
{
    if (istr.eof())
        throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_BOOL, "Expected boolean value but get EOF.");

    /// A value is complete when, after skipping spaces, the input is exhausted
    /// or the next character is the CSV delimiter or a line break.
    auto at_end_of_field = [&settings](DB::ReadBuffer & in)
    {
        /// Only spaces are skipped here; line delimiters are left in place.
        while (!in.eof() && *in.position() == ' ')
            ++in.position();

        if (in.eof())
            return true;

        const char next_char = *in.position();
        return next_char == settings.csv.delimiter || next_char == '\n' || next_char == '\r';
    };

    deserializeImpl(column, istr, settings, at_end_of_field);
}
}