// blob: ef1b0a0ef0cc721b942b7da3199070571da3c84e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/common/string_utils/string_utils.h"
#include <gtest/gtest.h>
#include "vec/functions/like.h"
namespace doris::vectorized {
// Fixture shared by every TEST_F in this file. It owns no state, so both
// lifecycle hooks are deliberately left empty.
class StringUtilsTest : public ::testing::Test {
protected:
    void SetUp() override {
        // Nothing to prepare before a test runs.
    }

    void TearDown() override {
        // Nothing to release after a test runs.
    }
};
// Checks is_ascii() against every possible byte value: codes 0x00-0x7F are
// expected to be classified as ASCII and codes 0x80-0xFF are not.
TEST_F(StringUtilsTest, TestIsAscii) {
    // The counters are ints (not unsigned chars) so the inclusive upper bounds
    // 0x7F and 0xFF can be reached without the loop variable wrapping around.

    // Test ASCII characters, including DEL (0x7F), the last ASCII code.
    for (int code = 0x00; code <= 0x7F; ++code) {
        EXPECT_TRUE(is_ascii(static_cast<char>(code)))
                << "Failed for character with code " << code;
    }
    // Test non-ASCII characters, including the final byte value 0xFF.
    for (int code = 0x80; code <= 0xFF; ++code) {
        EXPECT_FALSE(is_ascii(static_cast<char>(code)))
                << "Failed for character with code " << code;
    }
}
// Checks is_alpha_ascii(): 'a'-'z' and 'A'-'Z' are expected to be accepted and
// every other byte value (0x00-0xFF) is expected to be rejected.
TEST_F(StringUtilsTest, TestIsAlphaAscii) {
    // Test lowercase letters
    for (char c = 'a'; c <= 'z'; ++c) {
        EXPECT_TRUE(is_alpha_ascii(c)) << "Failed for character " << c;
    }
    // Test uppercase letters
    for (char c = 'A'; c <= 'Z'; ++c) {
        EXPECT_TRUE(is_alpha_ascii(c)) << "Failed for character " << c;
    }
    // Test every remaining byte value: control characters, digits, punctuation
    // (including the range after 'z' up to 0x7F) and all non-ASCII bytes.
    // An int counter lets the sweep include 0xFF without wrapping.
    for (int code = 0x00; code <= 0xFF; ++code) {
        const bool is_letter = (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z');
        if (is_letter) {
            continue;
        }
        EXPECT_FALSE(is_alpha_ascii(static_cast<char>(code)))
                << "Failed for character with code " << code;
    }
}
// Checks is_numeric_ascii(): '0'-'9' are expected to be accepted and every
// other byte value (0x00-0xFF) is expected to be rejected.
TEST_F(StringUtilsTest, TestIsNumericAscii) {
    // Test digits
    for (char c = '0'; c <= '9'; ++c) {
        EXPECT_TRUE(is_numeric_ascii(c)) << "Failed for character " << c;
    }
    // Test every non-digit byte value. This covers the characters below '0',
    // everything after '9' (letters and punctuation up to 0x7F) and all
    // non-ASCII bytes. An int counter lets the sweep include 0xFF without
    // wrapping.
    for (int code = 0x00; code <= 0xFF; ++code) {
        const bool is_digit = (code >= '0' && code <= '9');
        if (is_digit) {
            continue;
        }
        EXPECT_FALSE(is_numeric_ascii(static_cast<char>(code)))
                << "Failed for character with code " << code;
    }
}
// Checks is_alpha_numeric_ascii(): letters and digits are expected to be
// accepted and every other byte value (0x00-0xFF) is expected to be rejected.
TEST_F(StringUtilsTest, TestIsAlphaNumericAscii) {
    // Test lowercase letters
    for (char c = 'a'; c <= 'z'; ++c) {
        EXPECT_TRUE(is_alpha_numeric_ascii(c)) << "Failed for character " << c;
    }
    // Test uppercase letters
    for (char c = 'A'; c <= 'Z'; ++c) {
        EXPECT_TRUE(is_alpha_numeric_ascii(c)) << "Failed for character " << c;
    }
    // Test digits
    for (char c = '0'; c <= '9'; ++c) {
        EXPECT_TRUE(is_alpha_numeric_ascii(c)) << "Failed for character " << c;
    }
    // Test every remaining byte value: all non-alphanumeric ASCII (including
    // the range after 'z' up to 0x7F) and all non-ASCII bytes. An int counter
    // lets the sweep include 0xFF without wrapping.
    for (int code = 0x00; code <= 0xFF; ++code) {
        const bool is_letter = (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z');
        const bool is_digit = (code >= '0' && code <= '9');
        if (is_letter || is_digit) {
            continue;
        }
        EXPECT_FALSE(is_alpha_numeric_ascii(static_cast<char>(code)))
                << "Failed for character with code " << code;
    }
}
// Checks is_word_char_ascii(): letters, digits and '_' are expected to be
// accepted and every other byte value (0x00-0xFF) is expected to be rejected.
TEST_F(StringUtilsTest, TestIsWordCharAscii) {
    // Test lowercase letters
    for (char c = 'a'; c <= 'z'; ++c) {
        EXPECT_TRUE(is_word_char_ascii(c)) << "Failed for character " << c;
    }
    // Test uppercase letters
    for (char c = 'A'; c <= 'Z'; ++c) {
        EXPECT_TRUE(is_word_char_ascii(c)) << "Failed for character " << c;
    }
    // Test digits
    for (char c = '0'; c <= '9'; ++c) {
        EXPECT_TRUE(is_word_char_ascii(c)) << "Failed for character " << c;
    }
    // Test underscore
    EXPECT_TRUE(is_word_char_ascii('_')) << "Failed for underscore character";
    // Test every remaining byte value: all non-word ASCII (including the range
    // after 'z' up to 0x7F) and all non-ASCII bytes. The underscore is skipped
    // here because it was asserted positively above. An int counter lets the
    // sweep include 0xFF without wrapping.
    for (int code = 0x00; code <= 0xFF; ++code) {
        const bool is_letter = (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z');
        const bool is_digit = (code >= '0' && code <= '9');
        if (is_letter || is_digit || code == '_') {
            continue;
        }
        EXPECT_FALSE(is_word_char_ascii(static_cast<char>(code)))
                << "Failed for character with code " << code;
    }
}
// Checks is_valid_identifier_begin(): letters and '_' are expected to be
// accepted; digits and every other byte value (0x00-0xFF) are expected to be
// rejected.
TEST_F(StringUtilsTest, TestIsValidIdentifierBegin) {
    // Test lowercase letters
    for (char c = 'a'; c <= 'z'; ++c) {
        EXPECT_TRUE(is_valid_identifier_begin(c)) << "Failed for character " << c;
    }
    // Test uppercase letters
    for (char c = 'A'; c <= 'Z'; ++c) {
        EXPECT_TRUE(is_valid_identifier_begin(c)) << "Failed for character " << c;
    }
    // Test underscore
    EXPECT_TRUE(is_valid_identifier_begin('_')) << "Failed for underscore character";
    // Test digits (should return false)
    for (char c = '0'; c <= '9'; ++c) {
        EXPECT_FALSE(is_valid_identifier_begin(c)) << "Failed for character " << c;
    }
    // Test every remaining byte value: all other ASCII characters (including
    // the range after 'z' up to 0x7F) and all non-ASCII bytes. Letters, digits
    // and the underscore are skipped because they were asserted above. An int
    // counter lets the sweep include 0xFF without wrapping.
    for (int code = 0x00; code <= 0xFF; ++code) {
        const bool is_letter = (code >= 'a' && code <= 'z') || (code >= 'A' && code <= 'Z');
        const bool is_digit = (code >= '0' && code <= '9');
        if (is_letter || is_digit || code == '_') {
            continue;
        }
        EXPECT_FALSE(is_valid_identifier_begin(static_cast<char>(code)))
                << "Failed for character with code " << code;
    }
}
// Pins the output of replace_pattern_by_escape() for a set of patterns, using
// '^' as the custom escape character (and 'A' for the "escape not present"
// case).
TEST_F(StringUtilsTest, replace_pattern_by_escape) {
    // Wraps the literal in a StringRef so each case below reads as
    // (pattern, escape) -> expected text.
    const auto rewrite = [](const char* pattern, char escape) {
        return replace_pattern_by_escape(StringRef {pattern}, escape);
    };

    // Escape character does not occur in the pattern: text comes back unchanged.
    EXPECT_EQ(rewrite("abcdef", 'A'), "abcdef");

    // Escape followed by a wildcard is expected as backslash + wildcard.
    EXPECT_EQ(rewrite("abc^%def", '^'), "abc\\%def");

    // Runs of the escape character: a pair is expected to collapse to one.
    EXPECT_EQ(rewrite("abc^^ef", '^'), "abc^ef");
    EXPECT_EQ(rewrite("abc^^^ef", '^'), "abc^^ef");

    // Escaped escape followed by an escaped '_', with and without a trailing
    // lone escape character.
    EXPECT_EQ(rewrite("abc^^^_ef", '^'), "abc^\\_ef");
    EXPECT_EQ(rewrite("abc^^^_^ef", '^'), "abc^\\_^ef");

    // A backslash already present in the input is expected to be doubled.
    EXPECT_EQ(rewrite("\\abc^^^_^ef", '^'), "\\\\abc^\\_^ef");
}
} // namespace doris::vectorized