// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <gtest/gtest.h>

#include "arrow/testing/gtest_util.h"
#include "arrow/util/trie.h"

namespace arrow {
namespace internal {

TEST(SmallString, Basics) {
  using SS = SmallString<5>;
  {
    SS s;
    ASSERT_EQ(s.length(), 0);
    ASSERT_EQ(util::string_view(s), util::string_view(""));
    ASSERT_EQ(s, "");
    ASSERT_NE(s, "x");
    ASSERT_EQ(sizeof(s), 6);
  }
  {
    SS s("abc");
    ASSERT_EQ(s.length(), 3);
    ASSERT_EQ(util::string_view(s), util::string_view("abc"));
    ASSERT_EQ(std::memcmp(s.data(), "abc", 3), 0);
    ASSERT_EQ(s, "abc");
    ASSERT_NE(s, "ab");
  }
}

TEST(SmallString, Assign) {
  using SS = SmallString<5>;
  auto s = SS();

  s = util::string_view("abc");
  ASSERT_EQ(s.length(), 3);
  ASSERT_EQ(util::string_view(s), util::string_view("abc"));
  ASSERT_EQ(std::memcmp(s.data(), "abc", 3), 0);
  ASSERT_EQ(s, "abc");
  ASSERT_NE(s, "ab");

  s = std::string("ghijk");
  ASSERT_EQ(s.length(), 5);
  ASSERT_EQ(util::string_view(s), util::string_view("ghijk"));
  ASSERT_EQ(std::memcmp(s.data(), "ghijk", 5), 0);
  ASSERT_EQ(s, "ghijk");
  ASSERT_NE(s, "");

  s = SS("xy");
  ASSERT_EQ(s.length(), 2);
  ASSERT_EQ(util::string_view(s), util::string_view("xy"));
  ASSERT_EQ(std::memcmp(s.data(), "xy", 2), 0);
  ASSERT_EQ(s, "xy");
  ASSERT_NE(s, "xyz");
}

TEST(SmallString, Substr) {
  using SS = SmallString<5>;
  {
    auto s = SS();
    ASSERT_EQ(s.substr(0), "");
    ASSERT_EQ(s.substr(0, 2), "");
  }
  {
    auto s = SS("abcd");
    ASSERT_EQ(s.substr(0), "abcd");
    ASSERT_EQ(s.substr(1), "bcd");
    ASSERT_EQ(s.substr(4), "");
    ASSERT_EQ(s.substr(0, 0), "");
    ASSERT_EQ(s.substr(0, 3), "abc");
    ASSERT_EQ(s.substr(0, 4), "abcd");
    ASSERT_EQ(s.substr(1, 0), "");
    ASSERT_EQ(s.substr(1, 2), "bc");
    ASSERT_EQ(s.substr(4, 0), "");
    ASSERT_EQ(s.substr(4, 1), "");
  }
}

static std::vector<std::string> AllNulls() {
  return {"#N/A",    "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND",
          "1.#QNAN", "N/A",      "NA",  "NULL",    "NaN",      "n/a",  "nan",  "null"};
}

static void TestTrieContents(const Trie& trie, const std::vector<std::string>& entries) {
  std::unordered_map<std::string, int32_t> control;
  auto n_entries = static_cast<int32_t>(entries.size());

  // Build control container
  for (int32_t i = 0; i < n_entries; ++i) {
    auto p = control.insert({entries[i], i});
    ASSERT_TRUE(p.second);
  }

  // Check all existing entries in trie
  for (int32_t i = 0; i < n_entries; ++i) {
    ASSERT_EQ(i, trie.Find(entries[i])) << "for string '" << entries[i] << "'";
  }

  auto CheckNotExists = [&control, &trie](const std::string& s) {
    auto p = control.find(s);
    if (p == control.end()) {
      ASSERT_EQ(-1, trie.Find(s)) << "for string '" << s << "'";
    }
  };

  // Check potentially nonexistent strings
  CheckNotExists("");
  CheckNotExists("X");
  CheckNotExists("abcdefxxxxxxxxxxxxxxx");

  // Check potentially nonexistent variations of existing entries
  for (const auto& e : entries) {
    CheckNotExists(e + "X");
    if (e.size() > 0) {
      CheckNotExists(e.substr(0, 1));
      auto prefix = e.substr(0, e.size() - 1);
      CheckNotExists(prefix);
      CheckNotExists(prefix + "X");
      auto split_at = e.size() / 2;
      CheckNotExists(e.substr(0, split_at) + 'x' + e.substr(split_at + 1));
    }
  }
}

static void TestTrieContents(const std::vector<std::string>& entries) {
  TrieBuilder builder;
  for (const auto& s : entries) {
    ASSERT_OK(builder.Append(s));
  }
  const Trie trie = builder.Finish();
  ASSERT_OK(trie.Validate());

  TestTrieContents(trie, entries);
}

TEST(Trie, Empty) {
  TrieBuilder builder;
  const Trie trie = builder.Finish();
  ASSERT_OK(trie.Validate());

  ASSERT_EQ(-1, trie.Find(""));
  ASSERT_EQ(-1, trie.Find("x"));
}

TEST(Trie, EmptyString) {
  TrieBuilder builder;
  ASSERT_OK(builder.Append(""));
  const Trie trie = builder.Finish();
  ASSERT_OK(trie.Validate());

  ASSERT_EQ(0, trie.Find(""));
  ASSERT_EQ(-1, trie.Find("x"));
}

TEST(Trie, LongString) {
  auto maxlen = static_cast<size_t>(std::numeric_limits<int16_t>::max());
  // Ensure we can insert strings with length up to maxlen
  for (auto&& length : {maxlen, maxlen - 1, maxlen / 2}) {
    TrieBuilder builder;
    std::string long_string(length, 'x');
    ASSERT_OK(builder.Append(""));
    ASSERT_OK(builder.Append(long_string));
    const Trie trie = builder.Finish();
    ASSERT_EQ(1, trie.Find(long_string));
  }

  // Ensure that the trie always returns false for strings with length > maxlen
  for (auto&& length : {maxlen, maxlen - 1, maxlen / 2, maxlen + 1, maxlen * 2}) {
    TrieBuilder builder;
    ASSERT_OK(builder.Append(""));
    const Trie trie = builder.Finish();
    std::string long_string(length, 'x');
    ASSERT_EQ(-1, trie.Find(long_string));
  }
}

TEST(Trie, Basics1) {
  TestTrieContents({"abc", "de", "f"});
  TestTrieContents({"abc", "de", "f", ""});
}

TEST(Trie, Basics2) {
  TestTrieContents({"a", "abc", "abcd", "abcdef"});
  TestTrieContents({"", "a", "abc", "abcd", "abcdef"});
}

TEST(Trie, Basics3) {
  TestTrieContents({"abcd", "ab", "a"});
  TestTrieContents({"abcd", "ab", "a", ""});
}

TEST(Trie, LongStrings) {
  TestTrieContents({"abcdefghijklmnopqr", "abcdefghijklmnoprq", "defghijklmnopqrst"});
  TestTrieContents({"abcdefghijklmnopqr", "abcdefghijklmnoprq", "abcde"});
}

TEST(Trie, NullChars) {
  const std::string empty;
  const std::string nul(1, '\x00');
  std::string a, b, c, d;
  a = "x" + nul + "y";
  b = "x" + nul + "z";
  c = nul + "y";
  d = nul;
  ASSERT_EQ(a.length(), 3);
  ASSERT_EQ(d.length(), 1);

  TestTrieContents({a, b, c, d});
  TestTrieContents({a, b, c});
  TestTrieContents({a, b, c, d, ""});
  TestTrieContents({a, b, c, ""});
  TestTrieContents({d, c, b, a});
  TestTrieContents({c, b, a});
  TestTrieContents({d, c, b, a, ""});
  TestTrieContents({c, b, a, ""});
}

TEST(Trie, NegativeChars) {
  // Test with characters >= 0x80 (to check the absence of sign issues)
  TestTrieContents({"\x7f\x80\x81\xff", "\x7f\x80\x81", "\x7f\xff\x81", "\xff\x80\x81"});
}

TEST(Trie, CSVNulls) { TestTrieContents(AllNulls()); }

TEST(Trie, Duplicates) {
  {
    TrieBuilder builder;
    ASSERT_OK(builder.Append("ab"));
    ASSERT_OK(builder.Append("abc"));
    ASSERT_RAISES(Invalid, builder.Append("abc"));
    ASSERT_OK(builder.Append("abcd"));
    ASSERT_RAISES(Invalid, builder.Append("ab"));
    ASSERT_OK(builder.Append("abcde"));
    const Trie trie = builder.Finish();

    TestTrieContents(trie, {"ab", "abc", "abcd", "abcde"});
  }
  {
    // With allow_duplicates = true
    TrieBuilder builder;
    ASSERT_OK(builder.Append("ab", true));
    ASSERT_OK(builder.Append("abc", true));
    ASSERT_OK(builder.Append("abc", true));
    ASSERT_OK(builder.Append("abcd", true));
    ASSERT_OK(builder.Append("ab", true));
    ASSERT_OK(builder.Append("abcde", true));
    const Trie trie = builder.Finish();

    TestTrieContents(trie, {"ab", "abc", "abcd", "abcde"});
  }
}

TEST(Trie, CapacityError) {
  // A trie uses 16-bit indices into various internal structures and
  // therefore has limited size available.
  TrieBuilder builder;
  uint8_t first, second, third;
  bool had_capacity_error = false;
  uint8_t s[] = "\x00\x00\x00\x00";

  for (first = 1; first < 125; ++first) {
    s[0] = first;
    for (second = 1; second < 125; ++second) {
      s[1] = second;
      for (third = 1; third < 125; ++third) {
        s[2] = third;
        auto st = builder.Append(reinterpret_cast<const char*>(s));
        if (st.IsCapacityError()) {
          ASSERT_GE(first, 2);
          had_capacity_error = true;
          break;
        } else {
          ASSERT_OK(st);
        }
      }
    }
  }
  ASSERT_TRUE(had_capacity_error) << "Should have produced CapacityError";
}

}  // namespace internal
}  // namespace arrow
