| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #include <string> |
| #include <vector> |
| |
| #include <nlohmann/json.hpp> |
| |
| #include "iceberg/expression/json_serde_internal.h" |
| #include "iceberg/expression/literal.h" |
| #include "iceberg/expression/predicate.h" |
| #include "iceberg/expression/term.h" |
| #include "iceberg/schema.h" |
| #include "iceberg/transform.h" |
| #include "iceberg/util/checked_cast.h" |
| #include "iceberg/util/json_util_internal.h" |
| #include "iceberg/util/macros.h" |
| #include "iceberg/util/string_util.h" |
| #include "iceberg/util/transform_util.h" |
| |
| namespace iceberg { |
| namespace { |
// Field names of the JSON objects handled in this file.
constexpr std::string_view kType{"type"};
constexpr std::string_view kTerm{"term"};
// Used both as a field name and as the "type" tag of a transform term.
constexpr std::string_view kTransform{"transform"};
constexpr std::string_view kValue{"value"};
constexpr std::string_view kValues{"values"};
constexpr std::string_view kLeft{"left"};
constexpr std::string_view kRight{"right"};
constexpr std::string_view kChild{"child"};

// "type" tags of wrapped literals and references.
constexpr std::string_view kLiteral{"literal"};
constexpr std::string_view kReference{"reference"};

// Constant expressions.
constexpr std::string_view kTrue{"true"};
constexpr std::string_view kFalse{"false"};

// Logical operations.
constexpr std::string_view kAnd{"and"};
constexpr std::string_view kOr{"or"};
constexpr std::string_view kNot{"not"};

// Operations that take a single 'value'.
constexpr std::string_view kEq{"eq"};
constexpr std::string_view kNotEq{"not-eq"};
constexpr std::string_view kLt{"lt"};
constexpr std::string_view kLtEq{"lt-eq"};
constexpr std::string_view kGt{"gt"};
constexpr std::string_view kGtEq{"gt-eq"};
constexpr std::string_view kStartsWith{"starts-with"};
constexpr std::string_view kNotStartsWith{"not-starts-with"};

// Operations that take an array of 'values'.
constexpr std::string_view kIn{"in"};
constexpr std::string_view kNotIn{"not-in"};

// Operations that take no value.
constexpr std::string_view kIsNull{"is-null"};
constexpr std::string_view kNotNull{"not-null"};
constexpr std::string_view kIsNan{"is-nan"};
constexpr std::string_view kNotNan{"not-nan"};

// Aggregate operations (recognized by name, rejected by ExpressionFromJson).
constexpr std::string_view kCount{"count"};
constexpr std::string_view kCountNull{"count-null"};
constexpr std::string_view kCountStar{"count-star"};
constexpr std::string_view kMin{"min"};
constexpr std::string_view kMax{"max"};
| |
| /// Helper to build the transform JSON object shared by Unbound/BoundTransform |
| nlohmann::json MakeTransformJson(std::string_view transform_str, |
| std::string_view ref_name) { |
| nlohmann::json json; |
| json[kType] = kTransform; |
| json[kTransform] = transform_str; |
| json[kTerm] = ref_name; |
| return json; |
| } |
| |
| /// Helper to check if a JSON term represents a transform |
| bool IsTransformTerm(const nlohmann::json& json) { |
| return json.is_object() && json.contains(kType) && |
| json[kType].get<std::string>() == kTransform && json.contains(kTerm); |
| } |
| |
| /// Template helper to create predicates from JSON with the appropriate term type. |
| template <typename B> |
| Result<std::unique_ptr<UnboundPredicate>> PredicateFromJson( |
| Expression::Operation op, std::shared_ptr<UnboundTerm<B>> term, |
| const nlohmann::json& json, const Schema* schema) { |
| // Bind the term against the schema so we can pass the resolved type to |
| // LiteralFromJson for type-aware parsing. |
| std::shared_ptr<B> bound_term; |
| if (schema != nullptr) { |
| ICEBERG_ASSIGN_OR_RAISE(bound_term, term->Bind(*schema, /*case_sensitive=*/false)); |
| } |
| |
| // Helper that selects type-aware or naive literal parsing. |
| auto parse_literal = [&](const nlohmann::json& val) -> Result<Literal> { |
| if (bound_term != nullptr) { |
| return LiteralFromJson(val, bound_term->type().get()); |
| } |
| return LiteralFromJson(val); |
| }; |
| |
| if (IsUnaryOperation(op)) { |
| if (json.contains(kValue)) [[unlikely]] { |
| return JsonParseError("Unary predicate has invalid 'value' field: {}", |
| SafeDumpJson(json)); |
| } |
| if (json.contains(kValues)) [[unlikely]] { |
| return JsonParseError("Unary predicate has invalid 'values' field: {}", |
| SafeDumpJson(json)); |
| } |
| return UnboundPredicateImpl<B>::Make(op, std::move(term)); |
| } |
| |
| if (IsSetOperation(op)) { |
| std::vector<Literal> literals; |
| if (!json.contains(kValues) || !json[kValues].is_array() || json.contains(kValue)) |
| [[unlikely]] { |
| return JsonParseError( |
| "Set predicate must include an array 'values' field and must not include " |
| "'value': {}", |
| SafeDumpJson(json)); |
| } |
| for (const auto& val : json[kValues]) { |
| ICEBERG_ASSIGN_OR_RAISE(auto lit, parse_literal(val)); |
| literals.push_back(std::move(lit)); |
| } |
| return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literals)); |
| } |
| |
| // Literal predicate |
| if (!json.contains(kValue) || json.contains(kValues)) [[unlikely]] { |
| return JsonParseError( |
| "Literal predicate requires 'value' and must not include 'values': {}", |
| SafeDumpJson(json)); |
| } |
| ICEBERG_ASSIGN_OR_RAISE(auto literal, parse_literal(json[kValue])); |
| return UnboundPredicateImpl<B>::Make(op, std::move(term), std::move(literal)); |
| } |
| } // namespace |
| |
| bool IsUnaryOperation(Expression::Operation op) { |
| switch (op) { |
| case Expression::Operation::kIsNull: |
| case Expression::Operation::kNotNull: |
| case Expression::Operation::kIsNan: |
| case Expression::Operation::kNotNan: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool IsSetOperation(Expression::Operation op) { |
| switch (op) { |
| case Expression::Operation::kIn: |
| case Expression::Operation::kNotIn: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| Result<Expression::Operation> OperationTypeFromJson(const nlohmann::json& json) { |
| if (!json.is_string()) [[unlikely]] { |
| return JsonParseError("Unable to create operation. Json value is not a string"); |
| } |
| auto typeStr = json.get<std::string>(); |
| if (typeStr == kTrue) return Expression::Operation::kTrue; |
| if (typeStr == kFalse) return Expression::Operation::kFalse; |
| if (typeStr == kAnd) return Expression::Operation::kAnd; |
| if (typeStr == kOr) return Expression::Operation::kOr; |
| if (typeStr == kNot) return Expression::Operation::kNot; |
| if (typeStr == kEq) return Expression::Operation::kEq; |
| if (typeStr == kNotEq) return Expression::Operation::kNotEq; |
| if (typeStr == kLt) return Expression::Operation::kLt; |
| if (typeStr == kLtEq) return Expression::Operation::kLtEq; |
| if (typeStr == kGt) return Expression::Operation::kGt; |
| if (typeStr == kGtEq) return Expression::Operation::kGtEq; |
| if (typeStr == kIn) return Expression::Operation::kIn; |
| if (typeStr == kNotIn) return Expression::Operation::kNotIn; |
| if (typeStr == kIsNull) return Expression::Operation::kIsNull; |
| if (typeStr == kNotNull) return Expression::Operation::kNotNull; |
| if (typeStr == kIsNan) return Expression::Operation::kIsNan; |
| if (typeStr == kNotNan) return Expression::Operation::kNotNan; |
| if (typeStr == kStartsWith) return Expression::Operation::kStartsWith; |
| if (typeStr == kNotStartsWith) return Expression::Operation::kNotStartsWith; |
| if (typeStr == kCount) return Expression::Operation::kCount; |
| if (typeStr == kCountNull) return Expression::Operation::kCountNull; |
| if (typeStr == kCountStar) return Expression::Operation::kCountStar; |
| if (typeStr == kMin) return Expression::Operation::kMin; |
| if (typeStr == kMax) return Expression::Operation::kMax; |
| |
| return JsonParseError("Unknown expression operation: '{}'", typeStr); |
| } |
| |
| nlohmann::json ToJson(Expression::Operation op) { |
| std::string json(ToString(op)); |
| std::ranges::transform(json, json.begin(), [](unsigned char c) -> char { |
| return (c == '_') ? '-' : static_cast<char>(std::tolower(c)); |
| }); |
| return json; |
| } |
| |
| nlohmann::json ToJson(const NamedReference& ref) { return ref.name(); } |
| |
| nlohmann::json ToJson(const UnboundTransform& transform) { |
| auto& mut = const_cast<UnboundTransform&>(transform); |
| return MakeTransformJson(transform.transform()->ToString(), mut.reference()->name()); |
| } |
| |
| nlohmann::json ToJson(const BoundReference& ref) { return ref.name(); } |
| |
| nlohmann::json ToJson(const BoundTransform& transform) { |
| auto& mut = const_cast<BoundTransform&>(transform); |
| return MakeTransformJson(transform.transform()->ToString(), mut.reference()->name()); |
| } |
| |
| Result<std::unique_ptr<NamedReference>> NamedReferenceFromJson( |
| const nlohmann::json& json) { |
| if (json.is_object() && json.contains(kType) && |
| json[kType].get<std::string>() == kReference && json.contains(kTerm)) { |
| return NamedReference::Make(json[kTerm].get<std::string>()); |
| } |
| if (!json.is_string()) [[unlikely]] { |
| return JsonParseError("Expected string for named reference"); |
| } |
| return NamedReference::Make(json.get<std::string>()); |
| } |
| |
| Result<std::unique_ptr<UnboundTransform>> UnboundTransformFromJson( |
| const nlohmann::json& json) { |
| if (!IsTransformTerm(json)) { |
| return JsonParseError("Invalid unbound transform: {}", SafeDumpJson(json)); |
| } |
| ICEBERG_ASSIGN_OR_RAISE(auto transform_str, |
| GetJsonValue<std::string>(json, kTransform)); |
| ICEBERG_ASSIGN_OR_RAISE(auto transform, TransformFromString(transform_str)); |
| ICEBERG_ASSIGN_OR_RAISE(auto ref, NamedReferenceFromJson(json[kTerm])); |
| return UnboundTransform::Make(std::move(ref), std::move(transform)); |
| } |
| |
| Result<nlohmann::json> ToJson(const Literal& literal) { |
| if (literal.IsNull()) { |
| return nlohmann::json(nullptr); |
| } |
| |
| const auto type_id = literal.type()->type_id(); |
| const auto& value = literal.value(); |
| |
| switch (type_id) { |
| case TypeId::kBoolean: |
| return nlohmann::json(std::get<bool>(value)); |
| case TypeId::kInt: |
| return nlohmann::json(std::get<int32_t>(value)); |
| case TypeId::kDate: |
| return nlohmann::json(TransformUtil::HumanDay(std::get<int32_t>(value))); |
| case TypeId::kLong: |
| return nlohmann::json(std::get<int64_t>(value)); |
| case TypeId::kTime: |
| return nlohmann::json(TransformUtil::HumanTime(std::get<int64_t>(value))); |
| case TypeId::kTimestamp: |
| return nlohmann::json(TransformUtil::HumanTimestamp(std::get<int64_t>(value))); |
| case TypeId::kTimestampTz: |
| return nlohmann::json( |
| TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value))); |
| case TypeId::kFloat: |
| return nlohmann::json(std::get<float>(value)); |
| case TypeId::kDouble: |
| return nlohmann::json(std::get<double>(value)); |
| case TypeId::kString: |
| return nlohmann::json(std::get<std::string>(value)); |
| case TypeId::kBinary: |
| case TypeId::kFixed: { |
| // base 16 encoding for binary data |
| const auto& bytes = std::get<std::vector<uint8_t>>(value); |
| std::string hex; |
| hex.reserve(bytes.size() * 2); |
| for (uint8_t byte : bytes) { |
| hex += std::format("{:02X}", byte); |
| } |
| return nlohmann::json(std::move(hex)); |
| } |
| case TypeId::kDecimal: |
| return nlohmann::json(literal.ToString()); |
| case TypeId::kUuid: |
| return nlohmann::json(std::get<Uuid>(value).ToString()); |
| default: |
| return NotSupported("Unsupported literal type for JSON serialization: {}", |
| literal.type()->ToString()); |
| } |
| } |
| |
| Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* /*type*/) { |
| // TODO(gangwu): implement type-aware literal parsing equivalent to Java's |
| // SingleValueParser.fromJson(type, node). |
| return LiteralFromJson(json); |
| } |
| |
| Result<Literal> LiteralFromJson(const nlohmann::json& json) { |
| // Unwrap {"type": "literal", "value": <actual>} wrapper |
| if (json.is_object() && json.contains(kType) && |
| json[kType].get<std::string>() == kLiteral && json.contains(kValue)) { |
| return LiteralFromJson(json[kValue]); |
| } |
| if (json.is_null()) { |
| return Literal::Null(nullptr); |
| } |
| if (json.is_boolean()) { |
| return Literal::Boolean(json.get<bool>()); |
| } |
| if (json.is_number_integer()) { |
| return Literal::Long(json.get<int64_t>()); |
| } |
| if (json.is_number_float()) { |
| return Literal::Double(json.get<double>()); |
| } |
| if (json.is_string()) { |
| return Literal::String(json.get<std::string>()); |
| } |
| return JsonParseError("Unsupported literal JSON: {}", SafeDumpJson(json)); |
| } |
| |
| Result<nlohmann::json> ToJson(const Term& term) { |
| switch (term.kind()) { |
| case Term::Kind::kReference: |
| if (term.is_unbound()) { |
| return ToJson(internal::checked_cast<const NamedReference&>(term)); |
| } |
| return ToJson(internal::checked_cast<const BoundReference&>(term)); |
| case Term::Kind::kTransform: |
| if (term.is_unbound()) { |
| return ToJson(internal::checked_cast<const UnboundTransform&>(term)); |
| } |
| return ToJson(internal::checked_cast<const BoundTransform&>(term)); |
| default: |
| return NotSupported("Unsupported term for JSON serialization: {}", term.ToString()); |
| } |
| } |
| |
| Result<nlohmann::json> ToJson(const UnboundPredicate& pred) { |
| nlohmann::json json; |
| json[kType] = ToJson(pred.op()); |
| ICEBERG_ASSIGN_OR_RAISE(json[kTerm], ToJson(pred.unbound_term())); |
| |
| std::span<const Literal> literals = pred.literals(); |
| if (IsSetOperation(pred.op())) { |
| nlohmann::json values = nlohmann::json::array(); |
| for (const auto& lit : literals) { |
| ICEBERG_ASSIGN_OR_RAISE(auto lit_json, ToJson(lit)); |
| values.push_back(std::move(lit_json)); |
| } |
| json[kValues] = std::move(values); |
| } else if (!literals.empty()) { |
| ICEBERG_CHECK(literals.size() == 1, |
| "Expected exactly one literal for non-set predicate but got {}: {}", |
| literals.size(), pred.ToString()); |
| ICEBERG_ASSIGN_OR_RAISE(json[kValue], ToJson(literals[0])); |
| } |
| return json; |
| } |
| |
| Result<nlohmann::json> ToJson(const BoundPredicate& pred) { |
| nlohmann::json json; |
| json[kType] = ToJson(pred.op()); |
| ICEBERG_ASSIGN_OR_RAISE(json[kTerm], ToJson(*pred.term())); |
| |
| if (IsSetOperation(pred.op())) { |
| const auto& sp = internal::checked_cast<const BoundSetPredicate&>(pred); |
| nlohmann::json values = nlohmann::json::array(); |
| for (const auto& lit : sp.literal_set()) { |
| ICEBERG_ASSIGN_OR_RAISE(auto lit_json, ToJson(lit)); |
| values.push_back(std::move(lit_json)); |
| } |
| json[kValues] = std::move(values); |
| } else if (!IsUnaryOperation(pred.op())) { |
| const auto& lp = internal::checked_cast<const BoundLiteralPredicate&>(pred); |
| ICEBERG_ASSIGN_OR_RAISE(json[kValue], ToJson(lp.literal())); |
| } |
| return json; |
| } |
| |
| Result<std::unique_ptr<UnboundPredicate>> UnboundPredicateFromJson( |
| const nlohmann::json& json, const Schema* schema) { |
| if (!json.contains(kType) || !json.contains(kTerm)) [[unlikely]] { |
| return JsonParseError("Invalid predicate JSON: missing 'type' or 'term' field : {}", |
| SafeDumpJson(json)); |
| } |
| ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType])); |
| const auto& term_json = json[kTerm]; |
| |
| if (IsTransformTerm(term_json)) { |
| ICEBERG_ASSIGN_OR_RAISE(auto term, UnboundTransformFromJson(term_json)); |
| return PredicateFromJson<BoundTransform>(op, std::move(term), json, schema); |
| } |
| |
| ICEBERG_ASSIGN_OR_RAISE(auto term, NamedReferenceFromJson(term_json)); |
| return PredicateFromJson<BoundReference>(op, std::move(term), json, schema); |
| } |
| |
| Result<std::shared_ptr<Expression>> ExpressionFromJson(const nlohmann::json& json, |
| const Schema* schema) { |
| // Handle boolean constants |
| if (json.is_boolean()) { |
| return json.get<bool>() |
| ? internal::checked_pointer_cast<Expression>(True::Instance()) |
| : internal::checked_pointer_cast<Expression>(False::Instance()); |
| } |
| if (json.is_string()) { |
| auto s = StringUtils::ToLower(json.get<std::string>()); |
| if (s == kTrue) return True::Instance(); |
| if (s == kFalse) return False::Instance(); |
| return JsonParseError("Unknown expression string constant: {}", s); |
| } |
| |
| if (!json.is_object() || !json.contains(kType)) [[unlikely]] { |
| return JsonParseError("Expression JSON must be an object with a 'type' field: {}", |
| SafeDumpJson(json)); |
| } |
| |
| if (json[kType].get<std::string>() == kLiteral) { |
| if (!json.contains(kValue) || !json[kValue].is_boolean()) [[unlikely]] { |
| return JsonParseError( |
| "Expression of type 'literal' must have a boolean 'value' field: {}", |
| SafeDumpJson(json)); |
| } |
| return json[kValue].get<bool>() |
| ? internal::checked_pointer_cast<Expression>(True::Instance()) |
| : internal::checked_pointer_cast<Expression>(False::Instance()); |
| } |
| |
| ICEBERG_ASSIGN_OR_RAISE(auto op, OperationTypeFromJson(json[kType])); |
| |
| switch (op) { |
| case Expression::Operation::kAnd: { |
| if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] { |
| return JsonParseError("AND expression missing 'left' or 'right' field"); |
| } |
| ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft], schema)); |
| ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight], schema)); |
| return And::Make(std::move(left), std::move(right)); |
| } |
| case Expression::Operation::kOr: { |
| if (!json.contains(kLeft) || !json.contains(kRight)) [[unlikely]] { |
| return JsonParseError("OR expression missing 'left' or 'right' field"); |
| } |
| ICEBERG_ASSIGN_OR_RAISE(auto left, ExpressionFromJson(json[kLeft], schema)); |
| ICEBERG_ASSIGN_OR_RAISE(auto right, ExpressionFromJson(json[kRight], schema)); |
| return Or::Make(std::move(left), std::move(right)); |
| } |
| case Expression::Operation::kNot: { |
| if (!json.contains(kChild)) [[unlikely]] { |
| return JsonParseError("NOT expression missing 'child' field"); |
| } |
| ICEBERG_ASSIGN_OR_RAISE(auto child, ExpressionFromJson(json[kChild], schema)); |
| return Not::Make(std::move(child)); |
| } |
| case Expression::Operation::kCount: |
| case Expression::Operation::kCountNull: |
| case Expression::Operation::kCountStar: |
| case Expression::Operation::kMin: |
| case Expression::Operation::kMax: { |
| // unsupported operations for JSON deserialization |
| return NotSupported("Unsupported expression type for JSON deserialization: {}", |
| ToString(op)); |
| } |
| default: |
| return UnboundPredicateFromJson(json, schema); |
| } |
| } |
| |
| Result<nlohmann::json> ToJson(const Expression& expr) { |
| switch (expr.op()) { |
| case Expression::Operation::kTrue: |
| return nlohmann::json(true); |
| case Expression::Operation::kFalse: |
| return nlohmann::json(false); |
| case Expression::Operation::kAnd: { |
| const auto& and_expr = internal::checked_cast<const And&>(expr); |
| nlohmann::json json; |
| json[kType] = ToJson(expr.op()); |
| ICEBERG_ASSIGN_OR_RAISE(json[kLeft], ToJson(*and_expr.left())); |
| ICEBERG_ASSIGN_OR_RAISE(json[kRight], ToJson(*and_expr.right())); |
| return json; |
| } |
| case Expression::Operation::kOr: { |
| const auto& or_expr = internal::checked_cast<const Or&>(expr); |
| nlohmann::json json; |
| json[kType] = ToJson(expr.op()); |
| ICEBERG_ASSIGN_OR_RAISE(json[kLeft], ToJson(*or_expr.left())); |
| ICEBERG_ASSIGN_OR_RAISE(json[kRight], ToJson(*or_expr.right())); |
| return json; |
| } |
| case Expression::Operation::kNot: { |
| const auto& not_expr = internal::checked_cast<const Not&>(expr); |
| nlohmann::json json; |
| json[kType] = ToJson(expr.op()); |
| ICEBERG_ASSIGN_OR_RAISE(json[kChild], ToJson(*not_expr.child())); |
| return json; |
| } |
| default: |
| if (expr.is_unbound_predicate()) { |
| return ToJson(dynamic_cast<const UnboundPredicate&>(expr)); |
| } |
| if (expr.is_bound_predicate()) { |
| return ToJson(dynamic_cast<const BoundPredicate&>(expr)); |
| } |
| return NotSupported("Unsupported expression type for JSON serialization"); |
| } |
| } |
| |
| } // namespace iceberg |