blob: f9dbb0383626279cfba60737a73a5a847116a2d3 [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use std::any::Any;
use std::error::Error;
use std::panic::{catch_unwind, AssertUnwindSafe};
use datafusion::arrow::error::ArrowError;
use datafusion::error::DataFusionError;
use jni::JNIEnv;
pub type JniResult<T> = Result<T, Box<dyn Error + Send + Sync>>;
/// Run `f`, catching panics and translating its `Err` into the appropriate
/// Java exception. When the boxed error is a [`DataFusionError`], the variant
/// drives which Java exception class is thrown -- callers can `catch
/// (PlanException)` / `catch (ResourcesExhaustedException)` etc. without
/// scraping message strings. Anything else (including Rust panics) falls
/// through to the parent [`DataFusionException`] class.
pub fn try_unwrap_or_throw<T, F>(env: &mut JNIEnv, default: T, f: F) -> T
where
F: FnOnce(&mut JNIEnv) -> JniResult<T>,
{
match catch_unwind(AssertUnwindSafe(|| f(env))) {
Ok(Ok(value)) => value,
Ok(Err(boxed)) => {
// Try to recover the typed DataFusionError so we can pick a
// matching Java subclass. If the error came from elsewhere
// (jni::errors::Error, prost::DecodeError, a stringly-typed
// Err("...")?), it falls through to the parent class.
match boxed.downcast::<DataFusionError>() {
Ok(df_err) => {
// Walk the cause chain so a ResourcesExhausted / Plan /
// etc. wrapped in ArrowError::External (or any other
// upstream-mandated wrapper) is classified by its real
// root, not by the outer wrapper. Same shape as upstream's
// `DataFusionError::find_root()`. The thrown message
// preserves the outer error's full chain so wrapping
// context isn't lost; only the *class* picks the inner
// variant.
let class = classify(df_err.find_root());
throw(env, class, &df_err.to_string());
}
Err(other) => {
throw(env, PARENT, &other.to_string());
}
}
default
}
Err(panic) => {
// Rust panics are upstream-bug-shaped, not caller-actionable.
// Surface as the parent class with a `panic: ` prefix so the
// caller can grep without inspecting a typed subclass.
throw(env, PARENT, &format!("panic: {}", panic_message(&panic)));
default
}
}
}
const PARENT: &str = "org/apache/datafusion/DataFusionException";
/// Map a [`DataFusionError`] variant onto the Java exception class to throw.
/// Multiple Rust variants fold into a single Java class when they're the same
/// kind of problem from the caller's standpoint -- the proposed mapping is
/// the public Java contract; the underlying variant set is not.
///
/// Callers should pass the result of [`DataFusionError::find_root`] so a
/// `ResourcesExhausted` / `Plan` / etc. buried inside a wrapping `ArrowError`
/// or `Context(...)` is classified by its real root. Variants without a
/// clean caller-facing category fall through to the parent class.
fn classify(err: &DataFusionError) -> &'static str {
match err {
DataFusionError::Plan(_)
| DataFusionError::SQL(_, _)
| DataFusionError::SchemaError(_, _) => "org/apache/datafusion/PlanException",
DataFusionError::Execution(_)
| DataFusionError::ExecutionJoin(_)
| DataFusionError::External(_)
| DataFusionError::Ffi(_) => "org/apache/datafusion/ExecutionException",
DataFusionError::ResourcesExhausted(_) => {
"org/apache/datafusion/ResourcesExhaustedException"
}
DataFusionError::IoError(_)
| DataFusionError::ObjectStore(_)
| DataFusionError::ParquetError(_) => "org/apache/datafusion/IoException",
// The AvroError variant only exists when DataFusion is built with its
// `avro` feature, forwarded by this crate's own `avro` feature.
#[cfg(feature = "avro")]
DataFusionError::AvroError(_) => "org/apache/datafusion/IoException",
// ArrowError is a 21-variant grab bag -- only some of those variants
// are actually IO-shaped. DivideByZero / ArithmeticOverflow / Compute
// / Cast / InvalidArgument / Memory etc. are execution-time failures
// produced by a normal query (e.g. SELECT 1/0), not IO. Routing them
// through IoException would hide ordinary execution errors from
// callers catching ExecutionException.
DataFusionError::ArrowError(arrow_err, _) => classify_arrow(arrow_err),
DataFusionError::NotImplemented(_) => "org/apache/datafusion/NotImplementedException",
DataFusionError::Configuration(_) => "org/apache/datafusion/ConfigurationException",
// Variants with no clean caller-facing category -- Internal (upstream
// bug-shaped), Substrait, Collection (multi-error), and any new
// variants a future DataFusion bump introduces -- fall through to
// the parent. The catch-all is deliberate: "I don't know what to do
// with this" surfaces as the parent class rather than a wrong typed
// subclass. Wrapper variants like Context / Diagnostic / Shared also
// land here when find_root() can't dig past them, which is rare
// because find_root walks `source()`.
_ => PARENT,
}
}
/// Map an [`ArrowError`] variant onto the Java exception class to throw.
/// Only the genuinely IO-shaped variants (`IoError`, `IpcError`) land on
/// `IoException`; everything else is execution-time and routes through
/// `ExecutionException`. Schema/parse-shaped variants route through
/// `PlanException` so a malformed IPC schema or a parse error surfaces as a
/// query problem rather than an execution failure.
///
/// Variants without a clean caller-facing category (`CDataInterface`, the
/// various overflow/index-overflow markers) fall through to the parent.
fn classify_arrow(err: &ArrowError) -> &'static str {
match err {
ArrowError::IoError(_, _) | ArrowError::IpcError(_) => "org/apache/datafusion/IoException",
ArrowError::SchemaError(_) | ArrowError::ParseError(_) => {
"org/apache/datafusion/PlanException"
}
ArrowError::DivideByZero
| ArrowError::ArithmeticOverflow(_)
| ArrowError::ComputeError(_)
| ArrowError::CastError(_)
| ArrowError::InvalidArgumentError(_)
| ArrowError::MemoryError(_)
| ArrowError::CsvError(_)
| ArrowError::JsonError(_)
| ArrowError::AvroError(_)
| ArrowError::ParquetError(_)
| ArrowError::ExternalError(_) => "org/apache/datafusion/ExecutionException",
ArrowError::NotYetImplemented(_) => "org/apache/datafusion/NotImplementedException",
// CDataInterface, DictionaryKeyOverflowError, RunEndIndexOverflowError,
// OffsetOverflowError, and any new variants in future Arrow bumps fall
// through to the parent -- "I don't know what to do with this" stays
// at the parent rather than getting routed to the wrong subclass.
_ => PARENT,
}
}
fn throw(env: &mut JNIEnv, class: &str, message: &str) {
if env.exception_check().unwrap_or(false) {
return;
}
let _ = env.throw_new(class, message);
}
/// Best-effort extraction of a panic payload's message. `catch_unwind` hands
/// back a `Box<dyn Any>`; the payload is a `String` or `&str` for ordinary
/// `panic!`/`unwrap` sites, anything else is opaque.
pub fn panic_message(panic: &Box<dyn Any + Send>) -> String {
if let Some(s) = panic.downcast_ref::<String>() {
s.clone()
} else if let Some(s) = panic.downcast_ref::<&str>() {
(*s).to_string()
} else {
"rust panic with non-string payload".to_string()
}
}
#[cfg(test)]
mod tests {
use super::*;
use datafusion::arrow::error::ArrowError;
#[test]
fn classify_unwrapped_variants_picks_typed_class() {
// Sanity check: the simple unwrapped cases land on the right classes.
assert_eq!(
classify(&DataFusionError::Plan("x".into())),
"org/apache/datafusion/PlanException"
);
assert_eq!(
classify(&DataFusionError::ResourcesExhausted("x".into())),
"org/apache/datafusion/ResourcesExhaustedException"
);
assert_eq!(
classify(&DataFusionError::Configuration("x".into())),
"org/apache/datafusion/ConfigurationException"
);
}
#[test]
fn classify_unrecognised_variant_falls_through_to_parent() {
// `Internal` has no clean caller-facing category; must surface as
// the parent class rather than getting routed to a wrong subclass.
assert_eq!(classify(&DataFusionError::Internal("x".into())), PARENT);
}
#[test]
fn classify_via_find_root_unwraps_nested_resources_exhausted() {
// Codex regression: a DataFusionError::ResourcesExhausted buried
// inside an ArrowError::ExternalError(Box<DataFusionError::Context>)
// wrapper would otherwise land on the outer ArrowError arm and be
// classified as IoException -- the wrong typed exception. Confirm
// that calling classify on `find_root` recovers the inner variant
// (the same shape upstream's own find_root() doctest pins).
let inner = DataFusionError::ResourcesExhausted("memory pool full".into());
let contexted = DataFusionError::Context("aggregate stream".into(), Box::new(inner));
let arrow_wrapped = ArrowError::ExternalError(Box::new(contexted));
let outer = DataFusionError::ArrowError(Box::new(arrow_wrapped), None);
// Without find_root, the outer would route to IoException; with it,
// the real root (ResourcesExhausted) wins.
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/ResourcesExhaustedException"
);
}
#[test]
fn classify_via_find_root_unwraps_plan_through_context() {
// A Plan error wrapped only in DataFusionError::Context still has to
// reach PlanException, not the parent class.
let inner = DataFusionError::Plan("table 't' not found".into());
let contexted = DataFusionError::Context("logical planning".into(), Box::new(inner));
assert_eq!(
classify(contexted.find_root()),
"org/apache/datafusion/PlanException"
);
}
#[test]
fn arrow_divide_by_zero_routes_to_execution_not_io() {
// Codex regression: DataFusionError::ArrowError(ArrowError::DivideByZero)
// is the actual shape produced by `SELECT 1/0`. The 21-variant
// ArrowError grab bag means routing the whole ArrowError arm to
// IoException would put ordinary divide-by-zero / overflow / cast
// errors on the wrong typed exception. classify_arrow inspects the
// inner variant.
let outer = DataFusionError::ArrowError(Box::new(ArrowError::DivideByZero), None);
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/ExecutionException"
);
let outer = DataFusionError::ArrowError(
Box::new(ArrowError::ArithmeticOverflow("i32 overflow".into())),
None,
);
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/ExecutionException"
);
let outer = DataFusionError::ArrowError(
Box::new(ArrowError::CastError("Int32 -> Date32".into())),
None,
);
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/ExecutionException"
);
}
#[test]
fn arrow_io_variants_still_route_to_io() {
// Genuinely IO-shaped ArrowError variants stay on IoException.
let io = std::io::Error::other("disk full");
let outer = DataFusionError::ArrowError(
Box::new(ArrowError::IoError("write failed".into(), io)),
None,
);
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/IoException"
);
let outer =
DataFusionError::ArrowError(Box::new(ArrowError::IpcError("bad framing".into())), None);
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/IoException"
);
}
#[test]
fn arrow_schema_and_parse_errors_route_to_plan() {
// SchemaError and ParseError are query-shaped problems (you wrote a
// bad query / supplied a bad schema), not execution failures.
let outer = DataFusionError::ArrowError(
Box::new(ArrowError::SchemaError("column 'foo' not found".into())),
None,
);
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/PlanException"
);
let outer = DataFusionError::ArrowError(
Box::new(ArrowError::ParseError("expected int".into())),
None,
);
assert_eq!(
classify(outer.find_root()),
"org/apache/datafusion/PlanException"
);
}
}