build: Cargo workspace + native-common extraction (1/6) (#104)
diff --git a/.cargo/config.toml b/.cargo/config.toml
new file mode 100644
index 0000000..d7e0ee2
--- /dev/null
+++ b/.cargo/config.toml
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Keep Cargo's workspace output out of `target/` so `mvn clean` (which deletes
+# the root `target/`) does not nuke the Rust build cache.
+[build]
+target-dir = "rust-target"
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index c5db936..da8e65a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -83,8 +83,8 @@
path: |
~/.cargo/registry
~/.cargo/git
- native/target
- key: ${{ runner.os }}-cargo-${{ hashFiles('native/Cargo.lock') }}
+ rust-target
+ key: ${{ runner.os }}-cargo-${{ hashFiles('Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-
- name: Build native and run tests
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 4cf628f..952bf34 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -54,7 +54,7 @@
run: ./mvnw -q spotless:check
- name: Check Rust formatting
- run: cd native && cargo fmt --all -- --check
+ run: cargo fmt --all -- --check
clippy:
name: Clippy
@@ -81,9 +81,9 @@
path: |
~/.cargo/registry
~/.cargo/git
- native/target
- key: ${{ runner.os }}-clippy-${{ hashFiles('native/Cargo.lock') }}
+ rust-target
+ key: ${{ runner.os }}-clippy-${{ hashFiles('Cargo.lock') }}
restore-keys: ${{ runner.os }}-clippy-
- name: Run clippy
- run: cd native && cargo clippy --all-targets -- -D warnings
+ run: cargo clippy --workspace --all-targets -- -D warnings
diff --git a/.gitignore b/.gitignore
index 719a2a4..25c9216 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
target/
+rust-target/
*.class
.idea/
.vscode/
diff --git a/native/Cargo.lock b/Cargo.lock
similarity index 94%
rename from native/Cargo.lock
rename to Cargo.lock
index 96d2f9d..dbbfcde 100644
--- a/native/Cargo.lock
+++ b/Cargo.lock
@@ -98,9 +98,9 @@
[[package]]
name = "ar_archive_writer"
-version = "0.5.1"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b"
+checksum = "4087686b4b0a3427190bae57a1d9a478dbb2d40c5dc1bd6e2b6d797913bdd348"
dependencies = [
"object",
]
@@ -119,9 +119,9 @@
[[package]]
name = "arrow"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "607e64bb911ee4f90483e044fe78f175989148c2892e659a2cd25429e782ec54"
+checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -140,9 +140,9 @@
[[package]]
name = "arrow-arith"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e754319ed8a85d817fe7adf183227e0b5308b82790a737b426c1124626b48118"
+checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -154,9 +154,9 @@
[[package]]
name = "arrow-array"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "841321891f247aa86c6112c80d83d89cb36e0addd020fa2425085b8eb6c3f579"
+checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e"
dependencies = [
"ahash",
"arrow-buffer",
@@ -173,9 +173,9 @@
[[package]]
name = "arrow-buffer"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f955dfb73fae000425f49c8226d2044dab60fb7ad4af1e24f961756354d996c9"
+checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0"
dependencies = [
"bytes",
"half",
@@ -185,9 +185,9 @@
[[package]]
name = "arrow-cast"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca5e686972523798f76bef355145bc1ae25a84c731e650268d31ab763c701663"
+checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -207,9 +207,9 @@
[[package]]
name = "arrow-csv"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86c276756867fc8186ec380c72c290e6e3b23a1d4fb05df6b1d62d2e62666d48"
+checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de"
dependencies = [
"arrow-array",
"arrow-cast",
@@ -222,9 +222,9 @@
[[package]]
name = "arrow-data"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db3b5846209775b6dc8056d77ff9a032b27043383dd5488abd0b663e265b9373"
+checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -235,9 +235,9 @@
[[package]]
name = "arrow-ipc"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd8907ddd8f9fbabf91ec2c85c1d81fe2874e336d2443eb36373595e28b98dd5"
+checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -251,9 +251,9 @@
[[package]]
name = "arrow-json"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f4518c59acc501f10d7dcae397fe12b8db3d81bc7de94456f8a58f9165d6f502"
+checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -276,9 +276,9 @@
[[package]]
name = "arrow-ord"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efa70d9d6b1356f1fb9f1f651b84a725b7e0abb93f188cf7d31f14abfa2f2e6f"
+checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -289,9 +289,9 @@
[[package]]
name = "arrow-row"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "faec88a945338192beffbbd4be0def70135422930caa244ac3cec0cd213b26b4"
+checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -302,9 +302,9 @@
[[package]]
name = "arrow-schema"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18aa020f6bc8e5201dcd2d4b7f98c68f8a410ef37128263243e6ff2a47a67d4f"
+checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed"
dependencies = [
"bitflags",
"serde_core",
@@ -313,9 +313,9 @@
[[package]]
name = "arrow-select"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a657ab5132e9c8ca3b24eb15a823d0ced38017fe3930ff50167466b02e2d592c"
+checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222"
dependencies = [
"ahash",
"arrow-array",
@@ -327,9 +327,9 @@
[[package]]
name = "arrow-string"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6de2efbbd1a9f9780ceb8d1ff5d20421b35863b361e3386b4f571f1fc69fcb8"
+checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -393,9 +393,9 @@
[[package]]
name = "autocfg"
-version = "1.5.0"
+version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
[[package]]
name = "base64"
@@ -419,9 +419,9 @@
[[package]]
name = "bitflags"
-version = "2.11.1"
+version = "2.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3"
+checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
[[package]]
name = "blake2"
@@ -457,9 +457,9 @@
[[package]]
name = "bon"
-version = "3.9.1"
+version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f47dbe92550676ee653353c310dfb9cf6ba17ee70396e1f7cf0a2020ad49b2fe"
+checksum = "b2f04f6fef12d70d42a77b1433c9e0f065238479a6cefc4f5bab105e9873a3c3"
dependencies = [
"bon-macros",
"rustversion",
@@ -467,9 +467,9 @@
[[package]]
name = "bon-macros"
-version = "3.9.1"
+version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "519bd3116aeeb42d5372c29d982d16d0170d3d4a5ed85fc7dd91642ffff3c67c"
+checksum = "7d0bd4c2f75335ad98052a37efb54f428b492f64340257143b3429c8a508fa7b"
dependencies = [
"darling",
"ident_case",
@@ -482,9 +482,9 @@
[[package]]
name = "brotli"
-version = "8.0.2"
+version = "8.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560"
+checksum = "8119e4516436f5708bbc474a9d395bf12f1b5395e93a92a56e647ac3388c8610"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
@@ -493,9 +493,9 @@
[[package]]
name = "brotli-decompressor"
-version = "5.0.0"
+version = "5.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03"
+checksum = "5962523e1b92ce1b5e793d9169b9943eece10d39f62550bc04bb605d75b94924"
dependencies = [
"alloc-no-stdlib",
"alloc-stdlib",
@@ -503,9 +503,9 @@
[[package]]
name = "bumpalo"
-version = "3.20.2"
+version = "3.20.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
[[package]]
name = "byteorder"
@@ -530,9 +530,9 @@
[[package]]
name = "cc"
-version = "1.2.62"
+version = "1.2.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98"
+checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -571,9 +571,9 @@
[[package]]
name = "chrono"
-version = "0.4.44"
+version = "0.4.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
+checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327"
dependencies = [
"iana-time-zone",
"num-traits",
@@ -789,9 +789,9 @@
[[package]]
name = "dashmap"
-version = "6.1.0"
+version = "6.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+checksum = "e6361d5c062261c78a176addb82d4c821ae42bed6089de0e12603cd25de2059c"
dependencies = [
"cfg-if",
"crossbeam-utils",
@@ -1306,6 +1306,7 @@
"arrow",
"async-trait",
"datafusion",
+ "datafusion-jni-common",
"datafusion-proto",
"datafusion-spark",
"datafusion-substrait",
@@ -1321,6 +1322,16 @@
]
[[package]]
+name = "datafusion-jni-common"
+version = "0.1.0"
+dependencies = [
+ "datafusion",
+ "futures",
+ "jni",
+ "tokio",
+]
+
+[[package]]
name = "datafusion-macros"
version = "53.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1607,9 +1618,9 @@
[[package]]
name = "displaydoc"
-version = "0.2.5"
+version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+checksum = "1ac70aa55017e108007fbaf5aa0f54b021c98f92ff8af59d42eda9da96e3dd4f"
dependencies = [
"proc-macro2",
"quote",
@@ -1624,9 +1635,9 @@
[[package]]
name = "either"
-version = "1.15.0"
+version = "1.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
[[package]]
name = "equivalent"
@@ -1932,9 +1943,9 @@
[[package]]
name = "http"
-version = "1.4.0"
+version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
+checksum = "6970f50e31d6fc17d3fa27329444bfa74e196cf62e95052a3f6fee181dba6425"
dependencies = [
"bytes",
"itoa",
@@ -1977,9 +1988,9 @@
[[package]]
name = "hyper"
-version = "1.9.0"
+version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca"
+checksum = "55281c53a1894c864990125767da440a4e630446785086f52523b20033b74498"
dependencies = [
"atomic-waker",
"bytes",
@@ -2269,13 +2280,12 @@
[[package]]
name = "js-sys"
-version = "0.3.98"
+version = "0.3.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08"
+checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162"
dependencies = [
"cfg-if",
"futures-util",
- "once_cell",
"wasm-bindgen",
]
@@ -2344,9 +2354,9 @@
[[package]]
name = "libbz2-rs-sys"
-version = "0.2.3"
+version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3a6a8c165077efc8f3a971534c50ea6a1a18b329ef4a66e897a7e3a1494565f"
+checksum = "34b357333733e8260735ba5894eb928c02ecc69c78715f01a8019e7fa7f2db4c"
[[package]]
name = "libc"
@@ -2403,9 +2413,9 @@
[[package]]
name = "log"
-version = "0.4.29"
+version = "0.4.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
[[package]]
name = "lru-slab"
@@ -2434,9 +2444,9 @@
[[package]]
name = "memchr"
-version = "2.8.0"
+version = "2.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8"
[[package]]
name = "miniz_oxide"
@@ -2450,9 +2460,9 @@
[[package]]
name = "mio"
-version = "1.2.0"
+version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
+checksum = "02bd0af71c67b473010cbbc60715ee815645a4dc942899111f494b4b737d6fda"
dependencies = [
"libc",
"wasi",
@@ -2598,9 +2608,9 @@
[[package]]
name = "parquet"
-version = "58.2.0"
+version = "58.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d7efd3052f7d6ef601085559a246bc991e9a8cc77e02753737df6322ce35f1"
+checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908"
dependencies = [
"ahash",
"arrow-array",
@@ -2762,9 +2772,9 @@
[[package]]
name = "prost"
-version = "0.14.3"
+version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568"
+checksum = "528ac67416ff8646872a3c02cad9cc4ee5dc9f9540c9b10771855c95cb2e5ae1"
dependencies = [
"bytes",
"prost-derive",
@@ -2772,9 +2782,9 @@
[[package]]
name = "prost-build"
-version = "0.14.3"
+version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7"
+checksum = "03da047801ff44bb6a4d407d4860c05fd70bb81714e6b2f3812603d5b145b042"
dependencies = [
"heck",
"itertools",
@@ -2791,9 +2801,9 @@
[[package]]
name = "prost-derive"
-version = "0.14.3"
+version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b"
+checksum = "b570b25f7617e43d59005d0990ccb79e950a423952cea19671b7a876da390adf"
dependencies = [
"anyhow",
"itertools",
@@ -2804,9 +2814,9 @@
[[package]]
name = "prost-types"
-version = "0.14.3"
+version = "0.14.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7"
+checksum = "f94967dc7688f3054c7fac87473ffae4cc4c3904800e2d9f5b857246d8963b0a"
dependencies = [
"prost",
]
@@ -3063,9 +3073,9 @@
[[package]]
name = "regex"
-version = "1.12.3"
+version = "1.12.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
+checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
dependencies = [
"aho-corasick",
"memchr",
@@ -3092,9 +3102,9 @@
[[package]]
name = "regex-syntax"
-version = "0.8.10"
+version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
+checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
[[package]]
name = "regress"
@@ -3206,9 +3216,9 @@
[[package]]
name = "rustls-native-certs"
-version = "0.8.3"
+version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "612460d5f7bea540c490b2b6395d8e34a953e52b491accd6c86c8164c5932a63"
+checksum = "dab5152771c58876a2146916e53e35057e1a4dfa2b9df0f0305b07f611fdea4d"
dependencies = [
"openssl-probe",
"rustls-pki-types",
@@ -3389,9 +3399,9 @@
[[package]]
name = "serde_json"
-version = "1.0.149"
+version = "1.0.150"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
dependencies = [
"itoa",
"memchr",
@@ -3461,9 +3471,9 @@
[[package]]
name = "shlex"
-version = "1.3.0"
+version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
[[package]]
name = "simd-adler32"
@@ -3503,9 +3513,9 @@
[[package]]
name = "socket2"
-version = "0.6.3"
+version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+checksum = "52d1cfed4120b4d927bf7c0f86d2087a4a7d6027c906d9f9d525a80573b9be51"
dependencies = [
"libc",
"windows-sys 0.61.2",
@@ -3900,9 +3910,9 @@
[[package]]
name = "typenum"
-version = "1.20.0"
+version = "1.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
[[package]]
name = "typify"
@@ -3959,9 +3969,9 @@
[[package]]
name = "unicode-segmentation"
-version = "1.13.2"
+version = "1.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
+checksum = "c6f5d3c3b1bf09027a88a6bc961fc00497d651009560b5463668dc81b0fa87a8"
[[package]]
name = "unicode-width"
@@ -4007,9 +4017,9 @@
[[package]]
name = "uuid"
-version = "1.23.1"
+version = "1.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
+checksum = "144d6b123cef80b301b8f72a9e2ca4370ddec21950d0a103dd22c437006d2db7"
dependencies = [
"getrandom 0.4.2",
"js-sys",
@@ -4068,9 +4078,9 @@
[[package]]
name = "wasm-bindgen"
-version = "0.2.121"
+version = "0.2.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790"
+checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563"
dependencies = [
"cfg-if",
"once_cell",
@@ -4081,9 +4091,9 @@
[[package]]
name = "wasm-bindgen-futures"
-version = "0.4.71"
+version = "0.4.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8"
+checksum = "54568702fabf5d4849ce2b90fadfa64168a097eaf4b351ce9df8b687a0086aaf"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -4091,9 +4101,9 @@
[[package]]
name = "wasm-bindgen-macro"
-version = "0.2.121"
+version = "0.2.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578"
+checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -4101,9 +4111,9 @@
[[package]]
name = "wasm-bindgen-macro-support"
-version = "0.2.121"
+version = "0.2.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2"
+checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b"
dependencies = [
"bumpalo",
"proc-macro2",
@@ -4114,9 +4124,9 @@
[[package]]
name = "wasm-bindgen-shared"
-version = "0.2.121"
+version = "0.2.123"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441"
+checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92"
dependencies = [
"unicode-ident",
]
@@ -4170,9 +4180,9 @@
[[package]]
name = "web-sys"
-version = "0.3.98"
+version = "0.3.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa"
+checksum = "6e0871acf327f283dc6da28a1696cdc64fb355ba9f935d052021fa77f35cce69"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -4580,9 +4590,9 @@
[[package]]
name = "yoke"
-version = "0.8.2"
+version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
+checksum = "709fe23a0424b6a435d82152b1bd3fdfb0833487d5fa90d05d42762a9891fef5"
dependencies = [
"stable_deref_trait",
"yoke-derive",
@@ -4603,18 +4613,18 @@
[[package]]
name = "zerocopy"
-version = "0.8.48"
+version = "0.8.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
+checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
dependencies = [
"zerocopy-derive",
]
[[package]]
name = "zerocopy-derive"
-version = "0.8.48"
+version = "0.8.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
+checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
dependencies = [
"proc-macro2",
"quote",
@@ -4623,9 +4633,9 @@
[[package]]
name = "zerofrom"
-version = "0.1.7"
+version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df"
+checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272"
dependencies = [
"zerofrom-derive",
]
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..fd1971a
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,57 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[workspace]
+resolver = "2"
+members = [
+ "native",
+ "native-common",
+]
+
+# Shared package metadata so every crate moves in lock step. Members inherit
+# via `version.workspace = true` / `edition.workspace = true` etc.; a single
+# bump here re-versions the whole workspace.
+[workspace.package]
+version = "0.1.0"
+edition = "2021"
+license = "Apache-2.0"
+repository = "https://github.com/apache/datafusion-java"
+
+# Every dependency used by any workspace member is declared here so version
+# bumps live in one place and the resolver picks a single version of each
+# crate across the workspace. Members reference these via `{ workspace = true }`
+# and add per-crate flags (optional, features, default-features) at the use
+# site.
+[workspace.dependencies]
+arrow = { version = "58", features = ["ffi"] }
+async-trait = "0.1"
+datafusion = { version = "53.1.0" }
+datafusion-proto = "53.1.0"
+datafusion-spark = "53.1.0"
+datafusion-substrait = "53.1.0"
+futures = "0.3"
+jni = "0.21"
+# Pinned to the same 0.13.x line that DataFusion 53.1 pulls in transitively,
+# so we share one `dyn ObjectStore` vtable and don't link the crate twice.
+object_store = { version = "0.13", default-features = false }
+prost = "0.14"
+prost-build = "0.14"
+protoc-bin-vendored = "3"
+tokio = { version = "1", features = ["rt-multi-thread"] }
+# Optional and cfg-gated; see `native/Cargo.toml` for the required build flags.
+tokio-metrics = "0.5"
+url = "2"
diff --git a/Makefile b/Makefile
index 6d9b0ae..d6bcf2c 100644
--- a/Makefile
+++ b/Makefile
@@ -20,14 +20,14 @@
all: native jvm
native:
- cd native && cargo build
+ cargo build --workspace
-# Build the native crate with the `runtime-metrics` Cargo feature enabled.
+# Build the JNI crate with the `runtime-metrics` Cargo feature enabled.
# Requires `--cfg tokio_unstable` because tokio-metrics gates its API there.
# Default `make native` does not pull this in; callers who need
# SessionContext.runtimeStats() pick this target explicitly.
native-runtime-metrics:
- cd native && RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics
+ RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
jvm:
./mvnw package -DskipTests
@@ -39,10 +39,10 @@
# `:check` form inline in .github/workflows/lint.yml.
format:
./mvnw -q spotless:apply
- cd native && cargo fmt --all
+ cargo fmt --all
clean:
- cd native && cargo clean
+ cargo clean
./mvnw clean
tpch-data:
diff --git a/core/pom.xml b/core/pom.xml
index 5ddf107..1e25736 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -102,8 +102,8 @@
<configuration>
<target>
<property name="datafusion.native.lib.source"
- value="${maven.multiModuleProjectDirectory}/native/target/${datafusion.native.profile}/${datafusion.lib.filename}"/>
- <fail message="Native library not found at ${datafusion.native.lib.source}. Run 'cd native && cargo build' (or 'make') before building the JAR.">
+ value="${maven.multiModuleProjectDirectory}/rust-target/${datafusion.native.profile}/${datafusion.lib.filename}"/>
+ <fail message="Native library not found at ${datafusion.native.lib.source}. Run 'cargo build -p datafusion-jni' (or 'make') before building the JAR.">
<condition><not><available file="${datafusion.native.lib.source}"/></not></condition>
</fail>
<mkdir dir="${project.build.outputDirectory}/org/apache/datafusion/${datafusion.lib.os}/${datafusion.lib.arch}"/>
diff --git a/core/src/main/java/org/apache/datafusion/SessionContext.java b/core/src/main/java/org/apache/datafusion/SessionContext.java
index ec0bd85..b68cda5 100644
--- a/core/src/main/java/org/apache/datafusion/SessionContext.java
+++ b/core/src/main/java/org/apache/datafusion/SessionContext.java
@@ -113,10 +113,11 @@
* other Substrait-emitting tool — and hand them to DataFusion without round-tripping through SQL.
*
* <p>Substrait support is gated behind the {@code substrait} Cargo feature on the native crate
- * and is <strong>off by default</strong>. Rebuild the native crate with {@code cargo build
- * --features substrait} (or {@code cargo build --features substrait,protoc} for hermetic builds
- * that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native binary
- * built without the feature, this method throws {@link RuntimeException} pointing at the flag.
+ * and is <strong>off by default</strong>. Rebuild the native crate with {@code cargo build -p
+ * datafusion-jni --features substrait} (or {@code ... --features substrait,protoc} for hermetic
+ * builds that vendor {@code protoc} via {@code cmake}) to enable it. If invoked against a native
+ * binary built without the feature, this method throws {@link RuntimeException} pointing at the
+ * flag.
*
* @throws IllegalArgumentException if {@code planBytes} is {@code null}.
* @throws IllegalStateException if this context is closed.
@@ -183,7 +184,7 @@
* Rebuild with:
*
* <pre>{@code
- * RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics
+ * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
* }</pre>
*
* <p>If invoked against a native binary built without the feature, this method throws {@link
diff --git a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
index 120d179..d567275 100644
--- a/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
+++ b/core/src/test/java/org/apache/datafusion/SessionContextRuntimeStatsTest.java
@@ -37,7 +37,7 @@
* #checkFeatureEnabled}. Run
*
* <pre>{@code
- * (cd native && RUSTFLAGS="--cfg tokio_unstable" cargo build --features runtime-metrics)
+ * RUSTFLAGS="--cfg tokio_unstable" cargo build -p datafusion-jni --features runtime-metrics
* }</pre>
*
* before {@code ./mvnw test} to exercise this class.
diff --git a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
index 34db3b5..a2cfb0a 100644
--- a/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
+++ b/core/src/test/java/org/apache/datafusion/SessionContextSubstraitTest.java
@@ -50,7 +50,7 @@
*
* <p>The {@code substrait} Cargo feature is off by default in {@code native/Cargo.toml}; if the
* native crate was built without it, every test here is skipped (see {@link #checkFeatureEnabled}).
- * Run {@code (cd native && cargo build --features substrait)} before {@code ./mvnw test} to
+ * Run {@code cargo build -p datafusion-jni --features substrait} before {@code ./mvnw test} to
* exercise this class.
*/
class SessionContextSubstraitTest {
diff --git a/dev/release/build-release.sh b/dev/release/build-release.sh
index 2b033bb..4d4ab13 100755
--- a/dev/release/build-release.sh
+++ b/dev/release/build-release.sh
@@ -135,26 +135,28 @@
mkdir -p "$JVM_TARGET_DIR/linux/amd64"
docker cp \
- "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+ "$CONTAINER_AMD64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \
"$JVM_TARGET_DIR/linux/amd64/"
mkdir -p "$JVM_TARGET_DIR/linux/aarch64"
docker cp \
- "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/native/target/release/libdatafusion_jni.so" \
+ "$CONTAINER_ARM64:/opt/datafusion-java-rm/datafusion-java/rust-target/release/libdatafusion_jni.so" \
"$JVM_TARGET_DIR/linux/aarch64/"
echo "Building macOS native libs on the host (host=$HOST_ARCH)"
rustup target add "$OTHER_DARWIN_TARGET"
-(cd "$PROJECT_HOME/native" && cargo build --release)
-(cd "$PROJECT_HOME/native" && cargo build --release --target "$OTHER_DARWIN_TARGET")
+# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml),
+# not the per-crate `native/target/`, so build from the repo root.
+(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni)
+(cd "$PROJECT_HOME" && cargo build --release -p datafusion-jni --target "$OTHER_DARWIN_TARGET")
mkdir -p "$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR"
-cp "$PROJECT_HOME/native/target/release/libdatafusion_jni.dylib" \
+cp "$PROJECT_HOME/rust-target/release/libdatafusion_jni.dylib" \
"$JVM_TARGET_DIR/darwin/$HOST_DARWIN_DIR/"
mkdir -p "$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR"
-cp "$PROJECT_HOME/native/target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \
+cp "$PROJECT_HOME/rust-target/$OTHER_DARWIN_TARGET/release/libdatafusion_jni.dylib" \
"$JVM_TARGET_DIR/darwin/$OTHER_DARWIN_DIR/"
echo "Installing JAR into local Maven repo"
diff --git a/dev/release/datafusion-java-rm/build-native-libs.sh b/dev/release/datafusion-java-rm/build-native-libs.sh
index 5f273cc..79f8ae0 100755
--- a/dev/release/datafusion-java-rm/build-native-libs.sh
+++ b/dev/release/datafusion-java-rm/build-native-libs.sh
@@ -38,8 +38,9 @@
cd datafusion-java
git checkout "$BRANCH"
-cd native
-cargo build --release
+# Cargo writes to the workspace `rust-target/` dir (set in .cargo/config.toml),
+# not the per-crate `native/target/`, so build from the repo root.
+cargo build --release -p datafusion-jni
-echo "Built $(pwd)/target/release/libdatafusion_jni.so"
-ls -l target/release/libdatafusion_jni.so
+echo "Built $(pwd)/rust-target/release/libdatafusion_jni.so"
+ls -l rust-target/release/libdatafusion_jni.so
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index 81d83e8..3dbd90f 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -7,7 +7,7 @@
.mvn/wrapper/maven-wrapper.properties
mvnw
mvnw.cmd
-native/Cargo.lock
+Cargo.lock
dev/release/rat_exclude_files.txt
docs/source/_static/**
docs/source/conf.py
diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index e486adc..c7767bf 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -150,7 +150,8 @@
# raises on any formatting errors
rustup component add rustfmt
- (cd native && cargo fmt --all -- --check)
+ # Workspace-wide: covers native, native-common, and any future members.
+ cargo fmt --all -- --check
# build native + JVM and run the full test suite
make test
diff --git a/docs/source/contributor-guide/development.md b/docs/source/contributor-guide/development.md
index 984d77c..61d4fb0 100644
--- a/docs/source/contributor-guide/development.md
+++ b/docs/source/contributor-guide/development.md
@@ -42,7 +42,7 @@
be run individually:
```sh
-cd native && cargo build
+cargo build --workspace
./mvnw test
```
@@ -74,6 +74,11 @@
The repository is a multi-module Maven build:
+- `Cargo.toml` — Rust workspace root declaring the crate members
+ (`native`, `native-common`) and `[workspace.dependencies]` that pin
+ shared versions in one place. Cargo writes artifacts to `rust-target/`
+ (overridden in `.cargo/config.toml`) so `mvn clean` at the repo root does
+ not nuke the Rust build cache.
- `pom.xml` — parent POM declaring the `core` and `examples` modules and
shared plugin/dependency versions.
- `core/` — `datafusion-java` library module (Java sources, tests, and
@@ -81,7 +86,10 @@
- `examples/` — `datafusion-java-examples` module containing runnable
examples that depend on the library; built alongside the library so they
cannot fall out of sync with the API.
-- `native/` — Rust crate (JNI + Arrow C Data Interface).
+- `native/` — `datafusion-jni` Rust crate (JNI + Arrow C Data Interface).
+- `native-common/` — `datafusion-jni-common` Rust crate: JNI plumbing
+ shared across native crates (error→exception mapping, the per-cdylib
+ Tokio runtime singleton, the async-stream→`FFI_ArrowArrayStream` bridge).
- `proto/` — Protobuf definitions shared between Java and Rust.
- `Makefile` — top-level build orchestration (`make test`, `make format`,
`make tpch-data`).
diff --git a/docs/source/contributor-guide/updating-datafusion-version.md b/docs/source/contributor-guide/updating-datafusion-version.md
index 56d50dc..6e3b90b 100644
--- a/docs/source/contributor-guide/updating-datafusion-version.md
+++ b/docs/source/contributor-guide/updating-datafusion-version.md
@@ -21,7 +21,9 @@
Three things must move together when bumping DataFusion:
-1. `native/Cargo.toml` — the `datafusion` crate dependency.
+1. `Cargo.toml` (workspace root) — the `datafusion`, `datafusion-proto`,
+ `datafusion-spark`, and `datafusion-substrait` entries in
+ `[workspace.dependencies]`. Members inherit from there.
2. `pom.xml` — the `<datafusion.version>` Maven property. **Must equal
the Cargo version**; a mismatch means JVM-built protobuf plans won't
deserialize on the native side.
@@ -32,9 +34,9 @@
## Recipe
```sh
-# 1. Bump the Cargo dep
-$EDITOR native/Cargo.toml # set datafusion = "<new>"
-(cd native && cargo update -p datafusion)
+# 1. Bump the workspace dep
+$EDITOR Cargo.toml # set the four datafusion* entries to "<new>" in [workspace.dependencies]
+cargo update -p datafusion
# 2. Bump the Maven property to match
$EDITOR pom.xml # set <datafusion.version>
diff --git a/native-common/Cargo.toml b/native-common/Cargo.toml
new file mode 100644
index 0000000..21a2296
--- /dev/null
+++ b/native-common/Cargo.toml
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+[package]
+name = "datafusion-jni-common"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# Implementation detail of datafusion-java's native crates, not a standalone
+# crates.io library. Matches `publish = false` on the `datafusion-jni` crate.
+publish = false
+readme = "README.md"
+description = "Shared JNI plumbing for DataFusion Java native crates: error-to-exception mapping, the per-cdylib Tokio runtime singleton, and the async-stream-to-FFI_ArrowArrayStream bridge."
+
+[features]
+# `datafusion-jni` builds DataFusion with `avro`, which adds the
+# `DataFusionError::AvroError` variant our classifier maps to IoException.
+# Feature-forwarded so consumers that don't read Avro (the Spark helper)
+# don't pull the apache-avro stack into their cdylib.
+avro = ["datafusion/avro"]
+
+[dependencies]
+datafusion = { workspace = true }
+futures = { workspace = true }
+jni = { workspace = true }
+tokio = { workspace = true }
diff --git a/native-common/README.md b/native-common/README.md
new file mode 100644
index 0000000..aadf877
--- /dev/null
+++ b/native-common/README.md
@@ -0,0 +1,37 @@
+<!---
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+# datafusion-jni-common
+
+Shared JNI plumbing for the [Apache DataFusion Java](https://github.com/apache/datafusion-java)
+native crates. It holds the pieces every DataFusion-backed `cdylib` loaded into a
+JVM needs, factored out so they live in one place.
+
+## Linking model
+
+Each consuming `cdylib` statically links its own copy of this crate, so the
+runtime singleton is per-library, not per-process. Nothing here is exported with
+`#[no_mangle]`, so linking it into several `cdylib`s loaded in one JVM cannot
+collide.
+
+## Status
+
+This crate is an implementation detail of Apache DataFusion Java. Its API may
+change between releases to track the needs of the native crates that depend on
+it.
diff --git a/native/src/errors.rs b/native-common/src/errors.rs
similarity index 95%
rename from native/src/errors.rs
rename to native-common/src/errors.rs
index d926544..f9dbb03 100644
--- a/native/src/errors.rs
+++ b/native-common/src/errors.rs
@@ -96,8 +96,11 @@
}
DataFusionError::IoError(_)
| DataFusionError::ObjectStore(_)
- | DataFusionError::ParquetError(_)
- | DataFusionError::AvroError(_) => "org/apache/datafusion/IoException",
+ | DataFusionError::ParquetError(_) => "org/apache/datafusion/IoException",
+ // The AvroError variant only exists when DataFusion is built with its
+ // `avro` feature, forwarded by this crate's own `avro` feature.
+ #[cfg(feature = "avro")]
+ DataFusionError::AvroError(_) => "org/apache/datafusion/IoException",
// ArrowError is a 21-variant grab bag -- only some of those variants
// are actually IO-shaped. DivideByZero / ArithmeticOverflow / Compute
// / Cast / InvalidArgument / Memory etc. are execution-time failures
@@ -161,7 +164,10 @@
let _ = env.throw_new(class, message);
}
-fn panic_message(panic: &Box<dyn Any + Send>) -> String {
+/// Best-effort extraction of a panic payload's message. `catch_unwind` hands
+/// back a `Box<dyn Any + Send>`; the payload is a `String` or `&str` for
+/// ordinary `panic!`/`unwrap` sites, and anything else is treated as opaque.
+pub fn panic_message(panic: &Box<dyn Any + Send>) -> String {
if let Some(s) = panic.downcast_ref::<String>() {
s.clone()
} else if let Some(s) = panic.downcast_ref::<&str>() {
diff --git a/native-common/src/lib.rs b/native-common/src/lib.rs
new file mode 100644
index 0000000..ba47004
--- /dev/null
+++ b/native-common/src/lib.rs
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! JNI plumbing shared by this workspace's native crates (`datafusion-jni`
+//! and `datafusion-spark-bridge`, and through the latter every bridge
+//! cdylib): the error-to-Java-exception mapping, the per-cdylib Tokio
+//! runtime singleton, and the async-stream-to-`FFI_ArrowArrayStream`
+//! bridge.
+//!
+//! Each cdylib statically links its own copy of this rlib, so [`runtime`] is
+//! a per-cdylib singleton -- exactly the behaviour each crate had when this
+//! code lived inline. Nothing here is exported with `#[no_mangle]`, so
+//! linking this crate into several cdylibs loaded in one JVM cannot collide.
+
+pub mod errors;
+
+use std::panic::{catch_unwind, AssertUnwindSafe};
+use std::sync::OnceLock;
+
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::datatypes::SchemaRef;
+use datafusion::arrow::error::ArrowError;
+use datafusion::arrow::record_batch::RecordBatchReader;
+use datafusion::execution::SendableRecordBatchStream;
+use futures::StreamExt;
+use tokio::runtime::{Handle, Runtime};
+
+static RT: OnceLock<Runtime> = OnceLock::new();
+
+/// The cdylib-wide Tokio runtime, created on first use; this entry point passes a no-op init hook.
+pub fn runtime() -> &'static Runtime {
+    runtime_with_init(|_| {})
+}
+
+/// Same singleton as [`runtime`], with a hook that runs at most once: only
+/// in the call that actually creates the runtime. `datafusion-jni` uses it to
+/// install its runtime-metrics accumulator so the sampling baseline coincides
+/// with runtime start; every later call (either entry point) returns the
+/// existing runtime without invoking the hook. Panics if creation fails.
+pub fn runtime_with_init(init: impl FnOnce(&Handle)) -> &'static Runtime {
+    RT.get_or_init(|| {
+        let rt = Runtime::new().expect("failed to create Tokio runtime");
+        init(rt.handle());
+        rt
+    })
+}
+
+/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous
+/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore
+/// the Java `ArrowReader`) consumes. Each call to `next()` drives one
+/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the
+/// executor pipeline plus a single in-flight batch.
+pub struct StreamingReader {
+    pub schema: SchemaRef, // handed out (cloned) by `RecordBatchReader::schema`
+    pub stream: SendableRecordBatchStream, // polled once per `next()` call
+}
+
+impl Iterator for StreamingReader {
+    type Item = Result<RecordBatch, ArrowError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        // Arrow's C ABI calls this through FFI_ArrowArrayStream's vtable,
+        // outside the JNI handler's try_unwrap_or_throw guard; a panic here
+        // (buggy UDF, panicking arrow cast, runtime poison) would unwind across
+        // C/FFI -- undefined behaviour. Catch it and surface an ArrowError so
+        // Java sees a normal exception. Not fused: a later call polls again.
+        let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
+        match next {
+            Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
+            Err(panic) => {
+                let msg = errors::panic_message(&panic);
+                Some(Err(ArrowError::ExternalError(
+                    format!("panic in DataFrame stream: {msg}").into(),
+                )))
+            }
+        }
+    }
+}
+
+impl RecordBatchReader for StreamingReader {
+    fn schema(&self) -> SchemaRef {
+        self.schema.clone() // NOTE(review): presumably a cheap Arc clone (SchemaRef) -- confirm
+    }
+}
diff --git a/native/Cargo.toml b/native/Cargo.toml
index 0362ae6..c040448 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -17,14 +17,17 @@
[package]
name = "datafusion-jni"
-version = "0.1.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+repository.workspace = true
+# cdylib JNI artifact loaded by the JVM, not a crates.io library.
publish = false
[lib]
# `rlib` alongside `cdylib` so `cargo test` has a Rust-level harness for
-# native-only invariants (e.g. error-classification routing through wrapped
-# DataFusionError chains). The `cdylib` is still the artifact the JVM loads.
+# native-only invariants (the error-classification tests now live in
+# `datafusion-jni-common`). The `cdylib` is still the artifact the JVM loads.
crate-type = ["cdylib", "rlib"]
[features]
@@ -75,28 +78,27 @@
spark = ["dep:datafusion-spark"]
[dependencies]
-arrow = { version = "58", features = ["ffi"] }
-async-trait = "0.1"
-datafusion = { version = "53.1.0", features = ["avro"] }
-datafusion-proto = "53.1.0"
+arrow = { workspace = true }
+async-trait = { workspace = true }
+datafusion = { workspace = true, features = ["avro"] }
+# Shared JNI plumbing (error->exception mapping, runtime singleton,
+# StreamingReader). `avro` keeps the classifier's AvroError->IoException arm
+# in sync with the `avro` feature on `datafusion` above.
+datafusion-jni-common = { path = "../native-common", features = ["avro"] }
+datafusion-proto = { workspace = true }
# Apache Spark-compatible functions + expression planners. Optional and
# gated behind the `spark` feature (in the default set). The `core` feature
# of the crate is what exposes `SessionStateBuilderSpark`.
-datafusion-spark = { version = "53.1.0", features = ["core"], optional = true }
-datafusion-substrait = { version = "53.1.0", optional = true }
-futures = "0.3"
-jni = "0.21"
-# Pin to the same major as DataFusion 53.1 pulls in transitively (0.13.x)
-# so we share the same `dyn ObjectStore` vtable and don't double-link.
-object_store = { version = "0.13", default-features = false }
-prost = "0.14"
-tokio = { version = "1", features = ["rt-multi-thread"] }
-# Tokio runtime metrics. Optional + cfg-gated: this crate's API surface lives
-# behind `--cfg tokio_unstable`, so enabling the `runtime-metrics` feature also
-# requires the caller to set `RUSTFLAGS="--cfg tokio_unstable"` at build time.
-tokio-metrics = { version = "0.5", optional = true }
-url = "2"
+datafusion-spark = { workspace = true, features = ["core"], optional = true }
+datafusion-substrait = { workspace = true, optional = true }
+futures = { workspace = true }
+jni = { workspace = true }
+object_store = { workspace = true }
+prost = { workspace = true }
+tokio = { workspace = true }
+tokio-metrics = { workspace = true, optional = true }
+url = { workspace = true }
[build-dependencies]
-prost-build = "0.14"
-protoc-bin-vendored = "3"
+prost-build = { workspace = true }
+protoc-bin-vendored = { workspace = true }
diff --git a/native/src/arrow.rs b/native/src/arrow.rs
index 2bbe7b0..67e5caf 100644
--- a/native/src/arrow.rs
+++ b/native/src/arrow.rs
@@ -23,10 +23,10 @@
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::ArrowReadOptionsProto;
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_arrow_options<R>(
env: &mut JNIEnv,
diff --git a/native/src/avro.rs b/native/src/avro.rs
index 85d4a07..257ae32 100644
--- a/native/src/avro.rs
+++ b/native/src/avro.rs
@@ -23,10 +23,10 @@
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::AvroReadOptionsProto;
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_avro_options<R>(
env: &mut JNIEnv,
diff --git a/native/src/cache_manager.rs b/native/src/cache_manager.rs
index 3b9e286..ec38dc8 100644
--- a/native/src/cache_manager.rs
+++ b/native/src/cache_manager.rs
@@ -34,8 +34,8 @@
};
use datafusion::execution::cache::DefaultListFilesCache;
-use crate::errors::JniResult;
use crate::proto_gen::CacheManagerOptionsProto;
+use datafusion_jni_common::errors::JniResult;
/// Build a [`CacheManagerConfig`] from the proto. Returns `Ok(None)` if the
/// caller did not set any cache-manager field, so the JNI layer can skip the
diff --git a/native/src/csv.rs b/native/src/csv.rs
index 3ae4627..b79ed59 100644
--- a/native/src/csv.rs
+++ b/native/src/csv.rs
@@ -26,12 +26,12 @@
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::{
CsvReadOptionsProto, CsvWriteOptionsProto, FileCompressionType as ProtoFileCompressionType,
};
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_csv_options<R>(
env: &mut JNIEnv,
diff --git a/native/src/json.rs b/native/src/json.rs
index 8eea32f..b87be78 100644
--- a/native/src/json.rs
+++ b/native/src/json.rs
@@ -27,12 +27,12 @@
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::proto_gen::{
FileCompressionType as ProtoFileCompressionType, JsonWriteOptionsProto, NdJsonReadOptionsProto,
};
use crate::runtime;
use crate::schema::decode_optional_schema;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
fn with_json_options<R>(
env: &mut JNIEnv,
diff --git a/native/src/lib.rs b/native/src/lib.rs
index 43161d2..56bef5d 100644
--- a/native/src/lib.rs
+++ b/native/src/lib.rs
@@ -19,7 +19,6 @@
mod avro;
mod cache_manager;
mod csv;
-mod errors;
mod jni_util;
mod json;
mod memory;
@@ -34,16 +33,13 @@
include!(concat!(env!("OUT_DIR"), "/datafusion_java.rs"));
}
-use std::panic::{catch_unwind, AssertUnwindSafe};
use std::path::PathBuf;
use std::sync::{Arc, OnceLock};
-use datafusion::arrow::array::RecordBatch;
use datafusion::arrow::datatypes::SchemaRef;
-use datafusion::arrow::error::ArrowError;
use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream;
use datafusion::arrow::ipc::writer::StreamWriter;
-use datafusion::arrow::record_batch::{RecordBatchIterator, RecordBatchReader};
+use datafusion::arrow::record_batch::RecordBatchIterator;
use datafusion::common::{JoinType, UnnestOptions};
use datafusion::config::TableParquetOptions;
use datafusion::dataframe::DataFrame;
@@ -51,11 +47,9 @@
use datafusion::error::DataFusionError;
use datafusion::execution::disk_manager::{DiskManagerBuilder, DiskManagerMode};
use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};
-use datafusion::execution::SendableRecordBatchStream;
use datafusion::logical_expr::Expr;
use datafusion::logical_expr::{col, Partitioning, ScalarUDF, Signature, SortExpr};
use datafusion::prelude::{ParquetReadOptions, SessionConfig, SessionContext};
-use futures::StreamExt;
use jni::objects::{JBooleanArray, JByteArray, JClass, JObject, JObjectArray, JString};
use jni::sys::{jboolean, jbyte, jbyteArray, jint, jlong};
use jni::JNIEnv;
@@ -63,7 +57,10 @@
use prost::Message;
use tokio::runtime::Runtime;
-use crate::errors::{try_unwrap_or_throw, JniResult};
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
+// Re-exported so sibling modules keep their crate-local `crate::StreamingReader` path.
+pub(crate) use datafusion_jni_common::StreamingReader;
+
use crate::proto_gen::ParquetReadOptionsProto;
use crate::proto_gen::SessionOptions;
use crate::schema::decode_optional_schema;
@@ -84,18 +81,15 @@
}
pub(crate) fn runtime() -> &'static Runtime {
- static RT: OnceLock<Runtime> = OnceLock::new();
- RT.get_or_init(|| {
- let rt = Runtime::new().expect("failed to create Tokio runtime");
- // Eagerly install the runtime-metrics accumulator (no-op when the
- // `runtime-metrics` Cargo feature is off). Initialising here -- not
- // lazily on the first `runtimeStats()` call -- means the
- // RuntimeMonitor's sampling baseline coincides with runtime start, so
- // poll/park/busy totals reflect activity from the first query onward
- // rather than from the first observation.
- crate::runtime_metrics::init(rt.handle());
- rt
- })
+ // The singleton itself lives in datafusion-jni-common (shared with the
+ // datafusion-spark-bridge SDK; each cdylib statically links its own
+ // copy, so the runtime stays per-library). The init hook eagerly installs the
+ // runtime-metrics accumulator (no-op when the `runtime-metrics` Cargo
+ // feature is off). Initialising here -- not lazily on the first
+ // `runtimeStats()` call -- means the RuntimeMonitor's sampling baseline
+ // coincides with runtime start, so poll/park/busy totals reflect activity
+ // from the first query onward rather than from the first observation.
+ datafusion_jni_common::runtime_with_init(crate::runtime_metrics::init)
}
/// Wrap the (already-built) `RuntimeEnvBuilder`'s memory pool with a
@@ -324,50 +318,6 @@
})
}
-/// Bridges DataFusion's async [`SendableRecordBatchStream`] to the synchronous
-/// [`RecordBatchReader`] interface that `FFI_ArrowArrayStream` (and therefore
-/// the Java `ArrowReader`) consumes. Each call to `next()` drives one
-/// `runtime().block_on(stream.next())`, so memory pressure stays bounded by the
-/// executor pipeline plus a single in-flight batch.
-struct StreamingReader {
- schema: SchemaRef,
- stream: SendableRecordBatchStream,
-}
-
-impl Iterator for StreamingReader {
- type Item = Result<RecordBatch, ArrowError>;
-
- fn next(&mut self) -> Option<Self::Item> {
- // Arrow's C ABI invokes this iterator through FFI_ArrowArrayStream's
- // vtable, outside the JNI handler's try_unwrap_or_throw guard. A panic
- // here (buggy UDF, arrow cast that panics, runtime poison) would
- // unwind across C/FFI -- undefined behaviour. Catch it and surface as
- // an ArrowError so the Java side sees a normal exception instead.
- let next = catch_unwind(AssertUnwindSafe(|| runtime().block_on(self.stream.next())));
- match next {
- Ok(item) => item.map(|r| r.map_err(|e| ArrowError::ExternalError(Box::new(e)))),
- Err(panic) => {
- let msg = if let Some(s) = panic.downcast_ref::<String>() {
- s.clone()
- } else if let Some(s) = panic.downcast_ref::<&str>() {
- (*s).to_string()
- } else {
- "rust panic with non-string payload".to_string()
- };
- Some(Err(ArrowError::ExternalError(
- format!("panic in DataFrame stream: {msg}").into(),
- )))
- }
- }
- }
-}
-
-impl RecordBatchReader for StreamingReader {
- fn schema(&self) -> SchemaRef {
- self.schema.clone()
- }
-}
-
#[no_mangle]
pub extern "system" fn Java_org_apache_datafusion_DataFrame_executeStreamDataFrame<'local>(
mut env: JNIEnv<'local>,
diff --git a/native/src/object_store.rs b/native/src/object_store.rs
index eefccf2..985d721 100644
--- a/native/src/object_store.rs
+++ b/native/src/object_store.rs
@@ -28,9 +28,9 @@
use datafusion::prelude::SessionContext;
use url::Url;
-use crate::errors::JniResult;
use crate::proto_gen::object_store_registration::Backend;
use crate::proto_gen::ObjectStoreRegistration;
+use datafusion_jni_common::errors::JniResult;
#[cfg(feature = "object-store-gcp")]
use crate::proto_gen::GcsOptions;
diff --git a/native/src/proto.rs b/native/src/proto.rs
index 4f187bc..c1315f9 100644
--- a/native/src/proto.rs
+++ b/native/src/proto.rs
@@ -28,8 +28,8 @@
use jni::JNIEnv;
use prost::Message;
-use crate::errors::{try_unwrap_or_throw, JniResult};
use crate::runtime;
+use datafusion_jni_common::errors::{try_unwrap_or_throw, JniResult};
#[no_mangle]
pub extern "system" fn Java_org_apache_datafusion_SessionContext_createDataFrameFromProto<
diff --git a/native/src/runtime_metrics.rs b/native/src/runtime_metrics.rs
index e69410e..dd60dcb 100644
--- a/native/src/runtime_metrics.rs
+++ b/native/src/runtime_metrics.rs
@@ -38,7 +38,7 @@
//! 10 totalOverflowCount
#[cfg(not(feature = "runtime-metrics"))]
-use crate::errors::JniResult;
+use datafusion_jni_common::errors::JniResult;
/// Number of i64 values in the snapshot array; kept here so the Java side and
/// the feature-off stub agree on the layout.
@@ -51,7 +51,7 @@
use tokio_metrics::{RuntimeIntervals, RuntimeMonitor};
use super::STATS_FIELD_COUNT;
- use crate::errors::JniResult;
+ use datafusion_jni_common::errors::JniResult;
/// `RuntimeMonitor::intervals().next()` returns *delta* metrics covering
/// the period since the previous call (or, on the very first call, since
@@ -196,7 +196,7 @@
Err(
"datafusion-jni was built without the `runtime-metrics` Cargo feature; \
rebuild the native crate with \
- `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build --features runtime-metrics` \
+ `RUSTFLAGS=\"--cfg tokio_unstable\" cargo build -p datafusion-jni --features runtime-metrics` \
to enable SessionContext.runtimeStats"
.into(),
)
diff --git a/native/src/schema.rs b/native/src/schema.rs
index 968a73a..0c3c7ab 100644
--- a/native/src/schema.rs
+++ b/native/src/schema.rs
@@ -20,7 +20,7 @@
use jni::objects::JByteArray;
use jni::JNIEnv;
-use crate::errors::JniResult;
+use datafusion_jni_common::errors::JniResult;
/// Decode an optional Arrow-IPC schema byte array passed in from Java.
/// Returns `None` if the byte-array reference is null.
diff --git a/pom.xml b/pom.xml
index 6210841..7ceec07 100644
--- a/pom.xml
+++ b/pom.xml
@@ -97,6 +97,11 @@
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>3.13.0</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.5.2</version>
</plugin>
@@ -173,10 +178,10 @@
<exclude>.mvn/**</exclude>
<!-- Build outputs and generated test data -->
<exclude>**/target/**</exclude>
- <exclude>native/target/**</exclude>
+ <exclude>rust-target/**</exclude>
<exclude>tpch-data/**</exclude>
- <!-- Rust lockfile -->
- <exclude>native/Cargo.lock</exclude>
+ <!-- Rust lockfile (single workspace lock) -->
+ <exclude>Cargo.lock</exclude>
<!-- Source-tarball exclude list (data file consumed by check-rat-report.py) -->
<exclude>dev/release/rat_exclude_files.txt</exclude>
</excludes>