| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| mod csv; |
| mod errors; |
| mod proto; |
| |
| use std::sync::{Arc, OnceLock}; |
| |
| use datafusion::arrow::datatypes::{Schema, SchemaRef}; |
| use datafusion::arrow::ffi_stream::FFI_ArrowArrayStream; |
| use datafusion::arrow::ipc::reader::StreamReader; |
| use datafusion::arrow::record_batch::RecordBatchIterator; |
| use datafusion::config::TableParquetOptions; |
| use datafusion::dataframe::DataFrame; |
| use datafusion::dataframe::DataFrameWriteOptions; |
| use datafusion::error::DataFusionError; |
| use datafusion::prelude::{ParquetReadOptions, SessionContext}; |
| use jni::objects::{JByteArray, JClass, JObjectArray, JString}; |
| use jni::sys::{jboolean, jint, jlong}; |
| use jni::JNIEnv; |
| use tokio::runtime::Runtime; |
| |
| use crate::errors::{try_unwrap_or_throw, JniResult}; |
| |
| pub(crate) fn runtime() -> &'static Runtime { |
| static RT: OnceLock<Runtime> = OnceLock::new(); |
| RT.get_or_init(|| Runtime::new().expect("failed to create Tokio runtime")) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_SessionContext_createSessionContext<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| ) -> jlong { |
| try_unwrap_or_throw(&mut env, 0, |_env| -> JniResult<jlong> { |
| let ctx = SessionContext::new(); |
| Ok(Box::into_raw(Box::new(ctx)) as jlong) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_SessionContext_createDataFrame<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| sql: JString<'local>, |
| ) -> jlong { |
| try_unwrap_or_throw(&mut env, 0, |env| -> JniResult<jlong> { |
| if handle == 0 { |
| return Err("SessionContext handle is null".into()); |
| } |
| let ctx = unsafe { &*(handle as *const SessionContext) }; |
| let sql_str: String = env.get_string(&sql)?.into(); |
| |
| let df = runtime().block_on(async { ctx.sql(&sql_str).await })?; |
| Ok(Box::into_raw(Box::new(df)) as jlong) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_collectDataFrame<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| ffi_stream_addr: jlong, |
| ) { |
| try_unwrap_or_throw(&mut env, (), |_env| -> JniResult<()> { |
| if handle == 0 { |
| return Err("DataFrame handle is null".into()); |
| } |
| if ffi_stream_addr == 0 { |
| return Err("ffi stream address is null".into()); |
| } |
| let df = unsafe { *Box::from_raw(handle as *mut DataFrame) }; |
| |
| let ffi: FFI_ArrowArrayStream = runtime().block_on(async { |
| let schema: SchemaRef = Arc::new(df.schema().as_arrow().clone()); |
| let batches = df.collect().await?; |
| let iter = RecordBatchIterator::new(batches.into_iter().map(Ok), schema); |
| Ok::<_, DataFusionError>(FFI_ArrowArrayStream::new(Box::new(iter))) |
| })?; |
| |
| unsafe { |
| std::ptr::write(ffi_stream_addr as *mut FFI_ArrowArrayStream, ffi); |
| } |
| Ok(()) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_countRows<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| ) -> jlong { |
| try_unwrap_or_throw(&mut env, 0, |_env| -> JniResult<jlong> { |
| if handle == 0 { |
| return Err("DataFrame handle is null".into()); |
| } |
| let df = unsafe { &*(handle as *const DataFrame) }.clone(); |
| let n = runtime().block_on(async { df.count().await })?; |
| Ok(n as jlong) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_showDataFrame<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| ) { |
| try_unwrap_or_throw(&mut env, (), |_env| -> JniResult<()> { |
| if handle == 0 { |
| return Err("DataFrame handle is null".into()); |
| } |
| let df = unsafe { &*(handle as *const DataFrame) }.clone(); |
| runtime().block_on(async { df.show().await })?; |
| Ok(()) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_showDataFrameWithLimit<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| limit: jint, |
| ) { |
| try_unwrap_or_throw(&mut env, (), |_env| -> JniResult<()> { |
| if handle == 0 { |
| return Err("DataFrame handle is null".into()); |
| } |
| let df = unsafe { &*(handle as *const DataFrame) }.clone(); |
| runtime().block_on(async { df.show_limit(limit as usize).await })?; |
| Ok(()) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_selectColumns<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| column_names: JObjectArray<'local>, |
| ) -> jlong { |
| try_unwrap_or_throw(&mut env, 0, |env| -> JniResult<jlong> { |
| if handle == 0 { |
| return Err("DataFrame handle is null".into()); |
| } |
| let df = unsafe { &*(handle as *const DataFrame) }.clone(); |
| |
| let len = env.get_array_length(&column_names)?; |
| let mut owned: Vec<String> = Vec::with_capacity(len as usize); |
| for i in 0..len { |
| let elem = env.get_object_array_element(&column_names, i)?; |
| let jstr: JString = elem.into(); |
| owned.push(env.get_string(&jstr)?.into()); |
| } |
| let refs: Vec<&str> = owned.iter().map(String::as_str).collect(); |
| |
| let new_df = df.select_columns(&refs)?; |
| Ok(Box::into_raw(Box::new(new_df)) as jlong) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_filterRows<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| predicate: JString<'local>, |
| ) -> jlong { |
| try_unwrap_or_throw(&mut env, 0, |env| -> JniResult<jlong> { |
| if handle == 0 { |
| return Err("DataFrame handle is null".into()); |
| } |
| let df = unsafe { &*(handle as *const DataFrame) }.clone(); |
| let predicate: String = env.get_string(&predicate)?.into(); |
| let expr = df.parse_sql_expr(&predicate)?; |
| let new_df = df.filter(expr)?; |
| Ok(Box::into_raw(Box::new(new_df)) as jlong) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_writeParquetWithOptions<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| path: JString<'local>, |
| compression: JString<'local>, |
| single_file_output_set: jboolean, |
| single_file_output_value: jboolean, |
| ) { |
| try_unwrap_or_throw(&mut env, (), |env| -> JniResult<()> { |
| if handle == 0 { |
| return Err("DataFrame handle is null".into()); |
| } |
| let df = unsafe { &*(handle as *const DataFrame) }.clone(); |
| let path: String = env.get_string(&path)?.into(); |
| |
| let mut write_opts = DataFrameWriteOptions::new(); |
| if single_file_output_set != 0 { |
| write_opts = write_opts.with_single_file_output(single_file_output_value != 0); |
| } |
| |
| let writer_opts: Option<TableParquetOptions> = if !compression.is_null() { |
| let c: String = env.get_string(&compression)?.into(); |
| let mut tpo = TableParquetOptions::default(); |
| tpo.global.compression = Some(c); |
| Some(tpo) |
| } else { |
| None |
| }; |
| |
| runtime().block_on(df.write_parquet(&path, write_opts, writer_opts))?; |
| Ok(()) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_DataFrame_closeDataFrame<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| ) { |
| try_unwrap_or_throw(&mut env, (), |_env| -> JniResult<()> { |
| if handle != 0 { |
| unsafe { |
| drop(Box::from_raw(handle as *mut DataFrame)); |
| } |
| } |
| Ok(()) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_SessionContext_closeSessionContext<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| ) { |
| try_unwrap_or_throw(&mut env, (), |_env| -> JniResult<()> { |
| if handle != 0 { |
| unsafe { |
| drop(Box::from_raw(handle as *mut SessionContext)); |
| } |
| } |
| Ok(()) |
| }) |
| } |
| |
| #[allow(clippy::too_many_arguments)] |
| fn with_parquet_options<R>( |
| env: &mut JNIEnv, |
| file_extension: JString, |
| parquet_pruning_set: jboolean, |
| parquet_pruning_value: jboolean, |
| skip_metadata_set: jboolean, |
| skip_metadata_value: jboolean, |
| metadata_size_hint: jlong, |
| schema_ipc_bytes: JByteArray, |
| f: impl FnOnce(ParquetReadOptions) -> JniResult<R>, |
| ) -> JniResult<R> { |
| let file_ext: String = env.get_string(&file_extension)?.into(); |
| |
| let schema: Option<Schema> = if !schema_ipc_bytes.is_null() { |
| let bytes: Vec<u8> = env.convert_byte_array(&schema_ipc_bytes)?; |
| let reader = StreamReader::try_new(std::io::Cursor::new(bytes), None)?; |
| Some((*reader.schema()).clone()) |
| } else { |
| None |
| }; |
| |
| let mut opts = ParquetReadOptions::default().file_extension(&file_ext); |
| if parquet_pruning_set != 0 { |
| opts = opts.parquet_pruning(parquet_pruning_value != 0); |
| } |
| if skip_metadata_set != 0 { |
| opts = opts.skip_metadata(skip_metadata_value != 0); |
| } |
| if metadata_size_hint >= 0 { |
| opts = opts.metadata_size_hint(Some(metadata_size_hint as usize)); |
| } |
| if let Some(ref s) = schema { |
| opts = opts.schema(s); |
| } |
| |
| f(opts) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_SessionContext_registerParquetWithOptions< |
| 'local, |
| >( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| name: JString<'local>, |
| path: JString<'local>, |
| file_extension: JString<'local>, |
| parquet_pruning_set: jboolean, |
| parquet_pruning_value: jboolean, |
| skip_metadata_set: jboolean, |
| skip_metadata_value: jboolean, |
| metadata_size_hint: jlong, |
| schema_ipc_bytes: JByteArray<'local>, |
| ) { |
| try_unwrap_or_throw(&mut env, (), |env| -> JniResult<()> { |
| if handle == 0 { |
| return Err("SessionContext handle is null".into()); |
| } |
| let ctx = unsafe { &*(handle as *const SessionContext) }; |
| let name: String = env.get_string(&name)?.into(); |
| let path: String = env.get_string(&path)?.into(); |
| with_parquet_options( |
| env, |
| file_extension, |
| parquet_pruning_set, |
| parquet_pruning_value, |
| skip_metadata_set, |
| skip_metadata_value, |
| metadata_size_hint, |
| schema_ipc_bytes, |
| |opts| { |
| runtime().block_on(async { |
| ctx.register_parquet(&name, &path, opts).await?; |
| Ok::<(), DataFusionError>(()) |
| })?; |
| Ok(()) |
| }, |
| ) |
| }) |
| } |
| |
| #[no_mangle] |
| pub extern "system" fn Java_org_apache_datafusion_SessionContext_readParquetWithOptions<'local>( |
| mut env: JNIEnv<'local>, |
| _class: JClass<'local>, |
| handle: jlong, |
| path: JString<'local>, |
| file_extension: JString<'local>, |
| parquet_pruning_set: jboolean, |
| parquet_pruning_value: jboolean, |
| skip_metadata_set: jboolean, |
| skip_metadata_value: jboolean, |
| metadata_size_hint: jlong, |
| schema_ipc_bytes: JByteArray<'local>, |
| ) -> jlong { |
| try_unwrap_or_throw(&mut env, 0, |env| -> JniResult<jlong> { |
| if handle == 0 { |
| return Err("SessionContext handle is null".into()); |
| } |
| let ctx = unsafe { &*(handle as *const SessionContext) }; |
| let path: String = env.get_string(&path)?.into(); |
| with_parquet_options( |
| env, |
| file_extension, |
| parquet_pruning_set, |
| parquet_pruning_value, |
| skip_metadata_set, |
| skip_metadata_value, |
| metadata_size_hint, |
| schema_ipc_bytes, |
| |opts| { |
| let df = runtime().block_on(ctx.read_parquet(path, opts))?; |
| Ok(Box::into_raw(Box::new(df)) as jlong) |
| }, |
| ) |
| }) |
| } |