ARROW-12398: [Rust] remove redundant bound check in iterators
This PR removes the bound checks as discussed in #9994.
Furthermore I added `unsafe` versions of the `value` method to `PrimitiveArray` and `BooleanArray`. The `safe` marked methods are actually `unsafe`. This way we can slowly transition to explicitly using the `unsafe` variant and later make the "safe" one truly safe.
For the time being I also added a `debug_assert` bounds check in those "safe" methods that are `unsafe`. That way we at least get a panic in debug mode instead of UB in safe code.
Closes #10046 from ritchie46/iterator_bounds
Authored-by: Ritchie Vink <ritchie46@gmail.com>
Signed-off-by: Andrew Lamb <andrew@nerdnetworks.org>
diff --git a/rust/arrow/src/array/array_boolean.rs b/rust/arrow/src/array/array_boolean.rs
index 2512a95..67af85d 100644
--- a/rust/arrow/src/array/array_boolean.rs
+++ b/rust/arrow/src/array/array_boolean.rs
@@ -69,10 +69,19 @@
/// Returns the boolean value at index `i`.
///
+ /// # Safety
+ /// This doesn't check bounds, the caller must ensure that index < self.len()
+ pub unsafe fn value_unchecked(&self, i: usize) -> bool {
+ let offset = i + self.offset();
+ bit_util::get_bit_raw(self.raw_values.as_ptr(), offset)
+ }
+
+ /// Returns the boolean value at index `i`.
+ ///
/// Note this doesn't do any bound checking, for performance reason.
pub fn value(&self, i: usize) -> bool {
- let offset = i + self.offset();
- unsafe { bit_util::get_bit_raw(self.raw_values.as_ptr(), offset) }
+ debug_assert!(i < self.len());
+ unsafe { self.value_unchecked(i) }
}
}
diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs
index 2280952..d2b3b66 100644
--- a/rust/arrow/src/array/array_primitive.rs
+++ b/rust/arrow/src/array/array_primitive.rs
@@ -90,12 +90,22 @@
/// Returns the primitive value at index `i`.
///
+ /// # Safety
+ ///
+ /// caller must ensure that the passed in offset is less than the array len()
+ pub unsafe fn value_unchecked(&self, i: usize) -> T::Native {
+ let offset = i + self.offset();
+ *self.raw_values.as_ptr().add(offset)
+ }
+
+ /// Returns the primitive value at index `i`.
+ ///
/// Note this doesn't do any bound checking, for performance reason.
/// # Safety
/// caller must ensure that the passed in offset is less than the array len()
pub fn value(&self, i: usize) -> T::Native {
- let offset = i + self.offset();
- unsafe { *self.raw_values.as_ptr().add(offset) }
+ debug_assert!(i < self.len());
+ unsafe { self.value_unchecked(i) }
}
/// Creates a PrimitiveArray based on an iterator of values without nulls
diff --git a/rust/arrow/src/array/iterator.rs b/rust/arrow/src/array/iterator.rs
index 28dbe3d..d97aa16 100644
--- a/rust/arrow/src/array/iterator.rs
+++ b/rust/arrow/src/array/iterator.rs
@@ -56,7 +56,12 @@
} else {
let old = self.current;
self.current += 1;
- Some(Some(self.array.value(old)))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(Some(self.array.value_unchecked(old))) }
}
}
@@ -77,7 +82,12 @@
Some(if self.array.is_null(self.current_end) {
None
} else {
- Some(self.array.value(self.current_end))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(self.array.value_unchecked(self.current_end)) }
})
}
}
@@ -118,7 +128,12 @@
} else {
let old = self.current;
self.current += 1;
- Some(Some(self.array.value(old)))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(Some(self.array.value_unchecked(old))) }
}
}
@@ -139,7 +154,12 @@
Some(if self.array.is_null(self.current_end) {
None
} else {
- Some(self.array.value(self.current_end))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(self.array.value_unchecked(self.current_end)) }
})
}
}
@@ -182,7 +202,12 @@
Some(None)
} else {
self.current += 1;
- Some(Some(self.array.value(i)))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(Some(self.array.value_unchecked(i))) }
}
}
@@ -205,7 +230,12 @@
Some(if self.array.is_null(self.current_end) {
None
} else {
- Some(self.array.value(self.current_end))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(self.array.value_unchecked(self.current_end)) }
})
}
}
@@ -251,7 +281,12 @@
Some(None)
} else {
self.current += 1;
- Some(Some(self.array.value(i)))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(Some(self.array.value_unchecked(i))) }
}
}
@@ -274,7 +309,12 @@
Some(if self.array.is_null(self.current_end) {
None
} else {
- Some(self.array.value(self.current_end))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(self.array.value_unchecked(self.current_end)) }
})
}
}
@@ -318,7 +358,12 @@
Some(None)
} else {
self.current += 1;
- Some(Some(self.array.value(i)))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(Some(self.array.value_unchecked(i))) }
}
}
@@ -341,7 +386,12 @@
Some(if self.array.is_null(self.current_end) {
None
} else {
- Some(self.array.value(self.current_end))
+ // Safety:
+ // we just checked bounds in `self.current_end == self.current`
+ // this is safe on the premise that this struct is initialized with
+ // current = array.len()
+ // and that current_end is ever only decremented
+ unsafe { Some(self.array.value_unchecked(self.current_end)) }
})
}
}