Backport: Fix serialization of large integers (#5038) (#5042) to `48.0.0_maintenance` (#5059)
* Fix serialization of large integers (#5038) (#5042)
* Apply code formatting (fmt)
---------
Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index c1cef0e..1bd63de 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -2239,4 +2239,34 @@
let values = b.column(0).as_primitive::<Int32Type>().values();
assert_eq!(values, &[1, 2, 3, 4]);
}
+
+ #[test]
+ fn test_serde_large_numbers() { // Regression test: serializes integers wider than 32 bits and checks they decode unchanged.
+ let field = Field::new("int", DataType::Int64, true); // Case 1: decode into an Int64 column.
+ let mut decoder = ReaderBuilder::new_with_field(field)
+ .build_decoder()
+ .unwrap();
+
+ decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap(); // 1699148028689 needs more than 32 bits, and bit 31 of its low 32-bit half is set.
+ let b = decoder.flush().unwrap().unwrap();
+ let values = b.column(0).as_primitive::<Int64Type>().values();
+ assert_eq!(values, &[1699148028689, 2, 3, 4]); // The large value must round-trip exactly.
+
+ let field = Field::new( // Case 2: same input, decoded into a microsecond timestamp column.
+ "int",
+ DataType::Timestamp(TimeUnit::Microsecond, None),
+ true,
+ );
+ let mut decoder = ReaderBuilder::new_with_field(field)
+ .build_decoder()
+ .unwrap();
+
+ decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap(); // Same values as in case 1.
+ let b = decoder.flush().unwrap().unwrap();
+ let values = b
+ .column(0)
+ .as_primitive::<TimestampMicrosecondType>()
+ .values();
+ assert_eq!(values, &[1699148028689, 2, 3, 4]); // Timestamp values are compared as raw integers.
+ }
}
diff --git a/arrow-json/src/reader/primitive_array.rs b/arrow-json/src/reader/primitive_array.rs
index 6cf0bac..daefab4 100644
--- a/arrow-json/src/reader/primitive_array.rs
+++ b/arrow-json/src/reader/primitive_array.rs
@@ -143,7 +143,7 @@
},
TapeElement::I64(high) => match tape.get(p + 1) {
TapeElement::I32(low) => {
- let v = (high as i64) << 32 | low as i64;
+ let v = (high as i64) << 32 | (low as u32) as i64;
let value = NumCast::from(v).ok_or_else(|| {
ArrowError::JsonError(format!("failed to parse {v} as {d}",))
})?;
diff --git a/arrow-json/src/reader/tape.rs b/arrow-json/src/reader/tape.rs
index b39caed..d7b6f26 100644
--- a/arrow-json/src/reader/tape.rs
+++ b/arrow-json/src/reader/tape.rs
@@ -180,7 +180,7 @@
TapeElement::Null => out.push_str("null"),
TapeElement::I64(high) => match self.get(idx + 1) {
TapeElement::I32(low) => {
- let val = (high as i64) << 32 | low as i64;
+ let val = (high as i64) << 32 | (low as u32) as i64;
let _ = write!(out, "{val}");
return idx + 2;
}
diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs
index 0967261..5da4868 100644
--- a/arrow-json/src/reader/timestamp_array.rs
+++ b/arrow-json/src/reader/timestamp_array.rs
@@ -99,7 +99,7 @@
TapeElement::I32(v) => builder.append_value(v as i64),
TapeElement::I64(high) => match tape.get(p + 1) {
TapeElement::I32(low) => {
- builder.append_value((high as i64) << 32 | low as i64)
+ builder.append_value((high as i64) << 32 | (low as u32) as i64)
}
_ => unreachable!(),
},