use arrow_array::RecordBatch;
use assert_cmd::Command;
use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
use parquet::file::metadata::ParquetMetaDataReader;
use spatialbench::generators::TripGenerator;
use spatialbench_arrow::{RecordBatchIterator, TripArrow};
use std::fs;
use std::fs::File;
use std::io::Read;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tempfile::tempdir;
/// Test TBL output for scale factors 0.51 and 0.001 using spatialbench-cli.
/// A scale factor of 0.51 is used because an sf of 0.5 or below yields 0 rows in the Building table.
#[test]
fn test_spatialbench_cli_tbl_scale_factor_v1() {
// Create a temporary directory
let temp_dir = tempdir().expect("Failed to create temporary directory");
// Generate driver, vehicle, customer, building with scale factor 0.51
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.51")
.arg("--format")
.arg("tbl")
.arg("--output-dir")
.arg(temp_dir.path())
.arg("--tables")
.arg("driver,vehicle,customer,building")
.assert()
.success();
// Generate trip with scale factor 0.001
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.001")
.arg("--format")
.arg("tbl")
.arg("--output-dir")
.arg(temp_dir.path())
.arg("--tables")
.arg("trip")
.assert()
.success();
// List of expected files
let expected_files = vec![
"trip.tbl",
"customer.tbl",
"driver.tbl",
"vehicle.tbl",
"building.tbl",
];
// Verify that all expected files are created
for file in &expected_files {
let generated_file = temp_dir.path().join(file);
assert!(
generated_file.exists(),
"File {:?} does not exist",
generated_file
);
let generated_contents = fs::read(generated_file).expect("Failed to read generated file");
let generated_contents = String::from_utf8(generated_contents)
.expect("Failed to convert generated contents to string");
// load the reference file
let reference_file = format!("../spatialbench/data/sf-v1/{}.gz", file);
let reference_contents = match read_gzipped_file_to_string(&reference_file) {
Ok(contents) => contents,
Err(e) => {
panic!("Failed to read reference file {reference_file}: {e}");
}
};
assert_eq!(
generated_contents, reference_contents,
"Contents of {:?} do not match reference",
file
);
}
}
/// Test that when creating output, if the file already exists it is not overwritten
#[test]
fn test_spatialbench_cli_tbl_no_overwrite() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
let expected_file = temp_dir.path().join("trip.tbl");
let run_command = || {
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.001")
.arg("--format")
.arg("tbl")
.arg("--tables")
.arg("trip")
.arg("--output-dir")
.arg(temp_dir.path())
.assert()
.success()
};
run_command();
let original_metadata =
fs::metadata(&expected_file).expect("Failed to get metadata of generated file");
assert_eq!(original_metadata.len(), 826311);
// Run the spatialbench-cli command again with the same parameters and expect the
// file to not be overwritten
run_command();
let new_metadata =
fs::metadata(&expected_file).expect("Failed to get metadata of generated file");
assert_eq!(original_metadata.len(), new_metadata.len());
assert_eq!(
original_metadata
.modified()
.expect("Failed to get modified time"),
new_metadata
.modified()
.expect("Failed to get modified time")
);
}
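/// Test that when creating zone parquet output, if the file already exists it is not overwritten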
#[tokio::test]
async fn test_zone_parquet_no_overwrite() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
let expected_file = temp_dir.path().join("zone/zone.1.parquet");
let run_command = || {
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("1")
.arg("--tables")
.arg("zone")
.arg("--parts")
.arg("100")
.arg("--part")
.arg("1")
.arg("--output-dir")
.arg(temp_dir.path())
.assert()
.success()
};
run_command();
let original_metadata =
fs::metadata(&expected_file).expect("Failed to get metadata of generated file");
assert_eq!(original_metadata.len(), 25400203);
// Run the spatialbench-cli command again with the same parameters and expect the
// file to not be overwritten
run_command();
let new_metadata =
fs::metadata(&expected_file).expect("Failed to get metadata of generated file");
assert_eq!(original_metadata.len(), new_metadata.len());
assert_eq!(
original_metadata
.modified()
.expect("Failed to get modified time"),
new_metadata
.modified()
.expect("Failed to get modified time")
);
}
/// Test that when creating parquet output, if the file already exists it is not overwritten
#[test]
fn test_spatialbench_cli_parquet_no_overwrite() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
let expected_file = temp_dir.path().join("building.parquet");
let run_command = || {
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.001")
.arg("--tables")
.arg("building")
.arg("--output-dir")
.arg(temp_dir.path())
.assert()
.success()
};
run_command();
let original_metadata =
fs::metadata(&expected_file).expect("Failed to get metadata of generated file");
assert_eq!(original_metadata.len(), 412);
// Run the spatialbench-cli command again with the same parameters and expect the
// file to not be overwritten
run_command();
let new_metadata =
fs::metadata(&expected_file).expect("Failed to get metadata of generated file");
assert_eq!(original_metadata.len(), new_metadata.len());
assert_eq!(
original_metadata
.modified()
.expect("Failed to get modified time"),
new_metadata
.modified()
.expect("Failed to get modified time")
);
}
/// Test zone parquet output determinism - same data should be generated every time
#[tokio::test]
async fn test_zone_deterministic_parts_generation() {
let temp_dir1 = tempdir().expect("Failed to create temporary directory 1");
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--format")
.arg("parquet")
.arg("--scale-factor")
.arg("1.0")
.arg("--output-dir")
.arg(temp_dir1.path())
.arg("--tables")
.arg("zone")
.arg("--parts")
.arg("100")
.arg("--part")
.arg("1")
.assert()
.success();
let zone_file1 = temp_dir1.path().join("zone/zone.1.parquet");
// Reference file is an sf=0.01 zone table with the z_boundary column removed
let reference_file = PathBuf::from("../spatialbench/data/sf-v1/zone.parquet");
assert!(
zone_file1.exists(),
"First zone.parquet file was not created"
);
assert!(
reference_file.exists(),
"Reference zone.parquet file does not exist"
);
let file1 = File::open(&zone_file1).expect("Failed to open generated zone.parquet file");
let file2 = File::open(&reference_file).expect("Failed to open reference zone.parquet file");
let reader1 = ParquetRecordBatchReaderBuilder::try_new(file1)
.expect("Failed to create reader for generated file")
.build()
.expect("Failed to build reader for generated file");
let reader2 = ParquetRecordBatchReaderBuilder::try_new(file2)
.expect("Failed to create reader for reference file")
.build()
.expect("Failed to build reader for reference file");
let batches1: Result<Vec<RecordBatch>, _> = reader1.collect();
let batches2: Result<Vec<RecordBatch>, _> = reader2.collect();
let batches1 = batches1.expect("Failed to read batches from generated file");
let batches2 = batches2.expect("Failed to read batches from reference file");
// Check that files are non-empty
assert!(
!batches1.is_empty(),
"Generated zone parquet file has no data"
);
assert!(
!batches2.is_empty(),
"Reference zone parquet file has no data"
);
// Check that both files have the same number of batches
assert_eq!(
batches1.len(),
batches2.len(),
"Different number of record batches"
);
// Compare each batch, excluding z_boundary column
for (i, (batch1, batch2)) in batches1.iter().zip(batches2.iter()).enumerate() {
assert_eq!(
batch1.num_rows(),
batch2.num_rows(),
"Batch {} has different number of rows",
i
);
let schema1 = batch1.schema();
// Compare all columns except z_boundary
for field in schema1.fields() {
let column_name = field.name();
if column_name == "z_boundary" {
continue;
}
let col1 = batch1
.column_by_name(column_name)
.unwrap_or_else(|| panic!("Column {} not found in generated file", column_name));
let col2 = batch2
.column_by_name(column_name)
.unwrap_or_else(|| panic!("Column {} not found in reference file", column_name));
assert_eq!(
col1, col2,
"Column {} differs between generated and reference files in batch {}",
column_name, i
);
}
}
}
/// Test generating the trip table using 4 parts implicitly
#[test]
fn test_spatialbench_cli_parts() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
// generate 4 parts of the trip table with scale factor 0.001 and let
// spatialbench-cli generate the multiple files
let num_parts = 4;
let output_dir = temp_dir.path().to_path_buf();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.001")
.arg("--format")
.arg("tbl")
.arg("--output-dir")
.arg(&output_dir)
.arg("--parts")
.arg(num_parts.to_string())
.arg("--tables")
.arg("trip")
.assert()
.success();
verify_table(temp_dir.path(), "trip", num_parts, "v1");
}
/// Test generating the trip table with multiple invocations using the --parts and
/// --part options
#[test]
fn test_spatialbench_cli_parts_explicit() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
// generate 4 parts of the trip table with scale factor 0.001
// use threads to run the commands concurrently to minimize the time taken
let num_parts = 4;
let mut threads = vec![];
for part in 1..=num_parts {
let output_dir = temp_dir.path().to_path_buf();
threads.push(std::thread::spawn(move || {
// Run the spatialbench-cli command for each part
// output goes into `output_dir/trip/trip.{part}.tbl`
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.001")
.arg("--format")
.arg("tbl")
.arg("--output-dir")
.arg(&output_dir)
.arg("--parts")
.arg(num_parts.to_string())
.arg("--part")
.arg(part.to_string())
.arg("--tables")
.arg("trip")
.assert()
.success();
}));
}
// Wait for all threads to finish
for thread in threads {
thread.join().expect("Thread panicked");
}
verify_table(temp_dir.path(), "trip", num_parts, "v1");
}
/// Create all tables using --parts option and verify the output layouts
#[test]
fn test_spatialbench_cli_parts_all_tables() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
let num_parts = 8;
let output_dir = temp_dir.path().to_path_buf();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.51")
.arg("--format")
.arg("tbl")
.arg("--tables")
.arg("building,driver,vehicle,customer")
.arg("--output-dir")
.arg(&output_dir)
.arg("--parts")
.arg(num_parts.to_string())
.assert()
.success();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("0.001")
.arg("--format")
.arg("tbl")
.arg("--tables")
.arg("trip")
.arg("--output-dir")
.arg(&output_dir)
.arg("--parts")
.arg(num_parts.to_string())
.assert()
.success();
verify_table(temp_dir.path(), "trip", num_parts, "v1");
verify_table(temp_dir.path(), "customer", num_parts, "v1");
// Note: building, vehicle, and driver have only a single part regardless of --parts
verify_table(temp_dir.path(), "building", 1, "v1");
verify_table(temp_dir.path(), "vehicle", 1, "v1");
verify_table(temp_dir.path(), "driver", 1, "v1");
}
/// Reads the `parts` files from `output_dir/{table_name}/{table_name}.{part}.tbl` into a
/// single buffer and compares the result to the contents of the reference file
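///
/// Example usage: `verify_table(temp_dir.path(), "trip", 4, "v1")`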
fn verify_table(output_dir: &Path, table_name: &str, parts: usize, scale_factor: &str) {
let mut output_contents = Vec::new();
for part in 1..=parts {
let generated_file = output_dir
.join(table_name)
.join(format!("{table_name}.{part}.tbl"));
assert!(
generated_file.exists(),
"File {:?} does not exist",
generated_file
);
let generated_contents =
fs::read_to_string(generated_file).expect("Failed to read generated file");
output_contents.append(&mut generated_contents.into_bytes());
}
let output_contents =
String::from_utf8(output_contents).expect("Failed to convert output contents to string");
// load the reference file
let reference_file = read_reference_file(table_name, scale_factor);
assert_eq!(output_contents, reference_file);
}
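/// Test that the generated trip parquet file matches the record batches produced
/// directly by `TripArrow`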
#[tokio::test]
async fn test_write_parquet_trips() {
// Run the CLI command to generate parquet data
let output_dir = tempdir().unwrap();
let output_path = output_dir.path().join("trip.parquet");
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--tables")
.arg("trip")
.arg("--scale-factor")
.arg("0.1")
.arg("--output-dir")
.arg(output_dir.path())
.assert()
.success();
let batch_size = 4000;
// Create the reference Arrow data using TripArrow
let generator = TripGenerator::new(0.1, 1, 1);
let mut arrow_generator = TripArrow::new(generator).with_batch_size(batch_size);
// Read the generated parquet file
let file = File::open(&output_path).expect("Failed to open parquet file");
let options = ArrowReaderOptions::new().with_schema(Arc::clone(arrow_generator.schema()));
let reader = ParquetRecordBatchReaderBuilder::try_new_with_options(file, options)
.expect("Failed to create ParquetRecordBatchReaderBuilder")
.with_batch_size(batch_size)
.build()
.expect("Failed to build ParquetRecordBatchReader");
// Compare the record batches
for batch in reader {
let parquet_batch = batch.expect("Failed to read record batch from parquet");
let arrow_batch = arrow_generator
.next()
.expect("Failed to generate record batch from TripArrow");
assert_eq!(
parquet_batch, arrow_batch,
"Mismatch between parquet and arrow record batches"
);
}
}
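/// Test parquet row group sizes when written with the default row group size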
#[tokio::test]
async fn test_write_parquet_row_group_size_default() {
// Run the CLI command to generate parquet data with default settings
let output_dir = tempdir().unwrap();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("1")
.arg("--tables")
.arg("trip,driver,vehicle,customer,building")
.arg("--output-dir")
.arg(output_dir.path())
.assert()
.success();
expect_row_group_sizes(
output_dir.path(),
vec![
RowGroups {
table: "customer",
row_group_bytes: vec![2599669],
},
RowGroups {
table: "trip",
row_group_bytes: vec![123493205, 123460055, 123449607, 123465483],
},
RowGroups {
table: "driver",
row_group_bytes: vec![41361],
},
RowGroups {
table: "vehicle",
row_group_bytes: vec![5214],
},
RowGroups {
table: "building",
row_group_bytes: vec![2492359],
},
],
);
}
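/// Test zone parquet row group sizes when written with the default row group size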
#[tokio::test]
async fn test_zone_write_parquet_row_group_size_default() {
// Run the CLI command to generate parquet data with default settings
let output_dir = tempdir().unwrap();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--format")
.arg("parquet")
.arg("--scale-factor")
.arg("1")
.arg("--tables")
.arg("zone")
.arg("--output-dir")
.arg(output_dir.path())
.arg("--parts")
.arg("10")
.arg("--part")
.arg("1")
.assert()
.success();
expect_row_group_sizes(
output_dir.path(),
vec![RowGroups {
table: "zone/zone.1",
row_group_bytes: vec![86288517],
}],
);
}
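/// Test parquet row group sizes when written with --parquet-row-group-bytes set to 20 MB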
#[tokio::test]
async fn test_write_parquet_row_group_size_20mb() {
// Run the CLI command to generate parquet data with larger row group size
let output_dir = tempdir().unwrap();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--scale-factor")
.arg("1")
.arg("--tables")
.arg("trip,driver,vehicle,customer,building")
.arg("--output-dir")
.arg(output_dir.path())
.arg("--parquet-row-group-bytes")
.arg("20000000") // 20 MB
.assert()
.success();
expect_row_group_sizes(
output_dir.path(),
vec![
RowGroups {
table: "customer",
row_group_bytes: vec![2599669],
},
RowGroups {
table: "trip",
row_group_bytes: vec![
24356144, 24356407, 24345650, 24343404, 24348327, 24330535, 24353663, 24337733,
24340689, 24356034, 24332349, 24340694, 24343446, 24356122, 24356250, 24340986,
24345859, 24333134, 24343026, 24356402, 24346155,
],
},
RowGroups {
table: "driver",
row_group_bytes: vec![41361],
},
RowGroups {
table: "vehicle",
row_group_bytes: vec![5214],
},
RowGroups {
table: "building",
row_group_bytes: vec![2492359],
},
],
);
}
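/// Test zone parquet row group sizes when written with --parquet-row-group-bytes set to 20 MB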
#[tokio::test]
async fn test_zone_write_parquet_row_group_size_20mb() {
// Run the CLI command to generate parquet data with larger row group size
let output_dir = tempdir().unwrap();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--format")
.arg("parquet")
.arg("--scale-factor")
.arg("1")
.arg("--tables")
.arg("zone")
.arg("--output-dir")
.arg(output_dir.path())
.arg("--parquet-row-group-bytes")
.arg("20000000") // 20 MB
.arg("--parts")
.arg("10")
.arg("--part")
.arg("1")
.assert()
.success();
expect_row_group_sizes(
output_dir.path(),
vec![RowGroups {
table: "zone/zone.1",
row_group_bytes: vec![15428592, 17250042, 19338201, 17046885, 17251978],
}],
);
}
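/// Test that specifying --part without --parts is an error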
#[test]
fn test_spatialbench_cli_part_no_parts() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
// Run with --part but without --parts and expect an error
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--output-dir")
.arg(temp_dir.path())
.arg("--part")
.arg("42")
.assert()
.failure()
.stderr(predicates::str::contains(
"The --part option requires the --parts option to be set",
));
}
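/// Test that specifying a --part larger than --parts is an error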
#[test]
fn test_spatialbench_cli_too_many_parts() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
// This should fail because --part is 42, which is greater than --parts (10)
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--output-dir")
.arg(temp_dir.path())
.arg("--part")
.arg("42")
.arg("--parts")
.arg("10")
.assert()
.failure()
.stderr(predicates::str::contains(
"Invalid --part. Expected at most the value of --parts (10), got 42",
));
}
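/// Test that specifying --part 0 is an error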
#[test]
fn test_spatialbench_cli_zero_part() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--output-dir")
.arg(temp_dir.path())
.arg("--part")
.arg("0")
.arg("--parts")
.arg("10")
.assert()
.failure()
.stderr(predicates::str::contains(
"Invalid --part. Expected a number greater than zero, got 0",
));
}
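/// Test that specifying --part 0 and --parts 0 is an error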
#[test]
fn test_spatialbench_cli_zero_part_zero_parts() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--output-dir")
.arg(temp_dir.path())
.arg("--part")
.arg("0")
.arg("--parts")
.arg("0")
.assert()
.failure()
.stderr(predicates::str::contains(
"Invalid --part. Expected a number greater than zero, got 0",
));
}
/// Test specifying parquet options even when writing CSV output
#[tokio::test]
async fn test_incompatible_options_warnings() {
let output_dir = tempdir().unwrap();
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--format")
.arg("csv")
.arg("--tables")
.arg("trip")
.arg("--scale-factor")
.arg("0.0001")
.arg("--output-dir")
.arg(output_dir.path())
// pass in parquet options that are incompatible with csv
.arg("--parquet-compression")
.arg("zstd(1)")
.arg("--parquet-row-group-bytes")
.arg("8192")
.assert()
// still succeeds, but warnings should be printed
.success()
.stderr(predicates::str::contains(
"Warning: Parquet compression option set but not generating Parquet files",
))
.stderr(predicates::str::contains(
"Warning: Parquet row group size option set but not generating Parquet files",
));
}
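/// Test that generating the zone table with --format=tbl fails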
#[test]
fn test_zone_generation_tbl_fails() {
let temp_dir = tempdir().expect("Failed to create temporary directory");
Command::cargo_bin("spatialbench-cli")
.expect("Binary not found")
.arg("--format")
.arg("tbl")
.arg("--scale-factor")
.arg("1")
.arg("--tables")
.arg("zone")
.arg("--output-dir")
.arg(temp_dir.path())
.assert()
.failure()
.stderr(predicates::str::contains(
"Zone table is only supported in --format=parquet",
));
}
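/// Reads a gzip-compressed file and returns its contents as a UTF-8 string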
fn read_gzipped_file_to_string<P: AsRef<Path>>(path: P) -> Result<String, std::io::Error> {
let file = File::open(path)?;
let mut decoder = flate2::read::GzDecoder::new(file);
let mut contents = Vec::new();
decoder.read_to_end(&mut contents)?;
let contents = String::from_utf8(contents)
.map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
Ok(contents)
}
/// Reads the reference file for the specified table and scale factor label.
///
/// Example usage: `read_reference_file("trip", "v1")` reads
/// `../spatialbench/data/sf-v1/trip.tbl.gz`
fn read_reference_file(table_name: &str, scale_factor: &str) -> String {
let reference_file = format!("../spatialbench/data/sf-{scale_factor}/{table_name}.tbl.gz");
match read_gzipped_file_to_string(&reference_file) {
Ok(contents) => contents,
Err(e) => {
panic!("Failed to read reference file {reference_file}: {e}");
}
}
}
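/// Expected row group sizes for a table's parquet output file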
#[derive(Debug, PartialEq)]
struct RowGroups {
table: &'static str,
/// total bytes in each row group
row_group_bytes: Vec<i64>,
}
/// For each entry in `expected_row_groups`, check that the corresponding parquet
/// file in `output_dir` has the expected row group sizes.
fn expect_row_group_sizes(output_dir: &Path, expected_row_groups: Vec<RowGroups>) {
let mut actual_row_groups = vec![];
for table in &expected_row_groups {
let output_path = output_dir.join(format!("{}.parquet", table.table));
assert!(
output_path.exists(),
"Expected parquet file {:?} to exist",
output_path
);
// read the metadata to get the row group size
let file = File::open(&output_path).expect("Failed to open parquet file");
let mut metadata_reader = ParquetMetaDataReader::new();
metadata_reader.try_parse(&file).unwrap();
let metadata = metadata_reader.finish().unwrap();
let row_groups = metadata.row_groups();
let actual_row_group_bytes: Vec<_> =
row_groups.iter().map(|rg| rg.total_byte_size()).collect();
actual_row_groups.push(RowGroups {
table: table.table,
row_group_bytes: actual_row_group_bytes,
})
}
// Compare the expected and actual row groups via their Debug representations,
// which gives better output / easier comparison on failure
let expected_row_groups = format!("{expected_row_groups:#?}");
let actual_row_groups = format!("{actual_row_groups:#?}");
assert_eq!(actual_row_groups, expected_row_groups);
}