| //! [`TPCHDate`] and date handling |
| use rand::rngs::StdRng; |
| use rand::{Rng, SeedableRng}; |
| use std::{ |
| fmt::{Display, Formatter}, |
| sync::LazyLock, |
| }; |
| |
/// The value of 1970-01-01 in the date generator system
///
/// This equals [`MIN_GENERATE_DATE`] minus `8035`, the number of days between
/// `1970-01-01` and `1992-01-01` (see `TPCHDate::UNIX_EPOCH_OFFSET`).
pub const GENERATED_DATE_EPOCH_OFFSET: i32 = 83966;
/// The minimum date that can be generated by the data generator,
/// corresponding to `1992-01-01`.
///
/// Generated dates are this value plus a day offset. The lookup tables in
/// this module hold [`TOTAL_DATE_RANGE`] entries, so the last valid generated
/// date is this value plus [`TOTAL_DATE_RANGE`] minus one (`1998-12-31`).
pub const MIN_GENERATE_DATE: i32 = 92001;
/// Cut-off compared against by `TPCHDate::is_in_past`, expressed in the
/// `yyddd` form returned by `julian` (year `95`, day-of-year `168`).
const CURRENT_DATE: i32 = 95168;
/// The total number of days that can be generated by the data generator
pub const TOTAL_DATE_RANGE: i32 = 2557;
| |
/// Month boundaries for a standard (non-leap) year
///
/// Entry `m` is the cumulative number of days at the end of month `m`
/// (entry `0` is `0`, entry `12` is `365`). Leap years are handled by the
/// callers, which add `leap_year_adjustment` on top of these values.
const MONTH_YEAR_DAY_START: [i32; 13] =
    [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365];
| |
/// Lookup table for converting dates from the data generator to strings
///
/// index: day offset from [`MIN_GENERATE_DATE`] (`0..TOTAL_DATE_RANGE`)
/// value: strings like 1992-01-01
///
/// The table is built by `make_date_string_index` the first time it is
/// accessed.
static DATE_TO_STRING: LazyLock<Vec<String>> = LazyLock::new(make_date_string_index);
| |
| /// Lookup table for julian date format used to check if a given date is in |
| /// the past as part of the lineitem generation. |
| static JULIAN_DATE: LazyLock<Vec<i32>> = LazyLock::new(|| { |
| (0..TOTAL_DATE_RANGE) |
| .map(|date| julian(date + MIN_GENERATE_DATE)) |
| .collect() |
| }); |
| |
/// Helpers for splitting a table's rows across generation parts.
///
/// Parts are numbered from `1` to `part_count`; the last part
/// (`part == part_count`) receives any remainder rows.
pub struct GenerateUtils;

impl GenerateUtils {
    /// Calculates row count with linear scaling (original behavior)
    ///
    /// The total is `scale_base * scale_factor` truncated to an integer and
    /// divided evenly between `part_count` parts.
    ///
    /// # Panics
    ///
    /// Panics if `part_count` is `0` (integer division by zero).
    pub fn calculate_row_count(
        scale_base: i32,
        scale_factor: f64,
        part: i32,
        part_count: i32,
    ) -> i64 {
        Self::calculate_scaled_row_count(scale_base, scale_factor, part, part_count, false)
    }

    /// Calculates row count with logarithmic scaling (for buildings)
    ///
    /// The total is `scale_base * (1 + log2(scale_factor))` truncated to an
    /// integer. For scale factors where that expression is negative or
    /// undefined (below `0.5`, zero or negative) the total is `0` rather than
    /// a negative count.
    ///
    /// # Panics
    ///
    /// Panics if `part_count` is `0` (integer division by zero).
    pub fn calculate_logarithmic_row_count(
        scale_base: i32,
        scale_factor: f64,
        part: i32,
        part_count: i32,
    ) -> i64 {
        Self::calculate_scaled_row_count(scale_base, scale_factor, part, part_count, true)
    }

    /// Internal implementation for row count calculation with scaling option
    ///
    /// Returns the number of rows assigned to `part` (1-based) out of
    /// `part_count` parts.
    fn calculate_scaled_row_count(
        scale_base: i32,
        scale_factor: f64,
        part: i32,
        part_count: i32,
        log_scale: bool,
    ) -> i64 {
        let total_row_count = if log_scale {
            let scaled = scale_base as f64 * (1.0 + scale_factor.log2());
            // `log2` is negative below 1.0 and -inf at 0.0, which would
            // otherwise yield a negative row count; a table cannot have fewer
            // than zero rows. (A NaN already casts to 0.)
            (scaled as i64).max(0)
        } else {
            (scale_base as f64 * scale_factor) as i64
        };

        let part_count = i64::from(part_count);
        let rows_per_part = total_row_count / part_count;

        if i64::from(part) == part_count {
            // for the last part, add the remainder rows
            rows_per_part + (total_row_count % part_count)
        } else {
            rows_per_part
        }
    }

    /// Calculates start index for a specific part of the data
    ///
    /// Returns the zero-based index of the first row of `part` (1-based),
    /// i.e. `rows_per_part * (part - 1)`.
    ///
    /// NOTE(review): this always uses the linear total
    /// (`scale_base * scale_factor`), including for tables whose row count
    /// comes from [`GenerateUtils::calculate_logarithmic_row_count`] —
    /// confirm that callers expect this.
    ///
    /// # Panics
    ///
    /// Panics if `part_count` is `0` (integer division by zero).
    pub fn calculate_start_index(
        scale_base: i32,
        scale_factor: f64,
        part: i32,
        part_count: i32,
    ) -> i64 {
        let total_row_count = (scale_base as f64 * scale_factor) as i64;
        let rows_per_part = total_row_count / i64::from(part_count);
        rows_per_part * (i64::from(part) - 1)
    }
}
| |
| /// Random time generator that produces hours and minutes |
| #[derive(Debug, Clone)] |
| pub struct RandomTimeOfDay { |
| rng: StdRng, |
| } |
| |
| impl RandomTimeOfDay { |
| /// Creates a new RandomTimeOfDay generator with the given seed |
| pub fn new(seed: u64) -> Self { |
| RandomTimeOfDay { |
| rng: StdRng::seed_from_u64(seed), |
| } |
| } |
| |
| /// Generates a random time of day as (hour, minute, second) |
| pub fn next_value(&mut self) -> (u8, u8, u8) { |
| let hour = self.rng.gen_range(0..24); |
| let minute = self.rng.gen_range(0..60); |
| let second = self.rng.gen_range(0..60); |
| (hour, minute, second) |
| } |
| |
| /// Advances the generator by a given number of rows |
| pub fn advance_rows(&mut self, rows: i64) { |
| for _ in 0..rows { |
| self.next_value(); |
| } |
| } |
| |
| /// Mark this row as finished |
| /// |
| /// This is a no-op for this generator since it doesn't need row-specific state tracking |
| /// but is required to match the interface pattern used by other random generators |
| pub fn row_finished(&mut self) { |
| // No operation needed - StdRng doesn't require row-based state management |
| } |
| } |
| |
/// Represents a date together with a time of day (hour, minute, second)
///
/// Example display: 1992-01-01 00:00:00
///
/// The date is stored as an index from the [`MIN_GENERATE_DATE`]
///
/// The derived ordering compares the date index first, then hour, minute
/// and second (the field declaration order).
///
/// # Example
/// ```
/// # use spatialbench::dates::{TPCHDate, MIN_GENERATE_DATE};
/// let date = TPCHDate::new(MIN_GENERATE_DATE + 41, 0, 0, 0);
/// // Convert the date to y/m/d fields
/// assert_eq!((92,2,11), date.to_ymd());
/// // format as a string using the Display impl
/// assert_eq!("1992-02-11 00:00:00", date.to_string());
/// ```
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
pub struct TPCHDate {
    // Number of days after MIN_GENERATE_DATE
    date_index: i32,
    hour: u8,   // 0-23
    minute: u8, // 0-59
    second: u8, // expected 0-59; not validated by the constructors
}
| |
| impl Display for TPCHDate { |
| fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { |
| write!( |
| f, |
| "{} {:02}:{:02}:{:02}", |
| &DATE_TO_STRING[self.date_index as usize], self.hour, self.minute, self.second |
| ) |
| } |
| } |
| |
| impl TPCHDate { |
| /// Number of days that must be added to a TPCH date to get a Unix epoch |
| /// relative date. |
| /// |
| /// * Unix epoch relative dates are days since the epoch (1970-01-01) |
| /// * [`TPCHDate`]s are days since [`MIN_GENERATE_DATE`] (1992-01-01) |
| /// |
| /// This value is `8035` because `1992-01-01` is `8035` days after `1970-01-01` |
| pub const UNIX_EPOCH_OFFSET: i32 = 8035; |
| |
| /// Create a new TPCHDate from a generated date |
| pub fn new(generated_date: i32, hour: u8, minute: u8, second: u8) -> Self { |
| Self { |
| date_index: generated_date - MIN_GENERATE_DATE, |
| hour, |
| minute, |
| second, |
| } |
| } |
| |
| /// Create a new date with a given day value and time components including seconds |
| pub fn new_with_time(day_value: i32, (hour, minute, second): (u8, u8, u8)) -> Self { |
| TPCHDate::new(day_value, hour, minute, second) |
| } |
| |
| /// Returns the (year, month, day) of this date |
| /// |
| /// For a date that represents `1992-02-03`, this function will return |
| /// |
| /// * year: `92` |
| /// * month: `2` |
| /// * day: `3` |
| pub fn to_ymd(&self) -> (i32, i32, i32) { |
| to_ymd(self.date_index + 1) |
| } |
| |
| /// Return the inner date index |
| pub fn into_inner(self) -> i32 { |
| self.date_index |
| } |
| |
| /// Checks if a date is in the past |
| pub fn is_in_past(date: i32) -> bool { |
| Self::to_julian(date) <= CURRENT_DATE |
| } |
| |
| /// Lookup if a date is in the past. |
| fn to_julian(date: i32) -> i32 { |
| JULIAN_DATE[(date - MIN_GENERATE_DATE) as usize] |
| } |
| |
| /// Returns the number of days since the Unix epoch this date |
| /// represents. |
| #[inline(always)] |
| pub fn to_unix_epoch(&self) -> i32 { |
| self.date_index + Self::UNIX_EPOCH_OFFSET |
| } |
| |
| /// Returns the number of seconds since the Unix epoch this date represents, |
| /// including the time components (hour, minute, second) |
| #[inline(always)] |
| pub fn to_unix_epoch_seconds(&self) -> i64 { |
| // Start with days since Unix epoch converted to seconds |
| let base_seconds = (self.date_index + Self::UNIX_EPOCH_OFFSET) as i64 * 24 * 60 * 60; |
| |
| // Add the time components in seconds |
| let time_seconds = |
| (self.hour as i64) * 3600 + (self.minute as i64) * 60 + (self.second as i64); |
| |
| base_seconds + time_seconds |
| } |
| } |
| |
| /// Creates a index table of formatted strings |
| /// |
| /// index: dates generated by the data generator |
| /// value: strings like 1992-01-01 |
| fn make_date_string_index() -> Vec<String> { |
| (0..TOTAL_DATE_RANGE) |
| .map(|i| { |
| let (y, m, dy) = to_ymd(i + 1); |
| format_ymd(y, m, dy) |
| }) |
| .collect() |
| } |
| |
| /// Returns the (year, month, day) for the specified date index |
| /// |
| /// See [`TPCHDate::to_ymd`] for more information |
| fn to_ymd(index: i32) -> (i32, i32, i32) { |
| let y = julian(index + MIN_GENERATE_DATE - 1) / 1000; |
| let d = julian(index + MIN_GENERATE_DATE - 1) % 1000; |
| |
| let mut m = 0; |
| while d > MONTH_YEAR_DAY_START[m as usize] + leap_year_adjustment(y, m) { |
| m += 1; |
| } |
| |
| let dy = |
| d - MONTH_YEAR_DAY_START[(m - 1) as usize] - if is_leap_year(y) && m > 2 { 1 } else { 0 }; |
| |
| (y, m, dy) |
| } |
| |
/// Formats the specified y, m, d as `yyyy-mm-dd`
///
/// `y` is relative to 1900 (`92` is rendered as `1992`).
fn format_ymd(y: i32, m: i32, dy: i32) -> String {
    let full_year = 1900 + y;
    format!("{:04}-{:02}-{:02}", full_year, m, dy)
}
| |
| /// Helpers duplicated to avoid circular references |
| const fn julian(date: i32) -> i32 { |
| let mut offset = date - MIN_GENERATE_DATE; |
| let mut result = MIN_GENERATE_DATE; |
| |
| loop { |
| let year = result / 1000; |
| let year_end = year * 1000 + 365 + if is_leap_year(year) { 1 } else { 0 }; |
| |
| if result + offset <= year_end { |
| break; |
| } |
| |
| offset -= year_end - result + 1; |
| result += 1000; |
| } |
| |
| result + offset |
| } |
| |
/// Reports whether `year` is treated as a leap year by the generator.
///
/// A year counts as leap when it is divisible by 4 and not by 100. Years are
/// the two-digit, 1900-relative values used throughout this module, and there
/// is no divisible-by-400 exception (so `100` is not treated as leap).
const fn is_leap_year(year: i32) -> bool {
    let divisible_by_four = year % 4 == 0;
    let is_century = year % 100 == 0;
    divisible_by_four && !is_century
}
| |
| const fn leap_year_adjustment(year: i32, month: i32) -> i32 { |
| if is_leap_year(year) && month >= 2 { |
| 1 |
| } else { |
| 0 |
| } |
| } |
| |
#[cfg(test)]
mod test {
    use super::*;

    /// Builds a midnight date `offset` days after [`MIN_GENERATE_DATE`].
    fn midnight(offset: i32) -> TPCHDate {
        TPCHDate::new(MIN_GENERATE_DATE + offset, 0, 0, 0)
    }

    #[test]
    fn test_date_strings() {
        let cases = [
            (1, "1992-01-02 00:00:00"),
            (1234, "1995-05-19 00:00:00"),
            (TOTAL_DATE_RANGE - 1, "1998-12-31 00:00:00"),
        ];
        for (offset, expected) in cases {
            assert_eq!(midnight(offset).to_string(), expected);
        }
    }

    #[test]
    fn test_display_dates() {
        for offset in [1, 23, 321, 623, 1234, 2345, 2556] {
            let date = TPCHDate::new(MIN_GENERATE_DATE + offset, 10, 30, 45);
            let (year, month, day) = date.to_ymd();
            let expected = format!("{} 10:30:45", format_ymd(year, month, day));
            assert_eq!(expected, date.to_string());
        }
    }

    #[test]
    fn test_date_epoch_consistency() {
        // Check that dates map to the expected Unix epoch day numbers.
        assert_eq!(midnight(1).to_unix_epoch(), 8036);

        let date = midnight(1234);
        // 1995-05-19 00:00:00 (12:00:00 AM)
        assert_eq!(date.to_string(), "1995-05-19 00:00:00");
        assert_eq!(date.to_unix_epoch(), 9269);
    }
}