feat: load Overture Divisions from Hugging Face over HTTPS instead of S3 (#54)

* Load Overture Divisions data from Hugging Face over HTTPS instead of anonymous S3

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Bump rust version to 1.90.0 (#55)

* bump rust version

* clippy fix

* Upgrade arrow/parquet to 56.2.0 and datafusion to 50.2.0 (#56)

* update arrow, parquet and datafusion versions

* update tests

* don't fully pin version to allow patches

* fix test

* fmt fix

* pin to commit hash

* fix typo

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/spatialbench-cli/Cargo.toml b/spatialbench-cli/Cargo.toml
index de6e59e..5c568b6 100644
--- a/spatialbench-cli/Cargo.toml
+++ b/spatialbench-cli/Cargo.toml
@@ -24,7 +24,7 @@
 anyhow = "1.0.99"
 serde_yaml = "0.9.33"
 datafusion = "50.2"
-object_store = { version = "0.12.4", features = ["aws"] }
+object_store = { version = "0.12.4", features = ["http"] }
 arrow-array = "56"
 arrow-schema = "56"
 url = "2.5.7"
diff --git a/spatialbench-cli/src/zone_df.rs b/spatialbench-cli/src/zone_df.rs
index 464b61d..eb75805 100644
--- a/spatialbench-cli/src/zone_df.rs
+++ b/spatialbench-cli/src/zone_df.rs
@@ -11,8 +11,7 @@
 use crate::plan::DEFAULT_PARQUET_ROW_GROUP_BYTES;
 use datafusion::execution::runtime_env::RuntimeEnv;
 use log::{debug, info};
-use object_store::aws::AmazonS3Builder;
-use object_store::ObjectStore;
+use object_store::http::HttpBuilder;
 use parquet::{
     arrow::ArrowWriter, basic::Compression as ParquetCompression,
     file::properties::WriterProperties,
@@ -20,15 +19,8 @@
 use url::Url;
 
 const OVERTURE_RELEASE_DATE: &str = "2025-08-20.1";
-const OVERTURE_S3_BUCKET: &str = "overturemaps-us-west-2";
-const OVERTURE_S3_PREFIX: &str = "release";
-
-fn zones_parquet_url() -> String {
-    format!(
-        "s3://{}/{}/{}/theme=divisions/type=division_area/",
-        OVERTURE_S3_BUCKET, OVERTURE_S3_PREFIX, OVERTURE_RELEASE_DATE
-    )
-}
+const HUGGINGFACE_URL: &str = "https://huggingface.co";
+const COMMIT_HASH: &str = "67822daa2fbc0039681922f0d7fea4157f41d13f";
 
 fn subtypes_for_scale_factor(sf: f64) -> Vec<&'static str> {
     let mut v = vec!["microhood", "macrohood", "county"];
@@ -200,29 +192,39 @@
     let rt: Arc<RuntimeEnv> = Arc::new(RuntimeEnvBuilder::new().build()?);
     debug!("Built DataFusion runtime environment");
 
-    // Register S3 store for Overture bucket
-    let bucket = OVERTURE_S3_BUCKET;
-    info!("Registering S3 store for bucket: {}", bucket);
-    let s3 = AmazonS3Builder::new()
-        .with_bucket_name(bucket)
-        .with_skip_signature(true)
-        .with_region("us-west-2")
-        .build()?;
-
-    let s3_url = Url::parse(&format!("s3://{bucket}"))?;
-    let s3_store: Arc<dyn ObjectStore> = Arc::new(s3);
-    rt.register_object_store(&s3_url, s3_store);
-    debug!("Successfully registered S3 object store");
+    // Register HTTPS object store for Hugging Face
+    let hf_store = HttpBuilder::new().with_url(HUGGINGFACE_URL).build()?;
+    let hf_url = Url::parse(HUGGINGFACE_URL)?;
+    rt.register_object_store(&hf_url, Arc::new(hf_store));
+    debug!("Registered HTTPS object store for huggingface.co");
 
     let ctx = SessionContext::new_with_config_rt(SessionConfig::from(cfg), rt);
     debug!("Created DataFusion session context");
 
-    let url = zones_parquet_url();
-    info!("Reading parquet data from: {}", url);
+    // Parquet parts from Hugging Face (programmatically generated)
+    const PARQUET_PART_COUNT: usize = 4;
+    const PARQUET_UUID: &str = "c998b093-fa14-440c-98f0-bbdb2126ed22";
+    let parquet_urls: Vec<String> = (0..PARQUET_PART_COUNT)
+        .map(|i| format!(
+            "https://huggingface.co/datasets/apache-sedona/spatialbench/resolve/{}/omf-division-area-{}/part-{i:05}-{uuid}-c000.zstd.parquet",
+            COMMIT_HASH,
+            OVERTURE_RELEASE_DATE,
+            i = i,
+            uuid = PARQUET_UUID
+        ))
+        .collect();
+
+    info!(
+        "Reading {} Parquet parts from Hugging Face...",
+        parquet_urls.len()
+    );
+
     let t_read_start = Instant::now();
-    let mut df = ctx.read_parquet(url, ParquetReadOptions::default()).await?;
+    let mut df = ctx
+        .read_parquet(parquet_urls, ParquetReadOptions::default())
+        .await?;
     let read_dur = t_read_start.elapsed();
-    info!("Successfully read parquet data in {:?}", read_dur);
+    info!("Successfully read HF parquet data in {:?}", read_dur);
 
     // Build filter predicate
     debug!("Building filter predicate for subtypes: {:?}", subtypes);