Turn `ObjectStoreLocationProvider` off by default (#1722)
Closes #1721
Otherwise, there's a default behavior change in the new 0.9.0 release:
previous versions write to `data/`, while the new version would write to
`data/<hash>/`.
diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py
index e625b84..679f74d 100644
--- a/pyiceberg/table/__init__.py
+++ b/pyiceberg/table/__init__.py
@@ -202,7 +202,7 @@
WRITE_PY_LOCATION_PROVIDER_IMPL = "write.py-location-provider.impl"
OBJECT_STORE_ENABLED = "write.object-storage.enabled"
- OBJECT_STORE_ENABLED_DEFAULT = True
+ OBJECT_STORE_ENABLED_DEFAULT = False
WRITE_OBJECT_STORE_PARTITIONED_PATHS = "write.object-storage.partitioned-paths"
WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT = True
diff --git a/tests/integration/test_writes/test_partitioned_writes.py b/tests/integration/test_writes/test_partitioned_writes.py
index 1e6ea1b..a299036 100644
--- a/tests/integration/test_writes/test_partitioned_writes.py
+++ b/tests/integration/test_writes/test_partitioned_writes.py
@@ -294,13 +294,14 @@
PartitionField(source_id=nested_field.field_id, field_id=1001, transform=IdentityTransform(), name=part_col)
)
- # write.object-storage.enabled and write.object-storage.partitioned-paths don't need to be specified as they're on by default
- assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT
- assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT
+ # Enable `write.object-storage.enabled` which is False by default
+ # `write.object-storage.partitioned-paths` is True by default
+ assert TableProperties.OBJECT_STORE_ENABLED_DEFAULT is False
+ assert TableProperties.WRITE_OBJECT_STORE_PARTITIONED_PATHS_DEFAULT is True
tbl = _create_table(
session_catalog=session_catalog,
identifier=f"default.arrow_table_v{format_version}_with_null_partitioned_on_col_{part_col}",
- properties={"format-version": str(format_version)},
+ properties={"format-version": str(format_version), TableProperties.OBJECT_STORE_ENABLED: True},
data=[arrow_table_with_null],
partition_spec=partition_spec,
)
diff --git a/tests/table/test_locations.py b/tests/table/test_locations.py
index d66bf18..4efa643 100644
--- a/tests/table/test_locations.py
+++ b/tests/table/test_locations.py
@@ -74,7 +74,7 @@
def test_object_storage_no_partition() -> None:
- provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
+ provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"})
location = provider.new_data_location("test.parquet")
parts = location.split("/")
@@ -111,6 +111,7 @@
provider = load_location_provider(
table_location="table_location",
table_properties={
+ "write.object-storage.enabled": "true",
"write.object-storage.partitioned-paths": "false",
},
)
@@ -131,7 +132,7 @@
],
)
def test_hash_injection(data_file_name: str, expected_hash: str) -> None:
- provider = load_location_provider(table_location="table_location", table_properties=EMPTY_DICT)
+ provider = load_location_provider(table_location="table_location", table_properties={"write.object-storage.enabled": "true"})
assert provider.new_data_location(data_file_name) == f"table_location/data/{expected_hash}/{data_file_name}"
@@ -139,7 +140,10 @@
def test_object_location_provider_write_data_path() -> None:
provider = load_location_provider(
table_location="s3://table-location/table",
- table_properties={TableProperties.WRITE_DATA_PATH: "s3://table-location/custom/data/path"},
+ table_properties={
+ "write.object-storage.enabled": "true",
+ TableProperties.WRITE_DATA_PATH: "s3://table-location/custom/data/path",
+ },
)
assert (