ARROW-12299: [Python] Recognize new filesystems in pq.write_to_dataset
This adds similar logic as we have in ParquetDataset: if a new-style filesystem is passed (which is not supported in the legacy implementation), automatically default to the new implementation.
Closes #9967 from jorisvandenbossche/ARROW-12299
Authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
diff --git a/python/pyarrow/parquet.py b/python/pyarrow/parquet.py
index 37bca4e..1b0a336 100644
--- a/python/pyarrow/parquet.py
+++ b/python/pyarrow/parquet.py
@@ -1845,7 +1845,7 @@
def write_to_dataset(table, root_path, partition_cols=None,
partition_filename_cb=None, filesystem=None,
- use_legacy_dataset=True, **kwargs):
+ use_legacy_dataset=None, **kwargs):
"""Wrapper around parquet.write_table for writing a Table to
Parquet format by partitions.
For each combination of partition columns and values,
@@ -1879,7 +1879,8 @@
A callback function that takes the partition key(s) as an argument
and allow you to override the partition filename. If nothing is
passed, the filename will consist of a uuid.
- use_legacy_dataset : bool, default True
+ use_legacy_dataset : bool
+ Default is True unless a ``pyarrow.fs`` filesystem is passed.
Set to False to enable the new code path (experimental, using the
new Arrow Dataset API). This is more efficient when using partition
columns, but does not (yet) support `partition_filename_cb` and
@@ -1891,6 +1892,14 @@
file metadata instances of dataset pieces. The file paths in the
ColumnChunkMetaData will be set relative to `root_path`.
"""
+ if use_legacy_dataset is None:
+ # if a new filesystem is passed -> default to new implementation
+ if isinstance(filesystem, FileSystem):
+ use_legacy_dataset = False
+ # otherwise the default is still True
+ else:
+ use_legacy_dataset = True
+
if not use_legacy_dataset:
import pyarrow.dataset as ds
diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py
index a0d417b..fb58fc4 100644
--- a/python/pyarrow/tests/parquet/test_dataset.py
+++ b/python/pyarrow/tests/parquet/test_dataset.py
@@ -1377,6 +1377,17 @@
assert sorted(expected_basenames) == sorted(output_basenames)
+@pytest.mark.pandas
+def test_write_to_dataset_filesystem(tempdir):
+ df = pd.DataFrame({'A': [1, 2, 3]})
+ table = pa.Table.from_pandas(df)
+ path = str(tempdir)
+
+ pq.write_to_dataset(table, path, filesystem=fs.LocalFileSystem())
+ result = pq.read_table(path)
+ assert result.equals(table)
+
+
# TODO(dataset) support pickling
def _make_dataset_for_pickling(tempdir, N=100):
path = tempdir / 'data.parquet'