Apply formatting changes from updated black version (collapse lines that now fit within the line-length limit)
diff --git a/datafusion/__init__.py b/datafusion/__init__.py
index b2e1028..bc36cef 100644
--- a/datafusion/__init__.py
+++ b/datafusion/__init__.py
@@ -104,9 +104,7 @@
Create a new User Defined Aggregate Function
"""
if not issubclass(accum, Accumulator):
- raise TypeError(
- "`accum` must implement the abstract base class Accumulator"
- )
+ raise TypeError("`accum` must implement the abstract base class Accumulator")
if name is None:
name = accum.__qualname__
return AggregateUDF(
diff --git a/datafusion/tests/generic.py b/datafusion/tests/generic.py
index 1f984a4..0739979 100644
--- a/datafusion/tests/generic.py
+++ b/datafusion/tests/generic.py
@@ -50,9 +50,7 @@
datetime.datetime.now() - datetime.timedelta(days=1),
datetime.datetime.now() + datetime.timedelta(days=1),
]
- return pa.array(
- data, type=pa.timestamp(f), mask=np.array([False, True, False])
- )
+ return pa.array(data, type=pa.timestamp(f), mask=np.array([False, True, False]))
def data_date32():
@@ -61,9 +59,7 @@
datetime.date(1980, 1, 1),
datetime.date(2030, 1, 1),
]
- return pa.array(
- data, type=pa.date32(), mask=np.array([False, True, False])
- )
+ return pa.array(data, type=pa.date32(), mask=np.array([False, True, False]))
def data_timedelta(f):
@@ -72,9 +68,7 @@
datetime.timedelta(days=1),
datetime.timedelta(seconds=1),
]
- return pa.array(
- data, type=pa.duration(f), mask=np.array([False, True, False])
- )
+ return pa.array(data, type=pa.duration(f), mask=np.array([False, True, False]))
def data_binary_other():
diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py
index 51f7c22..7c75466 100644
--- a/datafusion/tests/test_dataframe.py
+++ b/datafusion/tests/test_dataframe.py
@@ -124,9 +124,7 @@
def test_with_column_renamed(df):
- df = df.with_column("c", column("a") + column("b")).with_column_renamed(
- "c", "sum"
- )
+ df = df.with_column("c", column("a") + column("b")).with_column_renamed("c", "sum")
result = df.collect()[0]
@@ -190,9 +188,7 @@
[pa.array([1, 2, 3]), pa.array([4, 5, 6])],
names=["a", "b"],
)
- df_b = ctx.create_dataframe([[batch]]).sort(
- column("a").sort(ascending=True)
- )
+ df_b = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))
assert df_a.collect() == df_b.collect()
@@ -201,9 +197,7 @@
df = df.select(
column("a"),
f.alias(
- f.window(
- "lead", [column("b")], order_by=[f.order_by(column("b"))]
- ),
+ f.window("lead", [column("b")], order_by=[f.order_by(column("b"))]),
"a_next",
),
)
@@ -282,9 +276,7 @@
[pa.array([3]), pa.array([6])],
names=["a", "b"],
)
- df_c = ctx.create_dataframe([[batch]]).sort(
- column("a").sort(ascending=True)
- )
+ df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))
df_a_i_b = df_a.intersect(df_b).sort(column("a").sort(ascending=True))
@@ -310,9 +302,7 @@
[pa.array([1, 2]), pa.array([4, 5])],
names=["a", "b"],
)
- df_c = ctx.create_dataframe([[batch]]).sort(
- column("a").sort(ascending=True)
- )
+ df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))
df_a_e_b = df_a.except_all(df_b).sort(column("a").sort(ascending=True))
@@ -347,9 +337,7 @@
[pa.array([1, 2, 3, 3, 4, 5]), pa.array([4, 5, 6, 6, 7, 8])],
names=["a", "b"],
)
- df_c = ctx.create_dataframe([[batch]]).sort(
- column("a").sort(ascending=True)
- )
+ df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))
df_a_u_b = df_a.union(df_b).sort(column("a").sort(ascending=True))
@@ -373,9 +361,7 @@
[pa.array([1, 2, 3, 4, 5]), pa.array([4, 5, 6, 7, 8])],
names=["a", "b"],
)
- df_c = ctx.create_dataframe([[batch]]).sort(
- column("a").sort(ascending=True)
- )
+ df_c = ctx.create_dataframe([[batch]]).sort(column("a").sort(ascending=True))
df_a_u_b = df_a.union(df_b, True).sort(column("a").sort(ascending=True))
diff --git a/datafusion/tests/test_functions.py b/datafusion/tests/test_functions.py
index daa2f19..93ec3cf 100644
--- a/datafusion/tests/test_functions.py
+++ b/datafusion/tests/test_functions.py
@@ -59,9 +59,7 @@
"""
Test literals with arithmetic operations
"""
- df = df.select(
- literal(1) + column("b"), f.concat(column("a"), literal("!"))
- )
+ df = df.select(literal(1) + column("b"), f.concat(column("a"), literal("!")))
result = df.collect()
assert len(result) == 1
result = result[0]
@@ -72,9 +70,7 @@
def test_math_functions():
ctx = SessionContext()
# create a RecordBatch and a new DataFrame from it
- batch = pa.RecordBatch.from_arrays(
- [pa.array([0.1, -0.7, 0.55])], names=["value"]
- )
+ batch = pa.RecordBatch.from_arrays([pa.array([0.1, -0.7, 0.55])], names=["value"])
df = ctx.create_dataframe([[batch]])
values = np.array([0.1, -0.7, 0.55])
@@ -103,15 +99,9 @@
np.testing.assert_array_almost_equal(result.column(4), np.arcsin(values))
np.testing.assert_array_almost_equal(result.column(5), np.arccos(values))
np.testing.assert_array_almost_equal(result.column(6), np.exp(values))
- np.testing.assert_array_almost_equal(
- result.column(7), np.log(values + 1.0)
- )
- np.testing.assert_array_almost_equal(
- result.column(8), np.log2(values + 1.0)
- )
- np.testing.assert_array_almost_equal(
- result.column(9), np.log10(values + 1.0)
- )
+ np.testing.assert_array_almost_equal(result.column(7), np.log(values + 1.0))
+ np.testing.assert_array_almost_equal(result.column(8), np.log2(values + 1.0))
+ np.testing.assert_array_almost_equal(result.column(9), np.log10(values + 1.0))
np.testing.assert_array_less(result.column(10), np.ones_like(values))
@@ -149,18 +139,9 @@
)
assert result.column(1) == pa.array(
[
- b(
- "185F8DB32271FE25F561A6FC938B2E26"
- "4306EC304EDA518007D1764826381969"
- ),
- b(
- "78AE647DC5544D227130A0682A51E30B"
- "C7777FBB6D8A8F17007463A3ECD1D524"
- ),
- b(
- "BB7208BC9B5D7C04F1236A82A0093A5E"
- "33F40423D5BA8D4266F7092C3BA43B62"
- ),
+ b("185F8DB32271FE25F561A6FC938B2E26" "4306EC304EDA518007D1764826381969"),
+ b("78AE647DC5544D227130A0682A51E30B" "C7777FBB6D8A8F17007463A3ECD1D524"),
+ b("BB7208BC9B5D7C04F1236A82A0093A5E" "33F40423D5BA8D4266F7092C3BA43B62"),
]
)
assert result.column(2) == pa.array(
@@ -187,33 +168,15 @@
)
assert result.column(3) == pa.array(
[
- b(
- "F73A5FBF881F89B814871F46E26AD3FA"
- "37CB2921C5E8561618639015B3CCBB71"
- ),
- b(
- "B792A0383FB9E7A189EC150686579532"
- "854E44B71AC394831DAED169BA85CCC5"
- ),
- b(
- "27988A0E51812297C77A433F63523334"
- "6AEE29A829DCF4F46E0F58F402C6CFCB"
- ),
+ b("F73A5FBF881F89B814871F46E26AD3FA" "37CB2921C5E8561618639015B3CCBB71"),
+ b("B792A0383FB9E7A189EC150686579532" "854E44B71AC394831DAED169BA85CCC5"),
+ b("27988A0E51812297C77A433F63523334" "6AEE29A829DCF4F46E0F58F402C6CFCB"),
]
)
assert result.column(4) == pa.array(
[
- b(
- "FBC2B0516EE8744D293B980779178A35"
- "08850FDCFE965985782C39601B65794F"
- ),
- b(
- "BF73D18575A736E4037D45F9E316085B"
- "86C19BE6363DE6AA789E13DEAACC1C4E"
- ),
- b(
- "C8D11B9F7237E4034ADBCD2005735F9B"
- "C4C597C75AD89F4492BEC8F77D15F7EB"
- ),
+ b("FBC2B0516EE8744D293B980779178A35" "08850FDCFE965985782C39601B65794F"),
+ b("BF73D18575A736E4037D45F9E316085B" "86C19BE6363DE6AA789E13DEAACC1C4E"),
+ b("C8D11B9F7237E4034ADBCD2005735F9B" "C4C597C75AD89F4492BEC8F77D15F7EB"),
]
)
diff --git a/datafusion/tests/test_sql.py b/datafusion/tests/test_sql.py
index 19c2766..f430f32 100644
--- a/datafusion/tests/test_sql.py
+++ b/datafusion/tests/test_sql.py
@@ -72,9 +72,7 @@
result = pa.Table.from_batches(result)
assert result.schema == alternative_schema
- with pytest.raises(
- ValueError, match="Delimiter must be a single character"
- ):
+ with pytest.raises(ValueError, match="Delimiter must be a single character"):
ctx.register_csv("csv4", path, delimiter="wrong")
@@ -114,9 +112,7 @@
)
assert ctx.tables() == {"datapp"}
- result = ctx.sql(
- "SELECT grp, COUNT(*) AS cnt FROM datapp GROUP BY grp"
- ).collect()
+ result = ctx.sql("SELECT grp, COUNT(*) AS cnt FROM datapp GROUP BY grp").collect()
result = pa.Table.from_batches(result)
rd = result.to_pydict()
@@ -183,9 +179,7 @@
).collect()
expected_a = pa.array([50.0219, 50.0152], pa.float64())
expected_cast = pa.array([50, 50], pa.int32())
- expected = [
- pa.RecordBatch.from_arrays([expected_a, expected_cast], ["a", "a_int"])
- ]
+ expected = [pa.RecordBatch.from_arrays([expected_a, expected_cast], ["a", "a_int"])]
np.testing.assert_equal(expected[0].column(1), expected[0].column(1))
@@ -205,9 +199,7 @@
"float",
]
- select = ", ".join(
- [f"CAST(9 AS {t}) AS A{i}" for i, t in enumerate(valid_types)]
- )
+ select = ", ".join([f"CAST(9 AS {t}) AS A{i}" for i, t in enumerate(valid_types)])
# can execute, which implies that we can cast
ctx.sql(f"SELECT {select} FROM t").collect()
@@ -236,14 +228,10 @@
ctx, tmp_path, fn, input_types, output_type, input_values, expected_values
):
# write to disk
- path = helpers.write_parquet(
- tmp_path / "a.parquet", pa.array(input_values)
- )
+ path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(input_values))
ctx.register_parquet("t", path)
- func = udf(
- fn, input_types, output_type, name="func", volatility="immutable"
- )
+ func = udf(fn, input_types, output_type, name="func", volatility="immutable")
ctx.register_udf(func)
batches = ctx.sql("SELECT func(a) AS tt FROM t").collect()
diff --git a/dev/release/check-rat-report.py b/dev/release/check-rat-report.py
index 30a0111..d3dd7c5 100644
--- a/dev/release/check-rat-report.py
+++ b/dev/release/check-rat-report.py
@@ -23,9 +23,7 @@
import xml.etree.ElementTree as ET
if len(sys.argv) != 3:
- sys.stderr.write(
- "Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0]
- )
+ sys.stderr.write("Usage: %s exclude_globs.lst rat_report.xml\n" % sys.argv[0])
sys.exit(1)
exclude_globs_filename = sys.argv[1]