python/pyspark/pandas/tests/computation/test_combine.py - spark - Git at Google

 #
 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #    http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
 import unittest

 import numpy as np
 import pandas as pd

 from pyspark import pandas as ps
 from pyspark.testing.pandasutils import PandasOnSparkTestCase
 from pyspark.testing.sqlutils import SQLTestUtils


 # This file contains test cases for 'Combining / joining / merging'
 # https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/frame.html#combining-joining-merging
 class FrameCombineMixin:
     def test_concat(self):
         """Check ``ps.concat`` against ``pd.concat`` for frames, Series and multi-indexes."""
         pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"))
         psdf = ps.from_pandas(pdf)
         other_pdf = pd.DataFrame([[3, 4], [5, 6]], columns=list("BC"), index=[2, 3])
         other_psdf = ps.from_pandas(other_pdf)

         self.assert_eq(ps.concat([psdf, psdf]), pd.concat([pdf, pdf]))
         self.assert_eq(
             ps.concat([psdf, psdf], ignore_index=True), pd.concat([pdf, pdf], ignore_index=True)
         )

         # Assert DataFrames with non-matching columns
         self.assert_eq(ps.concat([psdf, other_psdf]), pd.concat([pdf, other_pdf]))

         # Assert appending a Series
         self.assert_eq(ps.concat([psdf, psdf["A"]]), pd.concat([pdf, pdf["A"]]))

         # Assert using the sort parameter
         self.assert_eq(ps.concat([psdf, psdf], sort=True), pd.concat([pdf, pdf], sort=True))

         # Assert appending multi-index DataFrames
         multi_index_pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[[2, 3], [4, 5]])
         multi_index_psdf = ps.from_pandas(multi_index_pdf)
         other_multi_index_pdf = pd.DataFrame(
             [[5, 6], [7, 8]], columns=list("AB"), index=[[2, 3], [6, 7]]
         )
         other_multi_index_psdf = ps.from_pandas(other_multi_index_pdf)

         self.assert_eq(
             ps.concat([multi_index_psdf, multi_index_psdf]),
             pd.concat([multi_index_pdf, multi_index_pdf]),
         )

         # Assert multi-index DataFrames that share columns but differ in index values
         self.assert_eq(
             ps.concat([multi_index_psdf, other_multi_index_psdf]),
             pd.concat([multi_index_pdf, other_multi_index_pdf]),
         )

         # Assert trying to append DataFrames with different index levels.
         # NOTE: ``msg`` is only the text reported if no ValueError is raised; the message
         # carried by the exception itself is not checked here.
         msg = "Both DataFrames have to have the same number of index levels"
         with self.assertRaises(ValueError, msg=msg):
             ps.concat([psdf, multi_index_psdf])

         # Skip index level check when ignore_index=True
         self.assert_eq(
             ps.concat([psdf, other_multi_index_psdf], ignore_index=True),
             pd.concat([pdf, other_multi_index_pdf], ignore_index=True),
         )

         # Assert DataFrames with multi-index columns
         columns = pd.MultiIndex.from_tuples([("A", "X"), ("A", "Y")])
         pdf.columns = columns
         psdf.columns = columns

         self.assert_eq(ps.concat([psdf, psdf]), pd.concat([pdf, pdf]))

     def test_merge(self):
         """Compare ``DataFrame.merge`` on pandas-on-Spark with pandas over many key layouts.

         Exercises column and index keys, multi-key merges, each join type, suffixes, a Series
         as the right operand, multi-index column labels and non-string column labels.
         """
         left_pdf = pd.DataFrame(
             {
                 "lkey": ["foo", "bar", "baz", "foo", "bar", "l"],
                 "value": [1, 2, 3, 5, 6, 7],
                 "x": list("abcdef"),
             },
             columns=["lkey", "value", "x"],
         )
         right_pdf = pd.DataFrame(
             {
                 "rkey": ["baz", "foo", "bar", "baz", "foo", "r"],
                 "value": [4, 5, 6, 7, 8, 9],
                 "y": list("efghij"),
             },
             columns=["rkey", "value", "y"],
         )
         right_pser = pd.Series(list("defghi"), name="x", index=[5, 6, 7, 8, 9, 10])

         left_psdf = ps.from_pandas(left_pdf)
         right_psdf = ps.from_pandas(right_pdf)
         right_psser = ps.from_pandas(right_pser)

         join_types = ("inner", "left", "right", "outer")

         def check(op, right_psdf=right_psdf, right_pdf=right_pdf):
             # Run ``op`` on both implementations, then sort rows by every column and drop
             # the index before comparing, so row order does not influence the result.
             actual = op(left_psdf, right_psdf)._to_pandas()
             actual = actual.sort_values(by=list(actual.columns)).reset_index(drop=True)
             expected = op(left_pdf, right_pdf)
             expected = expected.sort_values(by=list(expected.columns)).reset_index(drop=True)
             self.assert_eq(actual, expected)

         # Single-key merges on columns and/or the index
         for op in (
             lambda left, right: left.merge(right),
             lambda left, right: left.merge(right, on="value"),
             lambda left, right: left.merge(right, on=("value",)),
             lambda left, right: left.merge(right, left_on="lkey", right_on="rkey"),
             lambda left, right: left.set_index("lkey").merge(right.set_index("rkey")),
             lambda left, right: left.set_index("lkey").merge(
                 right, left_index=True, right_on="rkey"
             ),
             lambda left, right: left.merge(
                 right.set_index("rkey"), left_on="lkey", right_index=True
             ),
             lambda left, right: left.set_index("lkey").merge(
                 right.set_index("rkey"), left_index=True, right_index=True
             ),
         ):
             check(op)

         # MultiIndex
         for op in (
             lambda left, right: left.merge(
                 right, left_on=["lkey", "value"], right_on=["rkey", "value"]
             ),
             lambda left, right: left.set_index(["lkey", "value"]).merge(
                 right, left_index=True, right_on=["rkey", "value"]
             ),
             lambda left, right: left.merge(
                 right.set_index(["rkey", "value"]), left_on=["lkey", "value"], right_index=True
             ),
         ):
             check(op)
         # TODO: when both left_index=True and right_index=True with multi-index
         # check(lambda left, right: left.set_index(['lkey', 'value']).merge(
         #     right.set_index(['rkey', 'value']), left_index=True, right_index=True))

         # join types
         # The lambdas read ``how``/``keys`` from the loop; ``check`` invokes them right away.
         for how in join_types:
             for keys in ({"on": "value"}, {"left_on": "lkey", "right_on": "rkey"}):
                 check(lambda left, right: left.merge(right, how=how, **keys))

         # suffix
         check(
             lambda left, right: left.merge(
                 right, left_on="lkey", right_on="rkey", suffixes=["_left", "_right"]
             )
         )

         # Test Series on the right
         for op in (
             lambda left, right: left.merge(right),
             lambda left, right: left.merge(right, left_on="x", right_on="x"),
             lambda left, right: left.set_index("x").merge(right, left_index=True, right_on="x"),
         ):
             check(op, right_psser, right_pser)

         # Test join types with Series
         for how in join_types:
             for keys in ({}, {"left_on": "x", "right_on": "x"}):
                 check(
                     lambda left, right: left.merge(right, how=how, **keys),
                     right_psser,
                     right_pser,
                 )

         # suffix with Series
         check(
             lambda left, right: left.merge(
                 right,
                 suffixes=["_left", "_right"],
                 how="outer",
                 left_index=True,
                 right_index=True,
             ),
             right_psser,
             right_pser,
         )

         # multi-index columns
         # The frames are relabelled in place; ``check`` keeps using the same objects.
         left_columns = pd.MultiIndex.from_tuples([(10, "lkey"), (10, "value"), (20, "x")])
         left_pdf.columns = left_columns
         left_psdf.columns = left_columns

         right_columns = pd.MultiIndex.from_tuples([(10, "rkey"), (10, "value"), (30, "y")])
         right_pdf.columns = right_columns
         right_psdf.columns = right_columns

         for op in (
             lambda left, right: left.merge(right),
             lambda left, right: left.merge(right, on=[(10, "value")]),
             lambda left, right: left.set_index((10, "lkey")).merge(right.set_index((10, "rkey"))),
             lambda left, right: left.set_index((10, "lkey")).merge(
                 right.set_index((10, "rkey")), left_index=True, right_index=True
             ),
         ):
             check(op)
         # TODO: when both left_index=True and right_index=True with multi-index columns
         # check(lambda left, right: left.merge(right,
         #                                      left_on=[('a', 'lkey')], right_on=[('a', 'rkey')]))
         # check(lambda left, right: (left.set_index(('a', 'lkey'))
         #                            .merge(right, left_index=True, right_on=[('a', 'rkey')])))

         # non-string names
         left_labels = [10, 100, 1000]
         left_pdf.columns = left_labels
         left_psdf.columns = left_labels

         right_labels = [20, 100, 2000]
         right_pdf.columns = right_labels
         right_psdf.columns = right_labels

         for op in (
             lambda left, right: left.merge(right),
             lambda left, right: left.merge(right, on=[100]),
             lambda left, right: left.set_index(10).merge(right.set_index(20)),
             lambda left, right: left.set_index(10).merge(
                 right.set_index(20), left_index=True, right_index=True
             ),
         ):
             check(op)

     def test_merge_same_anchor(self):
         """Check merges where both operands are column selections of one parent frame."""
         pdf = pd.DataFrame(
             {
                 "lkey": ["foo", "bar", "baz", "foo", "bar", "l"],
                 "rkey": ["baz", "foo", "bar", "baz", "foo", "r"],
                 "value": [1, 1, 3, 5, 6, 7],
                 "x": list("abcdef"),
                 "y": list("efghij"),
             },
             columns=["lkey", "rkey", "value", "x", "y"],
         )
         psdf = ps.from_pandas(pdf)

         # Left and right operands are projections of the same pandas / pandas-on-Spark frame.
         left_pdf = pdf[["lkey", "value", "x"]]
         right_pdf = pdf[["rkey", "value", "y"]]
         left_psdf = psdf[["lkey", "value", "x"]]
         right_psdf = psdf[["rkey", "value", "y"]]

         def check(op, right_psdf=right_psdf, right_pdf=right_pdf):
             # Compare both implementations after sorting rows by every column and
             # dropping the index, so row order does not influence the result.
             actual = op(left_psdf, right_psdf)._to_pandas()
             actual = actual.sort_values(by=list(actual.columns)).reset_index(drop=True)
             expected = op(left_pdf, right_pdf)
             expected = expected.sort_values(by=list(expected.columns)).reset_index(drop=True)
             self.assert_eq(actual, expected)

         for op in (
             lambda left, right: left.merge(right),
             lambda left, right: left.merge(right, on="value"),
             lambda left, right: left.merge(right, left_on="lkey", right_on="rkey"),
             lambda left, right: left.set_index("lkey").merge(right.set_index("rkey")),
             lambda left, right: left.set_index("lkey").merge(
                 right, left_index=True, right_on="rkey"
             ),
             lambda left, right: left.merge(
                 right.set_index("rkey"), left_on="lkey", right_index=True
             ),
             lambda left, right: left.set_index("lkey").merge(
                 right.set_index("rkey"), left_index=True, right_index=True
             ),
         ):
             check(op)

     def test_merge_retains_indices(self):
         """Check that merge results, index included, match pandas for each key layout."""
         left_pdf = pd.DataFrame({"A": [0, 1]})
         right_pdf = pd.DataFrame({"B": [1, 2]}, index=[1, 2])
         left_psdf = ps.from_pandas(left_pdf)
         right_psdf = ps.from_pandas(right_pdf)

         # Results are compared without sorting or resetting the index.
         key_layouts = (
             {"left_index": True, "right_index": True},
             {"left_on": "A", "right_index": True},
             {"left_index": True, "right_on": "B"},
             {"left_on": "A", "right_on": "B"},
         )
         for layout in key_layouts:
             self.assert_eq(
                 left_psdf.merge(right_psdf, **layout),
                 left_pdf.merge(right_pdf, **layout),
             )

     def test_merge_how_parameter(self):
         """Check index-on-index merges for the default join type and each explicit ``how``."""
         left_pdf = pd.DataFrame({"A": [1, 2]})
         right_pdf = pd.DataFrame({"B": ["x", "y"]}, index=[1, 2])
         left_psdf = ps.from_pandas(left_pdf)
         right_psdf = ps.from_pandas(right_pdf)

         def normalized(frame):
             # Sort by all columns and drop the index so row order cannot affect the comparison.
             return frame.sort_values(by=list(frame.columns)).reset_index(drop=True)

         # The empty dict exercises the call without an explicit ``how`` argument.
         for how_kwargs in ({}, {"how": "left"}, {"how": "right"}, {"how": "outer"}):
             psdf = left_psdf.merge(right_psdf, left_index=True, right_index=True, **how_kwargs)
             pdf = left_pdf.merge(right_pdf, left_index=True, right_index=True, **how_kwargs)
             self.assert_eq(normalized(psdf), normalized(pdf))

     def test_merge_raises(self):
         """Check that invalid ``merge`` argument combinations raise the expected errors."""
         left = ps.DataFrame(
             {"value": [1, 2, 3, 5, 6], "x": list("abcde")},
             columns=["value", "x"],
             index=["foo", "bar", "baz", "foo", "bar"],
         )
         right = ps.DataFrame(
             {"value": [4, 5, 6, 7, 8], "y": list("fghij")},
             columns=["value", "y"],
             index=["baz", "foo", "bar", "baz", "foo"],
         )

         # Each entry: (expected exception type, regex searched in its message, failing call).
         failing_merges = [
             (
                 ValueError,
                 "No common columns to perform merge on",
                 lambda: left[["x"]].merge(right[["y"]]),
             ),
             (
                 ValueError,
                 "not a combination of both",
                 lambda: left.merge(right, on="value", left_on="x"),
             ),
             (
                 ValueError,
                 "Must pass right_on or right_index=True",
                 lambda: left.merge(right, left_on="x"),
             ),
             (
                 ValueError,
                 "Must pass right_on or right_index=True",
                 lambda: left.merge(right, left_index=True),
             ),
             (
                 ValueError,
                 "Must pass left_on or left_index=True",
                 lambda: left.merge(right, right_on="y"),
             ),
             (
                 ValueError,
                 "Must pass left_on or left_index=True",
                 lambda: left.merge(right, right_index=True),
             ),
             (
                 ValueError,
                 "len\\(left_keys\\) must equal len\\(right_keys\\)",
                 lambda: left.merge(right, left_on="value", right_on=["value", "y"]),
             ),
             (
                 ValueError,
                 "len\\(left_keys\\) must equal len\\(right_keys\\)",
                 lambda: left.merge(right, left_on=["value", "x"], right_on="value"),
             ),
             (
                 ValueError,
                 # NOTE(review): the unescaped brackets make this pattern a regex character
                 # class, so it matches any message containing one of the listed characters
                 # rather than the literal list text. Confirm the real message before
                 # escaping it.
                 "['inner', 'left', 'right', 'full', 'outer']",
                 lambda: left.merge(right, left_index=True, right_index=True, how="foo"),
             ),
             (
                 KeyError,
                 "id",
                 lambda: left.merge(right, on="id"),
             ),
         ]

         for error_type, pattern, attempt in failing_merges:
             with self.assertRaisesRegex(error_type, pattern):
                 attempt()

     def test_join(self):
         """Compare ``DataFrame.join`` on pandas-on-Spark with pandas.

         Covers suffix handling, overlapping-column errors, the ``on`` parameter, multi-index
         column labels and a multi-index row index.
         """

         def sorted_join(left, right, **join_kwargs):
             # Join, then sort rows in place by every column of the result.
             joined = left.join(right, **join_kwargs)
             joined.sort_values(by=list(joined.columns), inplace=True)
             return joined

         suffixes = {"lsuffix": "_left", "rsuffix": "_right"}

         # check basic function
         pdf1 = pd.DataFrame(
             {"key": ["K0", "K1", "K2", "K3"], "A": ["A0", "A1", "A2", "A3"]}, columns=["key", "A"]
         )
         pdf2 = pd.DataFrame(
             {"key": ["K0", "K1", "K2"], "B": ["B0", "B1", "B2"]}, columns=["key", "B"]
         )
         psdf1 = ps.from_pandas(pdf1)
         psdf2 = ps.from_pandas(pdf2)

         join_pdf = sorted_join(pdf1, pdf2, **suffixes)
         join_psdf = sorted_join(psdf1, psdf2, **suffixes)
         self.assert_eq(join_pdf, join_psdf)

         # join with duplicated columns in Series
         with self.assertRaisesRegex(ValueError, "columns overlap but no suffix specified"):
             overlapping_psser = ps.Series(["A1", "A5"], index=[1, 2], name="A")
             psdf1.join(overlapping_psser, how="outer")
         # join with duplicated columns in DataFrame
         with self.assertRaisesRegex(ValueError, "columns overlap but no suffix specified"):
             psdf1.join(psdf2, how="outer")

         # check `on` parameter
         join_pdf = sorted_join(pdf1, pdf2.set_index("key"), on="key", **suffixes)
         join_psdf = sorted_join(psdf1, psdf2.set_index("key"), on="key", **suffixes)
         self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))

         join_pdf = sorted_join(
             pdf1.set_index("key"), pdf2.set_index("key"), on="key", **suffixes
         )
         join_psdf = sorted_join(
             psdf1.set_index("key"), psdf2.set_index("key"), on="key", **suffixes
         )
         self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))

         # multi-index columns
         columns1 = pd.MultiIndex.from_tuples([("x", "key"), ("Y", "A")])
         columns2 = pd.MultiIndex.from_tuples([("x", "key"), ("Y", "B")])
         pdf1.columns = columns1
         pdf2.columns = columns2
         psdf1.columns = columns1
         psdf2.columns = columns2

         join_pdf = sorted_join(pdf1, pdf2, **suffixes)
         join_psdf = sorted_join(psdf1, psdf2, **suffixes)
         self.assert_eq(join_pdf, join_psdf)

         # check `on` parameter
         key_label = ("x", "key")
         join_pdf = sorted_join(pdf1, pdf2.set_index(key_label), on=[key_label], **suffixes)
         join_psdf = sorted_join(psdf1, psdf2.set_index(key_label), on=[key_label], **suffixes)
         self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))

         join_pdf = sorted_join(
             pdf1.set_index(key_label), pdf2.set_index(key_label), on=[key_label], **suffixes
         )
         join_psdf = sorted_join(
             psdf1.set_index(key_label), psdf2.set_index(key_label), on=[key_label], **suffixes
         )
         self.assert_eq(join_pdf.reset_index(drop=True), join_psdf.reset_index(drop=True))

         # multi-index
         midx1 = pd.MultiIndex.from_tuples(
             [("w", "a"), ("x", "b"), ("y", "c"), ("z", "d")], names=["index1", "index2"]
         )
         midx2 = pd.MultiIndex.from_tuples(
             [("w", "a"), ("x", "b"), ("y", "c")], names=["index1", "index2"]
         )
         pdf1.index = midx1
         pdf2.index = midx2
         # The pandas-on-Spark frames are rebuilt from the re-indexed pandas frames.
         psdf1 = ps.from_pandas(pdf1)
         psdf2 = ps.from_pandas(pdf2)

         join_pdf = sorted_join(pdf1, pdf2, on=["index1", "index2"], rsuffix="_right")
         join_psdf = sorted_join(psdf1, psdf2, on=["index1", "index2"], rsuffix="_right")
         self.assert_eq(join_pdf, join_psdf)

         with self.assertRaisesRegex(
             ValueError, r'len\(left_on\) must equal the number of levels in the index of "right"'
         ):
             psdf1.join(psdf2, on=["index1"], rsuffix="_right")

     def test_update(self):
         """Check in-place ``DataFrame.update`` against pandas, including multi-index columns."""

         # check base function
         def get_data(left_columns=None, right_columns=None):
             # Build fresh pandas and pandas-on-Spark frames for every scenario, because
             # ``update`` modifies the left frame in place. Missing values are written as
             # ``np.nan`` for pandas and ``None`` for pandas-on-Spark.
             left_pdf = pd.DataFrame(
                 {"A": ["1", "2", "3", "4"], "B": ["100", "200", np.nan, np.nan]}, columns=["A", "B"]
             )
             right_pdf = pd.DataFrame(
                 {"B": ["x", np.nan, "y", np.nan], "C": ["100", "200", "300", "400"]},
                 columns=["B", "C"],
             )

             left_psdf = ps.DataFrame(
                 {"A": ["1", "2", "3", "4"], "B": ["100", "200", None, None]}, columns=["A", "B"]
             )
             right_psdf = ps.DataFrame(
                 {"B": ["x", None, "y", None], "C": ["100", "200", "300", "400"]}, columns=["B", "C"]
             )
             if left_columns is not None:
                 left_pdf.columns = left_columns
                 left_psdf.columns = left_columns
             if right_columns is not None:
                 right_pdf.columns = right_columns
                 right_psdf.columns = right_columns
             return left_psdf, left_pdf, right_psdf, right_pdf

         def check_update(sort_by, left_columns=None, right_columns=None, **update_kwargs):
             # Update both implementations from fresh data, compare the sorted results, and
             # hand back the pandas-on-Spark frames for follow-up checks.
             left_psdf, left_pdf, right_psdf, right_pdf = get_data(
                 left_columns=left_columns, right_columns=right_columns
             )
             left_pdf.update(right_pdf, **update_kwargs)
             left_psdf.update(right_psdf, **update_kwargs)
             self.assert_eq(left_pdf.sort_values(by=sort_by), left_psdf.sort_values(by=sort_by))
             return left_psdf, right_psdf

         flat_keys = ["A", "B"]

         # Column "B" is taken before the update and compared afterwards.
         left_psdf, left_pdf, right_psdf, right_pdf = get_data()
         pser = left_pdf.B
         psser = left_psdf.B
         left_pdf.update(right_pdf)
         left_psdf.update(right_psdf)
         self.assert_eq(left_pdf.sort_values(by=flat_keys), left_psdf.sort_values(by=flat_keys))
         self.assert_eq(psser.sort_index(), pser.sort_index())

         left_psdf, right_psdf = check_update(flat_keys, overwrite=False)

         with self.assertRaises(NotImplementedError):
             left_psdf.update(right_psdf, join="right")

         # multi-index columns
         nested_keys = [("X", "A"), ("X", "B")]
         left_columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")])
         right_columns = pd.MultiIndex.from_tuples([("X", "B"), ("Y", "C")])

         check_update(nested_keys, left_columns=left_columns, right_columns=right_columns)
         check_update(
             nested_keys, left_columns=left_columns, right_columns=right_columns, overwrite=False
         )

         # Right-hand labels that share no column label with the left frame.
         right_columns = pd.MultiIndex.from_tuples([("Y", "B"), ("Y", "C")])
         check_update(nested_keys, left_columns=left_columns, right_columns=right_columns)


 # Concrete test case: the test methods come from FrameCombineMixin, the remaining
 # bases are the pyspark testing classes imported at the top of this file.
 class FrameCombineTests(FrameCombineMixin, PandasOnSparkTestCase, SQLTestUtils):
     pass


 if __name__ == "__main__":
     from pyspark.pandas.tests.computation.test_combine import *  # noqa: F401

     # Use the XML report runner when xmlrunner can be imported; otherwise pass None so
     # unittest.main uses its own default runner.
     testRunner = None
     try:
         import xmlrunner

         testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
     except ImportError:
         pass
     unittest.main(testRunner=testRunner, verbosity=2)
	#
	# Licensed to the Apache Software Foundation (ASF) under one or more
	# contributor license agreements. See the NOTICE file distributed with
	# this work for additional information regarding copyright ownership.
	# The ASF licenses this file to You under the Apache License, Version 2.0
	# (the "License"); you may not use this file except in compliance with
	# the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	import unittest

	import numpy as np
	import pandas as pd

	from pyspark import pandas as ps
	from pyspark.testing.pandasutils import PandasOnSparkTestCase
	from pyspark.testing.sqlutils import SQLTestUtils


	# This file contains test cases for 'Combining / joining / merging'
	# https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/frame.html#combining-joining-merging
	class FrameCombineMixin:
	def test_concat(self):
	pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"))
	psdf = ps.from_pandas(pdf)
	other_pdf = pd.DataFrame([[3, 4], [5, 6]], columns=list("BC"), index=[2, 3])
	other_psdf = ps.from_pandas(other_pdf)

	self.assert_eq(ps.concat([psdf, psdf]), pd.concat([pdf, pdf]))
	self.assert_eq(
	ps.concat([psdf, psdf], ignore_index=True), pd.concat([pdf, pdf], ignore_index=True)
	)

	# Assert DataFrames with non-matching columns
	self.assert_eq(ps.concat([psdf, other_psdf]), pd.concat([pdf, other_pdf]))

	ps.concat([psdf, psdf["A"]])
	# Assert appending a Series
	self.assert_eq(ps.concat([psdf, psdf["A"]]), pd.concat([pdf, pdf["A"]]))

	# Assert using the sort parameter
	self.assert_eq(ps.concat([psdf, psdf], sort=True), pd.concat([pdf, pdf], sort=True))

	# Assert appending multi-index DataFrames
	multi_index_pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list("AB"), index=[[2, 3], [4, 5]])
	multi_index_psdf = ps.from_pandas(multi_index_pdf)
	other_multi_index_pdf = pd.DataFrame(
	[[5, 6], [7, 8]], columns=list("AB"), index=[[2, 3], [6, 7]]
	)
	other_multi_index_psdf = ps.from_pandas(other_multi_index_pdf)

	self.assert_eq(
	ps.concat([multi_index_psdf, multi_index_psdf]),
	pd.concat([multi_index_pdf, multi_index_pdf]),
	)

	# Assert DataFrames with non-matching columns
	self.assert_eq(
	ps.concat([multi_index_psdf, other_multi_index_psdf]),
	pd.concat([multi_index_pdf, other_multi_index_pdf]),
	)

	# Assert trying to append DataFrames with different index levels
	msg = "Both DataFrames have to have the same number of index levels"
	with self.assertRaises(ValueError, msg=msg):
	ps.concat([psdf, multi_index_psdf])

	# Skip index level check when ignore_index=True
	self.assert_eq(
	ps.concat([psdf, other_multi_index_psdf], ignore_index=True),
	pd.concat([pdf, other_multi_index_pdf], ignore_index=True),
	)

	columns = pd.MultiIndex.from_tuples([("A", "X"), ("A", "Y")])
	pdf.columns = columns
	psdf.columns = columns

	self.assert_eq(ps.concat([psdf, psdf]), pd.concat([pdf, pdf]))

	def test_merge(self):
	left_pdf = pd.DataFrame(
	{
	"lkey": ["foo", "bar", "baz", "foo", "bar", "l"],
	"value": [1, 2, 3, 5, 6, 7],
	"x": list("abcdef"),
	},
	columns=["lkey", "value", "x"],
	)
	right_pdf = pd.DataFrame(
	{
	"rkey": ["baz", "foo", "bar", "baz", "foo", "r"],
	"value": [4, 5, 6, 7, 8, 9],
	"y": list("efghij"),
	},
	columns=["rkey", "value", "y"],
	)
	right_pser = pd.Series(list("defghi"), name="x", index=[5, 6, 7, 8, 9, 10])

	left_psdf = ps.from_pandas(left_pdf)
	right_psdf = ps.from_pandas(right_pdf)
	right_psser = ps.from_pandas(right_pser)

	def check(op, right_psdf=right_psdf, right_pdf=right_pdf):
	ps_res = op(left_psdf, right_psdf)
	ps_res = ps_res._to_pandas()
	ps_res = ps_res.sort_values(by=list(ps_res.columns))
	ps_res = ps_res.reset_index(drop=True)
	p_res = op(left_pdf, right_pdf)
	p_res = p_res.sort_values(by=list(p_res.columns))
	p_res = p_res.reset_index(drop=True)
	self.assert_eq(ps_res, p_res)

	check(lambda left, right: left.merge(right))
	check(lambda left, right: left.merge(right, on="value"))
	check(lambda left, right: left.merge(right, on=("value",)))
	check(lambda left, right: left.merge(right, left_on="lkey", right_on="rkey"))
	check(lambda left, right: left.set_index("lkey").merge(right.set_index("rkey")))
	check(
	lambda left, right: left.set_index("lkey").merge(
	right, left_index=True, right_on="rkey"
	)
	)
	check(
	lambda left, right: left.merge(
	right.set_index("rkey"), left_on="lkey", right_index=True
	)
	)
	check(
	lambda left, right: left.set_index("lkey").merge(
	right.set_index("rkey"), left_index=True, right_index=True
	)
	)

	# MultiIndex
	check(
	lambda left, right: left.merge(
	right, left_on=["lkey", "value"], right_on=["rkey", "value"]
	)
	)
	check(
	lambda left, right: left.set_index(["lkey", "value"]).merge(
	right, left_index=True, right_on=["rkey", "value"]
	)
	)
	check(
	lambda left, right: left.merge(
	right.set_index(["rkey", "value"]), left_on=["lkey", "value"], right_index=True
	)
	)
	# TODO: when both left_index=True and right_index=True with multi-index
	# check(lambda left, right: left.set_index(['lkey', 'value']).merge(
	# right.set_index(['rkey', 'value']), left_index=True, right_index=True))

	# join types
	for how in ["inner", "left", "right", "outer"]:
	check(lambda left, right: left.merge(right, on="value", how=how))
	check(lambda left, right: left.merge(right, left_on="lkey", right_on="rkey", how=how))

	# suffix
	check(
	lambda left, right: left.merge(
	right, left_on="lkey", right_on="rkey", suffixes=["_left", "_right"]
	)
	)

	# Test Series on the right
	check(lambda left, right: left.merge(right), right_psser, right_pser)
	check(
	lambda left, right: left.merge(right, left_on="x", right_on="x"),
	right_psser,
	right_pser,
	)
	check(
	lambda left, right: left.set_index("x").merge(right, left_index=True, right_on="x"),
	right_psser,
	right_pser,
	)

	# Test join types with Series
	for how in ["inner", "left", "right", "outer"]:
	check(lambda left, right: left.merge(right, how=how), right_psser, right_pser)
	check(
	lambda left, right: left.merge(right, left_on="x", right_on="x", how=how),
	right_psser,
	right_pser,
	)

	# suffix with Series
	check(
	lambda left, right: left.merge(
	right,
	suffixes=["_left", "_right"],
	how="outer",
	left_index=True,
	right_index=True,
	),
	right_psser,
	right_pser,
	)

	# multi-index columns
	left_columns = pd.MultiIndex.from_tuples([(10, "lkey"), (10, "value"), (20, "x")])
	left_pdf.columns = left_columns
	left_psdf.columns = left_columns

	right_columns = pd.MultiIndex.from_tuples([(10, "rkey"), (10, "value"), (30, "y")])
	right_pdf.columns = right_columns
	right_psdf.columns = right_columns

	check(lambda left, right: left.merge(right))
	check(lambda left, right: left.merge(right, on=[(10, "value")]))
	check(
	lambda left, right: (left.set_index((10, "lkey")).merge(right.set_index((10, "rkey"))))
	)
	check(
	lambda left, right: (
	left.set_index((10, "lkey")).merge(
	right.set_index((10, "rkey")), left_index=True, right_index=True
	)
	)
	)
	# TODO: when both left_index=True and right_index=True with multi-index columns
	# check(lambda left, right: left.merge(right,
	# left_on=[('a', 'lkey')], right_on=[('a', 'rkey')]))
	# check(lambda left, right: (left.set_index(('a', 'lkey'))
	# .merge(right, left_index=True, right_on=[('a', 'rkey')])))

	# non-string names
	left_pdf.columns = [10, 100, 1000]
	left_psdf.columns = [10, 100, 1000]

	right_pdf.columns = [20, 100, 2000]
	right_psdf.columns = [20, 100, 2000]

	check(lambda left, right: left.merge(right))
	check(lambda left, right: left.merge(right, on=[100]))
	check(lambda left, right: (left.set_index(10).merge(right.set_index(20))))
	check(
	lambda left, right: (
	left.set_index(10).merge(right.set_index(20), left_index=True, right_index=True)
	)
	)

	def test_merge_same_anchor(self):
	    """Merge column subsets selected from one frame and compare the result with pandas."""
	    pdf = pd.DataFrame(
	        {
	            "lkey": ["foo", "bar", "baz", "foo", "bar", "l"],
	            "rkey": ["baz", "foo", "bar", "baz", "foo", "r"],
	            "value": [1, 1, 3, 5, 6, 7],
	            "x": list("abcdef"),
	            "y": list("efghij"),
	        },
	        columns=["lkey", "rkey", "value", "x", "y"],
	    )
	    psdf = ps.from_pandas(pdf)

	    # Both sides of every merge below are projections of the same source frame.
	    left_labels = ["lkey", "value", "x"]
	    right_labels = ["rkey", "value", "y"]
	    left_pdf, right_pdf = pdf[left_labels], pdf[right_labels]
	    left_psdf, right_psdf = psdf[left_labels], psdf[right_labels]

	    def normalize(frame):
	        # Row order and index are not compared: sort on every column, then drop the index.
	        return frame.sort_values(by=list(frame.columns)).reset_index(drop=True)

	    def check(op, right_psdf=right_psdf, right_pdf=right_pdf):
	        actual = normalize(op(left_psdf, right_psdf)._to_pandas())
	        expected = normalize(op(left_pdf, right_pdf))
	        self.assert_eq(actual, expected)

	    merge_ops = (
	        lambda left, right: left.merge(right),
	        lambda left, right: left.merge(right, on="value"),
	        lambda left, right: left.merge(right, left_on="lkey", right_on="rkey"),
	        lambda left, right: left.set_index("lkey").merge(right.set_index("rkey")),
	        lambda left, right: left.set_index("lkey").merge(
	            right, left_index=True, right_on="rkey"
	        ),
	        lambda left, right: left.merge(
	            right.set_index("rkey"), left_on="lkey", right_index=True
	        ),
	        lambda left, right: left.set_index("lkey").merge(
	            right.set_index("rkey"), left_index=True, right_index=True
	        ),
	    )
	    for merge_op in merge_ops:
	        check(merge_op)

	def test_merge_retains_indices(self):
	    """Compare merge results with pandas as-is, without sorting or resetting the index."""
	    left_pdf = pd.DataFrame({"A": [0, 1]})
	    right_pdf = pd.DataFrame({"B": [1, 2]}, index=[1, 2])
	    left_psdf = ps.from_pandas(left_pdf)
	    right_psdf = ps.from_pandas(right_pdf)

	    # Every combination of index-based and column-based keys on either side.
	    key_options = (
	        {"left_index": True, "right_index": True},
	        {"left_on": "A", "right_index": True},
	        {"left_index": True, "right_on": "B"},
	        {"left_on": "A", "right_on": "B"},
	    )
	    for keys in key_options:
	        self.assert_eq(
	            left_psdf.merge(right_psdf, **keys),
	            left_pdf.merge(right_pdf, **keys),
	        )

	def test_merge_how_parameter(self):
	    """Index-on-index merge must agree with pandas for the default and each `how` value."""
	    left_pdf = pd.DataFrame({"A": [1, 2]})
	    right_pdf = pd.DataFrame({"B": ["x", "y"]}, index=[1, 2])
	    left_psdf = ps.from_pandas(left_pdf)
	    right_psdf = ps.from_pandas(right_pdf)

	    def normalize(frame):
	        # Compare contents only: sort on every column and discard the index.
	        return frame.sort_values(by=list(frame.columns)).reset_index(drop=True)

	    # The empty entry leaves `how` unset so the default join type is exercised as well.
	    for how_kwargs in ({}, {"how": "left"}, {"how": "right"}, {"how": "outer"}):
	        psdf = left_psdf.merge(right_psdf, left_index=True, right_index=True, **how_kwargs)
	        pdf = left_pdf.merge(right_pdf, left_index=True, right_index=True, **how_kwargs)
	        self.assert_eq(normalize(psdf), normalize(pdf))

	def test_merge_raises(self):
	    """Each invalid merge call must raise the listed exception with a matching message."""
	    left = ps.DataFrame(
	        {"value": [1, 2, 3, 5, 6], "x": list("abcde")},
	        columns=["value", "x"],
	        index=["foo", "bar", "baz", "foo", "bar"],
	    )
	    right = ps.DataFrame(
	        {"value": [4, 5, 6, 7, 8], "y": list("fghij")},
	        columns=["value", "y"],
	        index=["baz", "foo", "bar", "baz", "foo"],
	    )

	    need_right = "Must pass right_on or right_index=True"
	    need_left = "Must pass left_on or left_index=True"
	    keys_length = "len\\(left_keys\\) must equal len\\(right_keys\\)"

	    # (expected exception, message regex, zero-argument callable performing the bad merge)
	    cases = [
	        (
	            ValueError,
	            "No common columns to perform merge on",
	            lambda: left[["x"]].merge(right[["y"]]),
	        ),
	        (
	            ValueError,
	            "not a combination of both",
	            lambda: left.merge(right, on="value", left_on="x"),
	        ),
	        (ValueError, need_right, lambda: left.merge(right, left_on="x")),
	        (ValueError, need_right, lambda: left.merge(right, left_index=True)),
	        (ValueError, need_left, lambda: left.merge(right, right_on="y")),
	        (ValueError, need_left, lambda: left.merge(right, right_index=True)),
	        (
	            ValueError,
	            keys_length,
	            lambda: left.merge(right, left_on="value", right_on=["value", "y"]),
	        ),
	        (
	            ValueError,
	            keys_length,
	            lambda: left.merge(right, left_on=["value", "x"], right_on="value"),
	        ),
	        # NOTE(review): the unescaped square brackets make this pattern a regex character
	        # class, so it matches any message containing one of those characters rather than
	        # the literal list. Confirm the intended message before tightening it.
	        (
	            ValueError,
	            "['inner', 'left', 'right', 'full', 'outer']",
	            lambda: left.merge(right, left_index=True, right_index=True, how="foo"),
	        ),
	        (KeyError, "id", lambda: left.merge(right, on="id")),
	    ]
	    for exc_type, pattern, invalid_merge in cases:
	        with self.assertRaisesRegex(exc_type, pattern):
	            invalid_merge()

	def test_join(self):
	    """Compare DataFrame.join with pandas for plain, `on`, and multi-index inputs."""

	    def sort_rows(frame):
	        # Sort in place on every column so both results share one row order.
	        frame.sort_values(by=list(frame.columns), inplace=True)
	        return frame

	    def drop_index(frame):
	        return frame.reset_index(drop=True)

	    suffixes = {"lsuffix": "_left", "rsuffix": "_right"}

	    # check basic function
	    pdf1 = pd.DataFrame(
	        {"key": ["K0", "K1", "K2", "K3"], "A": ["A0", "A1", "A2", "A3"]}, columns=["key", "A"]
	    )
	    pdf2 = pd.DataFrame(
	        {"key": ["K0", "K1", "K2"], "B": ["B0", "B1", "B2"]}, columns=["key", "B"]
	    )
	    psdf1 = ps.from_pandas(pdf1)
	    psdf2 = ps.from_pandas(pdf2)

	    expected = sort_rows(pdf1.join(pdf2, **suffixes))
	    actual = sort_rows(psdf1.join(psdf2, **suffixes))
	    self.assert_eq(expected, actual)

	    # join with duplicated columns in Series
	    overlap_message = "columns overlap but no suffix specified"
	    with self.assertRaisesRegex(ValueError, overlap_message):
	        ks1 = ps.Series(["A1", "A5"], index=[1, 2], name="A")
	        psdf1.join(ks1, how="outer")
	    # join with duplicated columns in DataFrame
	    with self.assertRaisesRegex(ValueError, overlap_message):
	        psdf1.join(psdf2, how="outer")

	    # check `on` parameter
	    expected = sort_rows(pdf1.join(pdf2.set_index("key"), on="key", **suffixes))
	    actual = sort_rows(psdf1.join(psdf2.set_index("key"), on="key", **suffixes))
	    self.assert_eq(drop_index(expected), drop_index(actual))

	    expected = sort_rows(
	        pdf1.set_index("key").join(pdf2.set_index("key"), on="key", **suffixes)
	    )
	    actual = sort_rows(
	        psdf1.set_index("key").join(psdf2.set_index("key"), on="key", **suffixes)
	    )
	    self.assert_eq(drop_index(expected), drop_index(actual))

	    # multi-index columns
	    columns1 = pd.MultiIndex.from_tuples([("x", "key"), ("Y", "A")])
	    columns2 = pd.MultiIndex.from_tuples([("x", "key"), ("Y", "B")])
	    pdf1.columns = columns1
	    pdf2.columns = columns2
	    psdf1.columns = columns1
	    psdf2.columns = columns2

	    expected = sort_rows(pdf1.join(pdf2, **suffixes))
	    actual = sort_rows(psdf1.join(psdf2, **suffixes))
	    self.assert_eq(expected, actual)

	    # check `on` parameter
	    key_label = ("x", "key")

	    expected = sort_rows(pdf1.join(pdf2.set_index(key_label), on=[key_label], **suffixes))
	    actual = sort_rows(psdf1.join(psdf2.set_index(key_label), on=[key_label], **suffixes))
	    self.assert_eq(drop_index(expected), drop_index(actual))

	    expected = sort_rows(
	        pdf1.set_index(key_label).join(pdf2.set_index(key_label), on=[key_label], **suffixes)
	    )
	    actual = sort_rows(
	        psdf1.set_index(key_label).join(
	            psdf2.set_index(key_label), on=[key_label], **suffixes
	        )
	    )
	    self.assert_eq(drop_index(expected), drop_index(actual))

	    # multi-index
	    midx1 = pd.MultiIndex.from_tuples(
	        [("w", "a"), ("x", "b"), ("y", "c"), ("z", "d")], names=["index1", "index2"]
	    )
	    midx2 = pd.MultiIndex.from_tuples(
	        [("w", "a"), ("x", "b"), ("y", "c")], names=["index1", "index2"]
	    )
	    pdf1.index = midx1
	    pdf2.index = midx2
	    # Rebuild the pandas-on-Spark frames from the re-indexed pandas frames.
	    psdf1 = ps.from_pandas(pdf1)
	    psdf2 = ps.from_pandas(pdf2)

	    expected = sort_rows(pdf1.join(pdf2, on=["index1", "index2"], rsuffix="_right"))
	    actual = sort_rows(psdf1.join(psdf2, on=["index1", "index2"], rsuffix="_right"))
	    self.assert_eq(expected, actual)

	    # Fewer `on` labels than index levels on the right side must be rejected.
	    with self.assertRaisesRegex(
	        ValueError, r'len\(left_on\) must equal the number of levels in the index of "right"'
	    ):
	        psdf1.join(psdf2, on=["index1"], rsuffix="_right")

	def test_update(self):
	    """Compare DataFrame.update with pandas for flat and multi-index column labels."""

	    # check base function
	    def get_data(left_columns=None, right_columns=None):
	        left_pdf = pd.DataFrame(
	            {"A": ["1", "2", "3", "4"], "B": ["100", "200", np.nan, np.nan]}, columns=["A", "B"]
	        )
	        right_pdf = pd.DataFrame(
	            {"B": ["x", np.nan, "y", np.nan], "C": ["100", "200", "300", "400"]},
	            columns=["B", "C"],
	        )

	        left_psdf = ps.DataFrame(
	            {"A": ["1", "2", "3", "4"], "B": ["100", "200", None, None]}, columns=["A", "B"]
	        )
	        right_psdf = ps.DataFrame(
	            {"B": ["x", None, "y", None], "C": ["100", "200", "300", "400"]}, columns=["B", "C"]
	        )

	        # Apply the optional replacement labels to the pandas frame and its counterpart.
	        relabelings = (
	            (left_columns, (left_pdf, left_psdf)),
	            (right_columns, (right_pdf, right_psdf)),
	        )
	        for labels, frames in relabelings:
	            if labels is None:
	                continue
	            for frame in frames:
	                frame.columns = labels
	        return left_psdf, left_pdf, right_psdf, right_pdf

	    def run_update(sort_keys, left_columns=None, right_columns=None, **update_kwargs):
	        # Build fresh frames, update both flavours, and compare them sorted by `sort_keys`.
	        left_psdf, left_pdf, right_psdf, right_pdf = get_data(
	            left_columns=left_columns, right_columns=right_columns
	        )
	        left_pdf.update(right_pdf, **update_kwargs)
	        left_psdf.update(right_psdf, **update_kwargs)
	        self.assert_eq(
	            left_pdf.sort_values(by=sort_keys), left_psdf.sort_values(by=sort_keys)
	        )
	        return left_psdf, right_psdf

	    flat_keys = ["A", "B"]

	    left_psdf, left_pdf, right_psdf, right_pdf = get_data()
	    # Series selected before the update are compared after it as well.
	    pser = left_pdf.B
	    psser = left_psdf.B
	    left_pdf.update(right_pdf)
	    left_psdf.update(right_psdf)
	    self.assert_eq(left_pdf.sort_values(by=flat_keys), left_psdf.sort_values(by=flat_keys))
	    self.assert_eq(psser.sort_index(), pser.sort_index())

	    left_psdf, right_psdf = run_update(flat_keys, overwrite=False)

	    with self.assertRaises(NotImplementedError):
	        left_psdf.update(right_psdf, join="right")

	    # multi-index columns
	    left_columns = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B")])
	    right_columns = pd.MultiIndex.from_tuples([("X", "B"), ("Y", "C")])
	    multi_keys = [("X", "A"), ("X", "B")]

	    run_update(multi_keys, left_columns=left_columns, right_columns=right_columns)
	    run_update(
	        multi_keys, left_columns=left_columns, right_columns=right_columns, overwrite=False
	    )

	    # Right-hand labels that share no column label with the left frame.
	    right_columns = pd.MultiIndex.from_tuples([("Y", "B"), ("Y", "C")])
	    run_update(multi_keys, left_columns=left_columns, right_columns=right_columns)


	class FrameCombineTests(FrameCombineMixin, PandasOnSparkTestCase, SQLTestUtils):
	    """Concrete test case combining FrameCombineMixin with the Spark test base classes."""


	if __name__ == "__main__":
	    from pyspark.pandas.tests.computation.test_combine import *  # noqa: F401

	    # Default to unittest's own runner; switch to XML reports when xmlrunner is installed.
	    testRunner = None
	    try:
	        import xmlrunner

	        testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2)
	    except ImportError:
	        pass
	    unittest.main(testRunner=testRunner, verbosity=2)