| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| # pylint: disable=import-outside-toplevel, unused-argument |
| |
| from datetime import datetime, timezone |
| |
| import numpy as np |
| import pandas as pd |
| from numpy.core.multiarray import array |
| from pytest_mock import MockerFixture |
| |
| from superset.db_engine_specs.base import BaseEngineSpec |
| from superset.result_set import stringify_values, SupersetResultSet |
| |
| |
def test_column_names_as_bytes() -> None:
    """
    Test that we can handle column names as bytes.

    Some drivers (e.g. Redshift's) return the cursor description's column
    names as ``bytes`` rather than ``str``; the result set should decode
    them transparently so the resulting DataFrame has string column labels.
    """
    # Imported lazily so merely importing this module doesn't pull in the
    # Redshift engine spec (see the module-level pylint disable).
    from superset.db_engine_specs.redshift import RedshiftEngineSpec

    data = (
        [
            "2016-01-26",
            392.002014,
            397.765991,
            390.575012,
            392.153015,
            392.153015,
            58147000,
        ],
        [
            "2016-01-27",
            392.444,
            396.842987,
            391.782013,
            394.971985,
            394.971985,
            47424400,
        ],
    )
    # Cursor description tuples with byte-string column names, as a
    # Redshift driver would produce them.
    description = [
        (b"date", 1043, None, None, None, None, None),
        (b"open", 701, None, None, None, None, None),
        (b"high", 701, None, None, None, None, None),
        (b"low", 701, None, None, None, None, None),
        (b"close", 701, None, None, None, None, None),
        (b"adj close", 701, None, None, None, None, None),
        (b"volume", 20, None, None, None, None, None),
    ]
    result_set = SupersetResultSet(data, description, RedshiftEngineSpec)  # type: ignore

    assert (
        result_set.to_pandas_df().to_markdown()
        == """
|    | date       |    open |    high |     low |   close |   adj close |   volume |
|---:|:-----------|--------:|--------:|--------:|--------:|------------:|---------:|
|  0 | 2016-01-26 | 392.002 | 397.766 | 390.575 | 392.153 |     392.153 | 58147000 |
|  1 | 2016-01-27 | 392.444 | 396.843 | 391.782 | 394.972 |     394.972 | 47424400 |
""".strip()
    )
| |
| |
def test_stringify_with_null_integers() -> None:
    """
    Test that we can safely handle type errors when an integer column has a
    null value.

    ``pd.NA`` entries must be stringified to ``None`` rather than raising,
    while non-null values (e.g. ``True``) are converted to their string form.
    """
    data = [
        ("foo", "bar", pd.NA, None),
        ("foo", "bar", pd.NA, True),
        ("foo", "bar", pd.NA, None),
    ]
    numpy_dtype = [
        ("id", "object"),
        ("value", "object"),
        ("num", "object"),
        ("bool", "object"),
    ]

    records = np.array(data, dtype=numpy_dtype)
    column_names = ["id", "value", "num", "bool"]

    result_set = np.array(
        [stringify_values(records[column]) for column in column_names]
    )

    # Nulls stay None; the boolean True becomes the string "True".
    expected = np.array(
        [
            np.array(["foo", "foo", "foo"], dtype=object),
            np.array(["bar", "bar", "bar"], dtype=object),
            np.array([None, None, None], dtype=object),
            np.array([None, "True", None], dtype=object),
        ]
    )

    assert np.array_equal(result_set, expected)
| |
| |
def test_stringify_with_null_timestamps() -> None:
    """
    Test that we can safely handle type errors when a timestamp column has a
    null value.

    ``pd.NaT`` entries must be stringified to ``None`` rather than raising,
    while non-null values (e.g. ``True``) are converted to their string form.
    """
    data = [
        ("foo", "bar", pd.NaT, None),
        ("foo", "bar", pd.NaT, True),
        ("foo", "bar", pd.NaT, None),
    ]
    numpy_dtype = [
        ("id", "object"),
        ("value", "object"),
        ("num", "object"),
        ("bool", "object"),
    ]

    records = np.array(data, dtype=numpy_dtype)
    column_names = ["id", "value", "num", "bool"]

    result_set = np.array(
        [stringify_values(records[column]) for column in column_names]
    )

    # NaT stays None; the boolean True becomes the string "True".
    expected = np.array(
        [
            np.array(["foo", "foo", "foo"], dtype=object),
            np.array(["bar", "bar", "bar"], dtype=object),
            np.array([None, None, None], dtype=object),
            np.array([None, "True", None], dtype=object),
        ]
    )

    assert np.array_equal(result_set, expected)
| |
| |
def test_timezone_series(mocker: MockerFixture) -> None:
    """
    Ensure timezone-aware datetimes survive the round trip into a DataFrame.

    Guards against a regression introduced when upgrading from Pandas 1.5.3
    to 2.0.3.
    """
    logger = mocker.patch("superset.result_set.logger")

    row = [datetime(2023, 1, 1, tzinfo=timezone.utc)]
    description = [(b"__time", "datetime", None, None, None, None, False)]
    result_set = SupersetResultSet([row], description, BaseEngineSpec)  # type: ignore

    expected = [[pd.Timestamp("2023-01-01 00:00:00+0000", tz="UTC")]]
    assert result_set.to_pandas_df().values.tolist() == expected
    # No conversion errors should have been swallowed and logged.
    logger.exception.assert_not_called()