# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import unittest.mock as mock
from contextlib import contextmanager

import pytest
from pandas import DataFrame
from sqlalchemy import column

from superset.connectors.sqla.models import TableColumn
from superset.db_engine_specs.base import BaseEngineSpec
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.sql.parse import Table
from tests.integration_tests.base_tests import SupersetTestCase
from tests.integration_tests.fixtures.birth_names_dashboard import (
    load_birth_names_dashboard_with_slices,  # noqa: F401
    load_birth_names_data,  # noqa: F401
)


@contextmanager
def mock_engine_with_credentials(*args, **kwargs):
    """Yield a mock engine whose dialect carries BigQuery credentials_info."""
    engine_mock = mock.Mock()
    # Stub the credentials_info attribute that the BigQuery dialect exposes.
    engine_mock.dialect.credentials_info = {"key": "value"}
    yield engine_mock


class TestBigQueryDbEngineSpec(SupersetTestCase):
def test_bigquery_sqla_column_label(self):
"""
DB Eng Specs (bigquery): Test column label
"""
test_cases = {
"Col": "Col",
"SUM(x)": "SUM_x__5f110",
"SUM[x]": "SUM_x__7ebe1",
"12345_col": "_12345_col_8d390",
}
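        # The five-hex-char suffix on mutated labels presumably comes from a
        # truncated md5 digest of the original label (see
        # BigQueryEngineSpec._mutate_label), keeping otherwise-colliding
        # expressions such as SUM(x) and SUM[x] distinct once the disallowed
        # characters have been replaced with underscores.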
for original, expected in test_cases.items():
actual = BigQueryEngineSpec.make_label_compatible(column(original).name)
assert actual == expected

    def test_timegrain_expressions(self):
"""
DB Eng Specs (bigquery): Test time grain expressions
"""
col = column("temporal")
test_cases = {
"DATE": "DATE_TRUNC(temporal, HOUR)",
"TIME": "TIME_TRUNC(temporal, HOUR)",
"DATETIME": "DATETIME_TRUNC(temporal, HOUR)",
"TIMESTAMP": "TIMESTAMP_TRUNC(temporal, HOUR)",
}
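        # BigQuery has no single generic truncation function, so the "PT1H"
        # ISO 8601 duration is expected to resolve to the *_TRUNC variant that
        # matches the column type (DATE_TRUNC, TIME_TRUNC, DATETIME_TRUNC, or
        # TIMESTAMP_TRUNC), always truncating to HOUR.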
for type_, expected in test_cases.items():
col.type = type_
actual = BigQueryEngineSpec.get_timestamp_expr(
col=col, pdf=None, time_grain="PT1H"
)
assert str(actual) == expected

    def test_custom_minute_timegrain_expressions(self):
        """
        DB Eng Specs (bigquery): Test custom minute time grain expressions
        """
col = column("temporal")
test_cases = {
"DATE": "CAST(TIMESTAMP_SECONDS("
"5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
") AS DATE)",
"DATETIME": "CAST(TIMESTAMP_SECONDS("
"5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
") AS DATETIME)",
"TIMESTAMP": "CAST(TIMESTAMP_SECONDS("
"5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
") AS TIMESTAMP)",
}
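        # The expected SQL buckets the timestamp into 5-minute windows using
        # epoch arithmetic: DIV(UNIX_SECONDS(ts), 5*60) floors to a bucket
        # index, multiplying by 5*60 recovers the bucket's start in seconds,
        # and the result is cast back to the column's original type.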
for type_, expected in test_cases.items():
col.type = type_
actual = BigQueryEngineSpec.get_timestamp_expr(
col=col, pdf=None, time_grain="PT5M"
)
assert str(actual) == expected

    def test_fetch_data(self):
"""
DB Eng Specs (bigquery): Test fetch data
"""
# Mock a google.cloud.bigquery.table.Row
        class Row:
            def __init__(self, value):
                self._value = value

            def values(self):
                return self._value
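        # fetch_data is expected to unwrap google.cloud.bigquery Row objects
        # by calling .values() on each, while plain tuples coming back from
        # the base implementation pass through unchanged.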
data1 = [(1, "foo")]
with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data1):
result = BigQueryEngineSpec.fetch_data(None, 0)
assert result == data1
data2 = [Row(1), Row(2)]
with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=data2):
result = BigQueryEngineSpec.fetch_data(None, 0)
assert result == [1, 2]

    @mock.patch.object(
BigQueryEngineSpec, "get_engine", side_effect=mock_engine_with_credentials
)
@mock.patch.object(BigQueryEngineSpec, "get_time_partition_column")
@mock.patch.object(BigQueryEngineSpec, "get_max_partition_id")
@mock.patch.object(BigQueryEngineSpec, "quote_table", return_value="`table_name`")
def test_get_extra_table_metadata(
self,
mock_quote_table,
mock_get_max_partition_id,
mock_get_time_partition_column,
mock_get_engine,
):
"""
DB Eng Specs (bigquery): Test extra table metadata
"""
database = mock.Mock()
sql = "SELECT * FROM `table_name`"
database.compile_sqla_query.return_value = sql
tbl = Table("some_table", "some_schema")
# Test no indexes
mock_get_time_partition_column.return_value = None
mock_get_max_partition_id.return_value = None
result = BigQueryEngineSpec.get_extra_table_metadata(database, tbl)
assert result == {}
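        # With a time-partitioned table, the spec should surface the partition
        # column as a synthetic index, the latest partition value, and a
        # partitionQuery that targets that partition.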
mock_get_time_partition_column.return_value = "ds"
mock_get_max_partition_id.return_value = "19690101"
result = BigQueryEngineSpec.get_extra_table_metadata(database, tbl)
assert result == {
"indexes": [{"cols": ["ds"], "name": "partitioned", "type": "partitioned"}],
"partitions": {
"cols": ["ds"],
"latest": {"ds": "19690101"},
"partitionQuery": sql,
},
}
@mock.patch("superset.db_engine_specs.bigquery.BigQueryEngineSpec.get_engine")
@mock.patch("superset.db_engine_specs.bigquery.pandas_gbq")
@mock.patch("superset.db_engine_specs.bigquery.service_account")
def test_df_to_sql(self, mock_service_account, mock_pandas_gbq, mock_get_engine):
"""
DB Eng Specs (bigquery): Test DataFrame to SQL contract
"""
mock_service_account.Credentials.from_service_account_info = mock.MagicMock(
return_value="account_info"
)
mock_get_engine.return_value.__enter__.return_value.url.host = "google-host"
mock_get_engine.return_value.__enter__.return_value.dialect.credentials_info = (
"secrets"
)
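        # df_to_sql should hand the frame to pandas_gbq.to_gbq, deriving the
        # project id from the engine URL's host and building credentials from
        # the dialect's credentials_info; extra to_sql_kwargs pass through.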
df = DataFrame()
database = mock.MagicMock()
BigQueryEngineSpec.df_to_sql(
database=database,
table=Table(table="name", schema="schema"),
df=df,
to_sql_kwargs={"if_exists": "extra_key"},
)
mock_pandas_gbq.to_gbq.assert_called_with(
df,
project_id="google-host",
destination_table="schema.name",
credentials="account_info",
if_exists="extra_key",
)

    def test_extract_errors(self):
        """
        DB Eng Specs (bigquery): Test error extraction
        """
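        # extract_errors matches the raw driver message against the spec's
        # custom_errors regexes and maps each hit to a typed SupersetError
        # with engine-specific issue codes.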
msg = "403 POST https://bigquery.googleapis.com/bigquery/v2/projects/test-keel-310804/jobs?prettyPrint=false: Access Denied: Project profound-keel-310804: User does not have bigquery.jobs.create permission in project profound-keel-310804" # noqa: E501
result = BigQueryEngineSpec.extract_errors(Exception(msg))
assert result == [
SupersetError(
message='Unable to connect. Verify that the following roles are set on the service account: "BigQuery Data Viewer", "BigQuery Metadata Viewer", "BigQuery Job User" and the following permissions are set "bigquery.readsessions.create", "bigquery.readsessions.getData"', # noqa: E501
error_type=SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
level=ErrorLevel.ERROR,
extra={
"engine_name": "Google BigQuery",
"issue_codes": [
{
"code": 1017,
"message": "",
}
],
},
)
]
msg = "bigquery error: 404 Not found: Dataset fakeDataset:bogusSchema was not found in location" # noqa: E501
result = BigQueryEngineSpec.extract_errors(Exception(msg))
assert result == [
SupersetError(
message='The schema "bogusSchema" does not exist. A valid schema must be used to run this query.', # noqa: E501
error_type=SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.ERROR,
extra={
"engine_name": "Google BigQuery",
"issue_codes": [
{
"code": 1003,
"message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.", # noqa: E501
},
{
"code": 1004,
"message": "Issue 1004 - The column was deleted or renamed in the database.", # noqa: E501
},
],
},
)
]
msg = 'Table name "badtable" missing dataset while no default dataset is set in the request' # noqa: E501
result = BigQueryEngineSpec.extract_errors(Exception(msg))
assert result == [
SupersetError(
message='The table "badtable" does not exist. A valid table must be used to run this query.', # noqa: E501
error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.ERROR,
extra={
"engine_name": "Google BigQuery",
"issue_codes": [
{
"code": 1003,
"message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.", # noqa: E501
},
{
"code": 1005,
"message": "Issue 1005 - The table was deleted or renamed in the database.", # noqa: E501
},
],
},
)
]
msg = "Unrecognized name: badColumn at [1:8]"
result = BigQueryEngineSpec.extract_errors(Exception(msg))
assert result == [
SupersetError(
message='We can\'t seem to resolve column "badColumn" at line 1:8.',
error_type=SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
level=ErrorLevel.ERROR,
extra={
"engine_name": "Google BigQuery",
"issue_codes": [
{
"code": 1003,
"message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.", # noqa: E501
},
{
"code": 1004,
"message": "Issue 1004 - The column was deleted or renamed in the database.", # noqa: E501
},
],
},
)
]
msg = 'Syntax error: Expected end of input but got identifier "from_"'
result = BigQueryEngineSpec.extract_errors(Exception(msg))
assert result == [
SupersetError(
message='Please check your query for syntax errors at or near "from_". Then, try running your query again.', # noqa: E501
error_type=SupersetErrorType.SYNTAX_ERROR,
level=ErrorLevel.ERROR,
extra={
"engine_name": "Google BigQuery",
"issue_codes": [
{
"code": 1030,
"message": "Issue 1030 - The query has a syntax error.",
}
],
},
)
]
@mock.patch("superset.models.core.Database.db_engine_spec", BigQueryEngineSpec)
@mock.patch("sqlalchemy_bigquery._helpers.create_bigquery_client", mock.Mock)
@pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
    def test_calculated_column_in_order_by(self):
        """
        DB Eng Specs (bigquery): Test calculated column referenced by label in ORDER BY
        """
        table = self.get_table(name="birth_names")
TableColumn(
column_name="gender_cc",
type="VARCHAR(255)",
table=table,
expression="""
case
when gender='boy' then 'male'
else 'female'
end
""",
)
table.database.sqlalchemy_uri = "bigquery://"
query_obj = {
"groupby": ["gender_cc"],
"is_timeseries": False,
"filter": [],
"orderby": [["gender_cc", True]],
}
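        # BigQuery allows ordering by a SELECT alias, so the calculated
        # column should appear in ORDER BY via its quoted label rather than
        # having its CASE expression repeated.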
sql = table.get_query_str(query_obj)
assert "ORDER BY `gender_cc` ASC" in sql