fix: adhoc column quoting (#36215)
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
index 3afe9fa..73a60de 100644
--- a/superset/connectors/sqla/models.py
+++ b/superset/connectors/sqla/models.py
@@ -1509,33 +1509,48 @@
:rtype: sqlalchemy.sql.column
"""
label = utils.get_column_name(col)
- try:
- sql_expression = col["sqlExpression"]
-
- # For column references, conditionally quote identifiers that need it
- if col.get("isColumnReference"):
- sql_expression = self.database.quote_identifier(sql_expression)
-
- expression = self._process_select_expression(
- expression=sql_expression,
- database_id=self.database_id,
- engine=self.database.backend,
- schema=self.schema,
- template_processor=template_processor,
- )
- except SupersetSecurityException as ex:
- raise QueryObjectValidationError(ex.message) from ex
+ sql_expression = col["sqlExpression"]
time_grain = col.get("timeGrain")
has_timegrain = col.get("columnType") == "BASE_AXIS" and time_grain
is_dttm = False
pdf = None
- if col_in_metadata := self.get_column(expression):
+ is_column_reference = col.get("isColumnReference")
+
+ # First, check if this is a column reference that exists in metadata
+ col_in_metadata = None
+ if is_column_reference:
+ col_in_metadata = self.get_column(sql_expression)
+
+ if col_in_metadata:
+ # Column exists in metadata - use it directly
sqla_column = col_in_metadata.get_sqla_col(
template_processor=template_processor
)
is_dttm = col_in_metadata.is_temporal
pdf = col_in_metadata.python_date_format
else:
+ # Column doesn't exist in metadata or is not a reference - treat as ad-hoc
+ # expression Note: If isColumnReference=true but column not found, we still
+ # quote it as a fallback for backwards compatibility, though this indicates
+ # the frontend sent incorrect metadata
+ try:
+ # For column references, conditionally quote identifiers that need it
+ expression_to_process = sql_expression
+ if is_column_reference:
+ expression_to_process = self.database.quote_identifier(
+ sql_expression
+ )
+
+ expression = self._process_select_expression(
+ expression=expression_to_process,
+ database_id=self.database_id,
+ engine=self.database.backend,
+ schema=self.schema,
+ template_processor=template_processor,
+ )
+ except SupersetSecurityException as ex:
+ raise QueryObjectValidationError(ex.message) from ex
+
sqla_column = literal_column(expression)
if has_timegrain or force_type_check:
try:
diff --git a/superset/utils/log.py b/superset/utils/log.py
index 81ea1ba..51e91e1 100644
--- a/superset/utils/log.py
+++ b/superset/utils/log.py
@@ -408,8 +408,19 @@
db.session.bulk_save_objects(logs)
db.session.commit() # pylint: disable=consider-using-transaction
except SQLAlchemyError as ex:
+ # Log errors but don't raise - logging failures should not break the
+ # application. Common in tests where the session may be in prepared state or
+ # db is locked
logging.error("DBEventLogger failed to log event(s)")
logging.exception(ex)
+ # Rollback to clean up the session state
+ try:
+ db.session.rollback()
+ except Exception: # pylint: disable=broad-except
+ # If rollback also fails, just continue - don't let issues crash the app
+ logging.error(
+ "DBEventLogger failed to rollback the session after failure"
+ )
class StdOutEventLogger(AbstractEventLogger):
diff --git a/tests/integration_tests/query_context_tests.py b/tests/integration_tests/query_context_tests.py
index 2169458..dedc9b9 100644
--- a/tests/integration_tests/query_context_tests.py
+++ b/tests/integration_tests/query_context_tests.py
@@ -864,12 +864,14 @@
"label": "I_AM_AN_ORIGINAL_COLUMN",
"sqlExpression": "col5",
"timeGrain": "P1Y",
+ "isColumnReference": True,
}
adhoc_column: AdhocColumn = {
"label": "I_AM_A_TRUNC_COLUMN",
"sqlExpression": "col6",
"columnType": "BASE_AXIS",
"timeGrain": "P1Y",
+ "isColumnReference": True,
}
qc = QueryContextFactory().create(
datasource={
@@ -1039,6 +1041,7 @@
"sqlExpression": "col6",
"columnType": "BASE_AXIS",
"timeGrain": "P3M",
+ "isColumnReference": True,
}
qc = QueryContextFactory().create(
datasource={
@@ -1152,6 +1155,7 @@
"sqlExpression": "col6",
"columnType": "BASE_AXIS",
"timeGrain": "P3M",
+ "isColumnReference": True,
}
],
"metrics": [
diff --git a/tests/unit_tests/models/helpers_test.py b/tests/unit_tests/models/helpers_test.py
index dfbcd9a..d1b5721 100644
--- a/tests/unit_tests/models/helpers_test.py
+++ b/tests/unit_tests/models/helpers_test.py
@@ -1395,20 +1395,24 @@
def test_adhoc_column_to_sqla_with_column_reference(database: Database) -> None:
"""
- Test that adhoc_column_to_sqla
- properly quotes column identifiers when isColumnReference is true.
+ Test that adhoc_column_to_sqla properly handles column references
+ by looking up the column in metadata instead of quoting and processing through
+ SQLGlot.
- This tests the fix for column names with spaces being properly quoted
- before being processed by SQLGlot to prevent "column AS alias" misinterpretation.
+ This tests the fix for column names with spaces being properly handled
+ without going through SQLGlot which could misinterpret "column AS alias" patterns.
"""
- from superset.connectors.sqla.models import SqlaTable
+ from superset.connectors.sqla.models import SqlaTable, TableColumn
table = SqlaTable(
table_name="test_table",
database=database,
+ columns=[
+ TableColumn(column_name="Customer Name", type="TEXT"),
+ ],
)
- # Test 1: Column reference with spaces should be quoted
+ # Test: Column reference with spaces should be found in metadata
col_with_spaces: AdhocColumn = {
"sqlExpression": "Customer Name",
"label": "Customer Name",
@@ -1417,8 +1421,223 @@
result = table.adhoc_column_to_sqla(col_with_spaces)
- # Should contain the quoted column name
+ # Should return a valid SQLAlchemy column
assert result is not None
result_str = str(result)
- assert '"Customer Name"' in result_str
+ # The column name should be present (may or may not be quoted depending on dialect)
+ assert "Customer Name" in result_str or '"Customer Name"' in result_str
+
+
+def test_adhoc_column_to_sqla_preserves_column_type_for_time_grain(
+ database: Database,
+) -> None:
+ """
+ Test that adhoc_column_to_sqla preserves column type info in column references.
+
+ This tests the fix where column references now look up metadata first, preserving
+ type information needed for time grain operations. Previously, quoting the column
+ name before metadata lookup would cause the column to not be found, resulting in
+ NULL type and failing to apply time grain transformations properly.
+
+ The test verifies that:
+ 1. Column metadata is found by looking up the unquoted column name
+ 2. The column type (DATE) is preserved when creating the SQLAlchemy column
+ 3. The get_timestamp_expr method is properly called with the column type info
+ """
+ from superset.connectors.sqla.models import SqlaTable, TableColumn
+
+ # Create a table with a temporal column
+ table = SqlaTable(
+ table_name="test_table",
+ database=database,
+ columns=[
+ TableColumn(
+ column_name="local_date",
+ type="DATE",
+ is_dttm=True,
+ )
+ ],
+ )
+
+ # Test with a DATE column reference with time grain
+ date_col: AdhocColumn = {
+ "sqlExpression": "local_date",
+ "label": "local_date",
+ "isColumnReference": True,
+ "timeGrain": "P1D", # Daily time grain
+ "columnType": "BASE_AXIS",
+ }
+
+ # Should not raise ColumnNotFoundException
+ result = table.adhoc_column_to_sqla(date_col)
+
+ assert result is not None
+ result_str = str(result)
+
+ # Verify the column name is present (may be quoted depending on dialect)
+ assert "local_date" in result_str
+
+
+def test_adhoc_column_to_sqla_with_temporal_column_types(database: Database) -> None:
+ """
+ Test that adhoc_column_to_sqla correctly handles different temporal column types.
+
+ This verifies that for different temporal types (DATE, DATETIME, TIMESTAMP),
+ the column metadata is properly found and the column type is preserved,
+ allowing time grain operations to work correctly.
+ """
+ from superset.connectors.sqla.models import SqlaTable, TableColumn
+
+ # Test different temporal types
+ temporal_types = ["DATE", "DATETIME", "TIMESTAMP"]
+
+ for type_name in temporal_types:
+ table = SqlaTable(
+ table_name="test_table",
+ database=database,
+ columns=[
+ TableColumn(
+ column_name="time_col",
+ type=type_name,
+ is_dttm=True,
+ )
+ ],
+ )
+
+ time_col: AdhocColumn = {
+ "sqlExpression": "time_col",
+ "label": "time_col",
+ "isColumnReference": True,
+ "timeGrain": "P1D",
+ "columnType": "BASE_AXIS",
+ }
+
+ result = table.adhoc_column_to_sqla(time_col)
+
+ assert result is not None
+ result_str = str(result)
+
+ # Verify the column name is present
+ assert "time_col" in result_str
+
+
+def test_adhoc_column_with_spaces_generates_quoted_sql(database: Database) -> None:
+ """
+ Test that column names with spaces are properly quoted in the generated SQL.
+
+ This verifies that even though we look up columns using unquoted names,
+ the final SQL still properly quotes column names that need quoting (like those with
+ spaces).
+ """
+
+ from superset.connectors.sqla.models import SqlaTable, TableColumn
+
+ table = SqlaTable(
+ table_name="test_table",
+ database=database,
+ columns=[
+ TableColumn(column_name="Customer Name", type="TEXT"),
+ TableColumn(column_name="Order Total", type="NUMERIC"),
+ ],
+ )
+
+ # Test column reference with spaces
+ col_with_spaces: AdhocColumn = {
+ "sqlExpression": "Customer Name",
+ "label": "Customer Name",
+ "isColumnReference": True,
+ }
+
+ result = table.adhoc_column_to_sqla(col_with_spaces)
+
+ # Compile the column to SQL to see how it's rendered
+ with database.get_sqla_engine() as engine:
+ sql = str(
+ result.compile(
+ dialect=engine.dialect, compile_kwargs={"literal_binds": True}
+ )
+ )
+
+ # The SQL should quote the column name (SQLite uses double quotes)
+ # Column names with spaces MUST be quoted in SQL
+ assert '"Customer Name"' in sql, f"Expected quoted column name in SQL: {sql}"
+
+ # Also test that it works in a query context
+ col_numeric: AdhocColumn = {
+ "sqlExpression": "Order Total",
+ "label": "Order Total",
+ "isColumnReference": True,
+ }
+
+ result_numeric = table.adhoc_column_to_sqla(col_numeric)
+
+ with database.get_sqla_engine() as engine:
+ sql_numeric = str(
+ result_numeric.compile(
+ dialect=engine.dialect, compile_kwargs={"literal_binds": True}
+ )
+ )
+
+ assert '"Order Total"' in sql_numeric, (
+ f"Expected quoted column name in SQL: {sql_numeric}"
+ )
+
+
+def test_adhoc_column_with_spaces_in_full_query(database: Database) -> None:
+ """
+ Test that column names with spaces work correctly in a full SELECT query.
+
+ This demonstrates that the fix properly handles column names with spaces
+ throughout the entire query generation process, with proper quoting in the final
+ SQL.
+ """
+ import sqlalchemy as sa
+
+ from superset.connectors.sqla.models import SqlaTable, TableColumn
+
+ table = SqlaTable(
+ table_name="test_table",
+ database=database,
+ columns=[
+ TableColumn(column_name="Customer Name", type="TEXT"),
+ TableColumn(column_name="Order Total", type="NUMERIC"),
+ ],
+ )
+
+ # Create adhoc columns for both columns with spaces
+ customer_col: AdhocColumn = {
+ "sqlExpression": "Customer Name",
+ "label": "Customer Name",
+ "isColumnReference": True,
+ }
+
+ order_col: AdhocColumn = {
+ "sqlExpression": "Order Total",
+ "label": "Order Total",
+ "isColumnReference": True,
+ }
+
+ # Get SQLAlchemy columns
+ customer_sqla = table.adhoc_column_to_sqla(customer_col)
+ order_sqla = table.adhoc_column_to_sqla(order_col)
+
+ # Build a full query
+ tbl = table.get_sqla_table()
+ query = sa.select(customer_sqla, order_sqla).select_from(tbl)
+
+ # Compile to SQL
+ with database.get_sqla_engine() as engine:
+ sql = str(
+ query.compile(
+ dialect=engine.dialect, compile_kwargs={"literal_binds": True}
+ )
+ )
+
+ # Verify both column names are quoted in the final SQL
+ assert '"Customer Name"' in sql, f"Customer Name not properly quoted in SQL: {sql}"
+ assert '"Order Total"' in sql, f"Order Total not properly quoted in SQL: {sql}"
+
+ # Verify SELECT and FROM clauses are present
+ assert "SELECT" in sql
+ assert "FROM" in sql