docs/scripts/extract_custom_errors.py - superset - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 """
 Extract custom_errors from database engine specs for documentation.

 This script parses engine spec files to extract error handling information
 that can be displayed on database documentation pages.

 Usage: python scripts/extract_custom_errors.py
 Output: JSON mapping of engine spec module names to their custom errors
 """

 import ast
 import json  # noqa: TID251 - standalone docs script, not part of superset
 import sys
 from pathlib import Path
 from typing import Any

 # Map SupersetErrorType values to human-readable categories and issue codes
 ERROR_TYPE_INFO = {
     error_type: {"category": category, "description": summary, "issue_codes": codes}
     for error_type, category, summary, codes in (
         # Each row: (error type name, category, short description, issue codes)
         (
             "CONNECTION_INVALID_USERNAME_ERROR",
             "Authentication",
             "Invalid username",
             [1012],
         ),
         (
             "CONNECTION_INVALID_PASSWORD_ERROR",
             "Authentication",
             "Invalid password",
             [1013],
         ),
         (
             "CONNECTION_ACCESS_DENIED_ERROR",
             "Authentication",
             "Access denied",
             [1014, 1015],
         ),
         ("CONNECTION_INVALID_HOSTNAME_ERROR", "Connection", "Invalid hostname", [1007]),
         (
             "CONNECTION_PORT_CLOSED_ERROR",
             "Connection",
             "Port closed or refused",
             [1008],
         ),
         ("CONNECTION_HOST_DOWN_ERROR", "Connection", "Host unreachable", [1009]),
         ("CONNECTION_UNKNOWN_DATABASE_ERROR", "Connection", "Unknown database", [1015]),
         (
             "CONNECTION_DATABASE_PERMISSIONS_ERROR",
             "Permissions",
             "Insufficient permissions",
             [1017],
         ),
         (
             "CONNECTION_MISSING_PARAMETERS_ERROR",
             "Configuration",
             "Missing parameters",
             [1018],
         ),
         (
             "CONNECTION_DATABASE_TIMEOUT",
             "Connection",
             "Connection timeout",
             [1001, 1009],
         ),
         ("COLUMN_DOES_NOT_EXIST_ERROR", "Query", "Column not found", [1003, 1004]),
         ("TABLE_DOES_NOT_EXIST_ERROR", "Query", "Table not found", [1003, 1005]),
         ("SCHEMA_DOES_NOT_EXIST_ERROR", "Query", "Schema not found", [1003, 1016]),
         ("SYNTAX_ERROR", "Query", "SQL syntax error", [1030]),
         ("OBJECT_DOES_NOT_EXIST_ERROR", "Query", "Object not found", [1029]),
         ("GENERIC_DB_ENGINE_ERROR", "General", "Database engine error", [1002]),
     )
 }


 def extract_string_from_call(node: ast.Call) -> str | None:
     """Extract string from __() or _() translation calls."""
     if not node.args:
         return None
     arg = node.args[0]
     if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
         return arg.value
     elif isinstance(arg, ast.JoinedStr):
         # f-string - try to reconstruct
         parts = []
         for value in arg.values:
             if isinstance(value, ast.Constant):
                 parts.append(str(value.value))
             elif isinstance(value, ast.FormattedValue):
                 # Just use a placeholder
                 parts.append("{...}")
         return "".join(parts)
     return None


 def extract_custom_errors_from_file(filepath: Path) -> dict[str, list[dict[str, Any]]]:
     """
     Collect ``custom_errors`` dict literals defined in the classes of a file.

     Every class found anywhere in the module is inspected. A class-body
     statement counts when it is an annotated assignment or a single-target
     plain assignment to the name ``custom_errors`` whose value is a dict
     literal.

     Failures to read or parse the file (``OSError``, ``SyntaxError``,
     ``ValueError``) are reported on stderr instead of being raised.

     Returns a dict mapping class names to their custom errors list; classes
     whose dict yields no usable entries are left out.
     """
     found = {}

     try:
         with open(filepath, "r", encoding="utf-8") as handle:
             source = handle.read()

         class_defs = (
             candidate
             for candidate in ast.walk(ast.parse(source))
             if isinstance(candidate, ast.ClassDef)
         )
         for class_def in class_defs:
             for stmt in class_def.body:
                 # Normalize both assignment flavours to a list of targets.
                 if isinstance(stmt, ast.AnnAssign):
                     targets = [stmt.target]
                 elif isinstance(stmt, ast.Assign):
                     targets = stmt.targets
                 else:
                     continue

                 if len(targets) != 1:
                     continue
                 target = targets[0]
                 if not (isinstance(target, ast.Name) and target.id == "custom_errors"):
                     continue
                 # A bare annotation has no value; computed values are skipped.
                 if not isinstance(stmt.value, ast.Dict):
                     continue

                 extracted = extract_errors_from_dict(stmt.value, source)
                 if extracted:
                     found[class_def.name] = extracted

     except (OSError, SyntaxError, ValueError) as e:
         print(f"Error parsing {filepath}: {e}", file=sys.stderr)

     return found


 def extract_regex_info(key: ast.expr) -> dict[str, Any]:
     """Describe the regex used as a ``custom_errors`` dict key.

     Args:
         key: The AST expression used as the dict key.

     Returns:
         ``{"regex_name": <identifier>}`` when the key is a bare name,
         ``{"regex_pattern": <text>}`` when it is a ``<something>.compile(...)``
         call whose first positional argument is a string literal, and an
         empty dict for every other expression.
     """
     if isinstance(key, ast.Name):
         return {"regex_name": key.id}

     if (
         isinstance(key, ast.Call)
         and isinstance(key.func, ast.Attribute)
         and key.func.attr == "compile"
         and key.args
     ):
         pattern = key.args[0]
         # Only text patterns are reported: a bytes (or other non-str) constant
         # cannot be serialized by json.dumps and would abort the whole run.
         if isinstance(pattern, ast.Constant) and isinstance(pattern.value, str):
             return {"regex_pattern": pattern.value}

     return {}


 def extract_invalid_fields(extra_node: ast.Dict) -> list[str]:
     """Extract invalid fields from the extra dict."""
     for k, v in zip(extra_node.keys, extra_node.values, strict=False):
         if (
             isinstance(k, ast.Constant)
             and k.value == "invalid"
             and isinstance(v, ast.List)
         ):
             return [elem.value for elem in v.elts if isinstance(elem, ast.Constant)]
     return []


 def extract_error_tuple_info(value: ast.Tuple) -> dict[str, Any]:
     """Extract error info from the (message, error_type, extra) tuple.

     Args:
         value: The tuple literal stored as a ``custom_errors`` dict value.

     Returns:
         A dict that may contain ``message_template``, ``error_type``,
         ``category``, ``description``, ``issue_codes`` and
         ``invalid_fields``. Each key is present only when the matching tuple
         element has a recognized shape; missing elements are skipped rather
         than raising.
     """
     result: dict[str, Any] = {}
     elts = value.elts

     # First element: message template (translation call or plain literal).
     if elts:
         msg_node = elts[0]
         if isinstance(msg_node, ast.Call):
             message = extract_string_from_call(msg_node)
             if message:
                 result["message_template"] = message
         elif isinstance(msg_node, ast.Constant) and isinstance(msg_node.value, str):
             # Non-string constants (e.g. bytes) are not messages and would
             # break JSON serialization of the final output.
             result["message_template"] = msg_node.value

     # Second element: SupersetErrorType.SOMETHING
     if len(elts) >= 2 and isinstance(elts[1], ast.Attribute):
         error_type = elts[1].attr
         result["error_type"] = error_type
         type_info = ERROR_TYPE_INFO.get(error_type)
         if type_info is not None:
             result["category"] = type_info["category"]
             result["description"] = type_info["description"]
             result["issue_codes"] = type_info["issue_codes"]

     # Third element: extra dict with invalid fields
     if len(elts) >= 3 and isinstance(elts[2], ast.Dict):
         invalid_fields = extract_invalid_fields(elts[2])
         if invalid_fields:
             result["invalid_fields"] = invalid_fields

     return result


 def extract_errors_from_dict(dict_node: ast.Dict, source: str) -> list[dict[str, Any]]:
     """Extract error information from a custom_errors dict AST node."""
     errors = []

     for key, value in zip(dict_node.keys, dict_node.values, strict=False):
         if key is None or value is None:
             continue

         error_info = extract_regex_info(key)

         if isinstance(value, ast.Tuple) and len(value.elts) >= 2:
             error_info.update(extract_error_tuple_info(value))

         if error_info.get("error_type") and error_info.get("message_template"):
             errors.append(error_info)

     return errors


 def main() -> None:
     """Print the custom errors of every engine spec module as JSON.

     The ``superset/db_engine_specs`` directory is looked up two levels above
     the directory containing this script. If it does not exist, an error is
     written to stderr and the process exits with status 1. Files whose name
     starts with an underscore are skipped, and modules without any extracted
     errors are omitted from the output.
     """
     repo_root = Path(__file__).parent.parent.parent
     specs_dir = repo_root / "superset" / "db_engine_specs"

     if not specs_dir.exists():
         print(f"Error: Engine specs directory not found: {specs_dir}", file=sys.stderr)
         sys.exit(1)

     # Sorted so the generated JSON is stable from run to run.
     per_module = (
         (spec_file.stem, extract_custom_errors_from_file(spec_file))
         for spec_file in sorted(specs_dir.glob("*.py"))
         if not spec_file.name.startswith("_")
     )
     # Keyed by module name, then by class name inside each module.
     all_errors = {module: found for module, found in per_module if found}

     print(json.dumps(all_errors, indent=2))


 # Run the extraction only when this file is executed as a script.
 if __name__ == "__main__":
     main()
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	"""
	Extract custom_errors from database engine specs for documentation.

	This script parses engine spec files to extract error handling information
	that can be displayed on database documentation pages.

	Usage: python scripts/extract_custom_errors.py
	Output: JSON mapping of engine spec module names to their custom errors
	"""

	import ast
	import json # noqa: TID251 - standalone docs script, not part of superset
	import sys
	from pathlib import Path
	from typing import Any

	# Map SupersetErrorType values to human-readable categories and issue codes
	ERROR_TYPE_INFO = {
	    error_type: {"category": category, "description": summary, "issue_codes": codes}
	    for error_type, category, summary, codes in (
	        # Each row: (error type name, category, short description, issue codes)
	        (
	            "CONNECTION_INVALID_USERNAME_ERROR",
	            "Authentication",
	            "Invalid username",
	            [1012],
	        ),
	        (
	            "CONNECTION_INVALID_PASSWORD_ERROR",
	            "Authentication",
	            "Invalid password",
	            [1013],
	        ),
	        (
	            "CONNECTION_ACCESS_DENIED_ERROR",
	            "Authentication",
	            "Access denied",
	            [1014, 1015],
	        ),
	        ("CONNECTION_INVALID_HOSTNAME_ERROR", "Connection", "Invalid hostname", [1007]),
	        (
	            "CONNECTION_PORT_CLOSED_ERROR",
	            "Connection",
	            "Port closed or refused",
	            [1008],
	        ),
	        ("CONNECTION_HOST_DOWN_ERROR", "Connection", "Host unreachable", [1009]),
	        ("CONNECTION_UNKNOWN_DATABASE_ERROR", "Connection", "Unknown database", [1015]),
	        (
	            "CONNECTION_DATABASE_PERMISSIONS_ERROR",
	            "Permissions",
	            "Insufficient permissions",
	            [1017],
	        ),
	        (
	            "CONNECTION_MISSING_PARAMETERS_ERROR",
	            "Configuration",
	            "Missing parameters",
	            [1018],
	        ),
	        (
	            "CONNECTION_DATABASE_TIMEOUT",
	            "Connection",
	            "Connection timeout",
	            [1001, 1009],
	        ),
	        ("COLUMN_DOES_NOT_EXIST_ERROR", "Query", "Column not found", [1003, 1004]),
	        ("TABLE_DOES_NOT_EXIST_ERROR", "Query", "Table not found", [1003, 1005]),
	        ("SCHEMA_DOES_NOT_EXIST_ERROR", "Query", "Schema not found", [1003, 1016]),
	        ("SYNTAX_ERROR", "Query", "SQL syntax error", [1030]),
	        ("OBJECT_DOES_NOT_EXIST_ERROR", "Query", "Object not found", [1029]),
	        ("GENERIC_DB_ENGINE_ERROR", "General", "Database engine error", [1002]),
	    )
	}


	def extract_string_from_call(node: ast.Call) -> str \| None:
	"""Extract string from __() or _() translation calls."""
	if not node.args:
	return None
	arg = node.args[0]
	if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
	return arg.value
	elif isinstance(arg, ast.JoinedStr):
	# f-string - try to reconstruct
	parts = []
	for value in arg.values:
	if isinstance(value, ast.Constant):
	parts.append(str(value.value))
	elif isinstance(value, ast.FormattedValue):
	# Just use a placeholder
	parts.append("{...}")
	return "".join(parts)
	return None


	def extract_custom_errors_from_file(filepath: Path) -> dict[str, list[dict[str, Any]]]:
	    """
	    Extract custom_errors definitions from a Python engine spec file.

	    Every class found anywhere in the module is inspected. A class-body
	    statement counts when it is an annotated assignment or a single-target
	    plain assignment to the name ``custom_errors`` whose value is a dict
	    literal.

	    Failures to read or parse the file (``OSError``, ``SyntaxError``,
	    ``ValueError``) are reported on stderr instead of being raised.

	    Returns a dict mapping class names to their custom errors list; classes
	    whose dict yields no usable entries are left out.
	    """
	    results = {}

	    try:
	        with open(filepath, "r", encoding="utf-8") as f:
	            source = f.read()

	        tree = ast.parse(source)

	        for node in ast.walk(tree):
	            if not isinstance(node, ast.ClassDef):
	                continue

	            for item in node.body:
	                # Normalize ``custom_errors: T = {...}`` and
	                # ``custom_errors = {...}`` to a list of targets.
	                if isinstance(item, ast.AnnAssign):
	                    targets = [item.target]
	                elif isinstance(item, ast.Assign):
	                    targets = item.targets
	                else:
	                    continue

	                if (
	                    len(targets) == 1
	                    and isinstance(targets[0], ast.Name)
	                    and targets[0].id == "custom_errors"
	                    and isinstance(item.value, ast.Dict)
	                ):
	                    errors = extract_errors_from_dict(item.value, source)
	                    if errors:
	                        results[node.name] = errors

	    except (OSError, SyntaxError, ValueError) as e:
	        print(f"Error parsing {filepath}: {e}", file=sys.stderr)

	    return results


	def extract_regex_info(key: ast.expr) -> dict[str, Any]:
	    """Describe the regex used as a ``custom_errors`` dict key.

	    Args:
	        key: The AST expression used as the dict key.

	    Returns:
	        ``{"regex_name": <identifier>}`` when the key is a bare name,
	        ``{"regex_pattern": <text>}`` when it is a ``<something>.compile(...)``
	        call whose first positional argument is a string literal, and an
	        empty dict for every other expression.
	    """
	    if isinstance(key, ast.Name):
	        return {"regex_name": key.id}

	    if (
	        isinstance(key, ast.Call)
	        and isinstance(key.func, ast.Attribute)
	        and key.func.attr == "compile"
	        and key.args
	    ):
	        pattern = key.args[0]
	        # Only text patterns are reported: a bytes (or other non-str) constant
	        # cannot be serialized by json.dumps and would abort the whole run.
	        if isinstance(pattern, ast.Constant) and isinstance(pattern.value, str):
	            return {"regex_pattern": pattern.value}

	    return {}


	def extract_invalid_fields(extra_node: ast.Dict) -> list[str]:
	"""Extract invalid fields from the extra dict."""
	for k, v in zip(extra_node.keys, extra_node.values, strict=False):
	if (
	isinstance(k, ast.Constant)
	and k.value == "invalid"
	and isinstance(v, ast.List)
	):
	return [elem.value for elem in v.elts if isinstance(elem, ast.Constant)]
	return []


	def extract_error_tuple_info(value: ast.Tuple) -> dict[str, Any]:
	    """Extract error info from the (message, error_type, extra) tuple.

	    Args:
	        value: The tuple literal stored as a ``custom_errors`` dict value.

	    Returns:
	        A dict that may contain ``message_template``, ``error_type``,
	        ``category``, ``description``, ``issue_codes`` and
	        ``invalid_fields``. Each key is present only when the matching tuple
	        element has a recognized shape; missing elements are skipped rather
	        than raising.
	    """
	    result: dict[str, Any] = {}
	    elts = value.elts

	    # First element: message template (translation call or plain literal).
	    if elts:
	        msg_node = elts[0]
	        if isinstance(msg_node, ast.Call):
	            message = extract_string_from_call(msg_node)
	            if message:
	                result["message_template"] = message
	        elif isinstance(msg_node, ast.Constant) and isinstance(msg_node.value, str):
	            # Non-string constants (e.g. bytes) are not messages and would
	            # break JSON serialization of the final output.
	            result["message_template"] = msg_node.value

	    # Second element: SupersetErrorType.SOMETHING
	    if len(elts) >= 2 and isinstance(elts[1], ast.Attribute):
	        error_type = elts[1].attr
	        result["error_type"] = error_type
	        type_info = ERROR_TYPE_INFO.get(error_type)
	        if type_info is not None:
	            result["category"] = type_info["category"]
	            result["description"] = type_info["description"]
	            result["issue_codes"] = type_info["issue_codes"]

	    # Third element: extra dict with invalid fields
	    if len(elts) >= 3 and isinstance(elts[2], ast.Dict):
	        invalid_fields = extract_invalid_fields(elts[2])
	        if invalid_fields:
	            result["invalid_fields"] = invalid_fields

	    return result


	def extract_errors_from_dict(dict_node: ast.Dict, source: str) -> list[dict[str, Any]]:
	"""Extract error information from a custom_errors dict AST node."""
	errors = []

	for key, value in zip(dict_node.keys, dict_node.values, strict=False):
	if key is None or value is None:
	continue

	error_info = extract_regex_info(key)

	if isinstance(value, ast.Tuple) and len(value.elts) >= 2:
	error_info.update(extract_error_tuple_info(value))

	if error_info.get("error_type") and error_info.get("message_template"):
	errors.append(error_info)

	return errors


	def main() -> None:
	    """Print the custom errors of every engine spec module as JSON.

	    The ``superset/db_engine_specs`` directory is looked up two levels above
	    the directory containing this script. If it does not exist, an error is
	    written to stderr and the process exits with status 1. Files whose name
	    starts with an underscore are skipped, and modules without any extracted
	    errors are omitted from the output.
	    """
	    # Find the superset root directory
	    script_dir = Path(__file__).parent
	    root_dir = script_dir.parent.parent
	    specs_dir = root_dir / "superset" / "db_engine_specs"

	    if not specs_dir.exists():
	        print(f"Error: Engine specs directory not found: {specs_dir}", file=sys.stderr)
	        sys.exit(1)

	    all_errors = {}

	    # Process each Python file in the specs directory; sorted so the
	    # generated JSON is stable from run to run.
	    for filepath in sorted(specs_dir.glob("*.py")):
	        if filepath.name.startswith("_"):
	            continue

	        class_errors = extract_custom_errors_from_file(filepath)
	        if class_errors:
	            # Store errors by module and class
	            all_errors[filepath.stem] = class_errors

	    # Output as JSON
	    print(json.dumps(all_errors, indent=2))


	# Run the extraction only when this file is executed as a script.
	if __name__ == "__main__":
	    main()