| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /** |
| * This script generates database documentation data from engine spec metadata. |
| * It outputs a JSON file that can be imported by React components for rendering. |
| * |
| * Usage: node scripts/generate-database-docs.mjs |
| * |
| * The script can run in two modes: |
| * 1. With Flask app (full diagnostics) - requires superset to be installed |
| * 2. Fallback mode (documentation only) - parses engine spec `metadata` attributes via AST |
| */ |
| |
| import { spawnSync } from 'child_process'; |
| import fs from 'fs'; |
| import { createRequire } from 'module'; |
| import path from 'path'; |
| import { fileURLToPath } from 'url'; |
| |
// CommonJS-style loader so packages can be `require`d from this ES module.
const require = createRequire(import.meta.url);

// Absolute path of this script and of the directory that contains it.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// One and two directory levels above this script's directory.
const DOCS_DIR = path.dirname(__dirname);
const ROOT_DIR = path.dirname(DOCS_DIR);

// Generated JSON data file and the directory that holds it.
const DATA_OUTPUT_DIR = path.join(DOCS_DIR, 'src', 'data');
const DATA_OUTPUT_FILE = path.join(DATA_OUTPUT_DIR, 'databases.json');

// Generated MDX pages: overview directory and per-database subdirectory.
const MDX_OUTPUT_DIR = path.join(DOCS_DIR, 'docs', 'databases');
const MDX_SUPPORTED_DIR = path.join(MDX_OUTPUT_DIR, 'supported');

// Directory the database logo files are read from.
const IMAGES_DIR = path.join(DOCS_DIR, 'static', 'img', 'databases');
| |
/**
 * Try to run the full lib.py script with Flask context.
 *
 * Spawns `python` in ROOT_DIR with an inline snippet that creates the
 * Superset app, calls `generate_yaml_docs()` inside the app context and prints
 * the result as JSON on stdout.
 *
 * @returns {object|null} The parsed JSON printed by the snippet, or `null`
 *   when the process cannot be spawned, exits non-zero, or prints output that
 *   is not valid JSON (the reason is logged and the caller falls back).
 */
function tryRunFullScript() {
  try {
    console.log('Attempting to run lib.py with Flask context...');
    // The template below is Python source: its indentation is significant.
    const pythonCode = `
import sys
import json
sys.path.insert(0, '.')
from superset.app import create_app
from superset.db_engine_specs.lib import generate_yaml_docs
app = create_app()
with app.app_context():
    docs = generate_yaml_docs()
    print(json.dumps(docs, default=str))
`;
    const result = spawnSync('python', ['-c', pythonCode], {
      cwd: ROOT_DIR,
      encoding: 'utf-8',
      timeout: 60000,
      maxBuffer: 10 * 1024 * 1024,
      env: { ...process.env, SUPERSET_SECRET_KEY: 'docs-build-key' },
    });

    // `error` is set when the process could not be spawned or was killed.
    if (result.error) {
      throw result.error;
    }
    if (result.status !== 0) {
      throw new Error(result.stderr || 'Python script failed');
    }
    return JSON.parse(result.stdout);
  } catch (error) {
    console.log('Full script execution failed, using fallback mode...');
    // Only the first line of the message is logged to keep tracebacks short.
    console.log(' Reason:', error.message?.split('\n')[0] || 'Unknown error');
    return null;
  }
}
| |
/**
 * Extract metadata from individual engine spec files using AST parsing.
 *
 * Spawns `python3` in ROOT_DIR with an inline script that parses every `.py`
 * file in `superset/db_engine_specs` (except `__init__.py`, `lib.py` and
 * `lint_metadata.py`) with the `ast` module, collects the `engine_name` and
 * `metadata` class attributes of engine spec / mixin classes, and merges each
 * class's metadata with the metadata of its parent classes. Nothing from the
 * spec files is imported or executed.
 *
 * The script prints the resulting database map as JSON on stdout and debug
 * counters on stderr.
 *
 * @returns {object|null} Map of display name to database entry, or `null`
 *   when the script fails, prints invalid JSON, or finds no metadata.
 */
function extractEngineSpecMetadata() {
  console.log('Extracting metadata from engine spec files...');
  console.log(` ROOT_DIR: ${ROOT_DIR}`);

  try {
    // The template below is Python source: its indentation is significant.
    const pythonCode = `
import sys
import json
import ast
import os

def eval_node(node):
    """Safely evaluate an AST node as a Python literal."""
    if node is None:
        return None
    if isinstance(node, ast.Constant):
        return node.value
    elif isinstance(node, ast.List):
        return [eval_node(e) for e in node.elts]
    elif isinstance(node, ast.Dict):
        result = {}
        for k, v in zip(node.keys, node.values):
            if k is not None:
                key = eval_node(k)
                if key is not None:
                    result[key] = eval_node(v)
        return result
    elif isinstance(node, ast.Name):
        # Handle True, False, None constants
        if node.id == 'True':
            return True
        elif node.id == 'False':
            return False
        elif node.id == 'None':
            return None
        return node.id
    elif isinstance(node, ast.Attribute):
        # Handle DatabaseCategory.SOMETHING - return just the attribute name
        return node.attr
    elif isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
        left, right = eval_node(node.left), eval_node(node.right)
        if isinstance(left, str) and isinstance(right, str):
            return left + right
        return None
    elif isinstance(node, ast.Tuple):
        return tuple(eval_node(e) for e in node.elts)
    elif isinstance(node, ast.JoinedStr):
        # f-strings - just return a placeholder
        return "<f-string>"
    return None

def deep_merge(base, override):
    """Deep merge two dictionaries. Override values take precedence."""
    if base is None:
        return override
    if override is None:
        return base
    if not isinstance(base, dict) or not isinstance(override, dict):
        return override

    # Fields that should NOT be inherited from parent classes
    # - compatible_databases: Each class defines its own compatible DBs
    # - categories: Each class defines its own categories (not extended from parent)
    NON_INHERITABLE_FIELDS = {'compatible_databases', 'categories'}

    result = base.copy()
    # Remove non-inheritable fields from base (they should only come from the class that defines them)
    for field in NON_INHERITABLE_FIELDS:
        result.pop(field, None)

    for key, value in override.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = deep_merge(result[key], value)
        elif key in result and isinstance(result[key], list) and isinstance(value, list):
            # Extend lists from parent (e.g., drivers)
            result[key] = result[key] + value
        else:
            result[key] = value
    return result

databases = {}
specs_dir = 'superset/db_engine_specs'
errors = []
debug_info = {
    "cwd": os.getcwd(),
    "specs_dir_exists": os.path.isdir(specs_dir),
    "files_checked": 0,
    "classes_found": 0,
    "classes_with_metadata": 0,
    "inherited_metadata": 0,
}

if not os.path.isdir(specs_dir):
    print(json.dumps({"error": f"Directory not found: {specs_dir}", "cwd": os.getcwd()}))
    sys.exit(1)

# First pass: collect all class info (name, bases, metadata)
class_info = {}  # class_name -> {bases: [], metadata: {}, engine_name: str, filename: str}

for filename in sorted(os.listdir(specs_dir)):
    if not filename.endswith('.py') or filename in ('__init__.py', 'lib.py', 'lint_metadata.py'):
        continue

    debug_info["files_checked"] += 1
    filepath = os.path.join(specs_dir, filename)
    try:
        # Read as UTF-8 explicitly instead of relying on the platform default encoding
        with open(filepath, encoding='utf-8') as f:
            source = f.read()
        tree = ast.parse(source)

        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue

            # Get base class names
            base_names = []
            for b in node.bases:
                if isinstance(b, ast.Name):
                    base_names.append(b.id)
                elif isinstance(b, ast.Attribute):
                    base_names.append(b.attr)

            is_engine_spec = any('EngineSpec' in name or 'Mixin' in name for name in base_names)
            if not is_engine_spec:
                continue

            # Extract class attributes
            engine_name = None
            metadata = None

            for item in node.body:
                if isinstance(item, ast.Assign):
                    for target in item.targets:
                        if isinstance(target, ast.Name):
                            if target.id == 'engine_name':
                                val = eval_node(item.value)
                                if isinstance(val, str):
                                    engine_name = val
                            elif target.id == 'metadata':
                                metadata = eval_node(item.value)

            # Check for engine attribute with non-empty value to distinguish
            # true base classes from product classes like OceanBaseEngineSpec
            has_non_empty_engine = False
            for item in node.body:
                if isinstance(item, ast.Assign):
                    for target in item.targets:
                        if isinstance(target, ast.Name) and target.id == 'engine':
                            # Check if engine value is non-empty string
                            if isinstance(item.value, ast.Constant):
                                has_non_empty_engine = bool(item.value.value)
                            break

            # True base classes: end with BaseEngineSpec AND don't define engine
            # or have empty engine (like PostgresBaseEngineSpec with engine = "")
            is_true_base = (
                node.name.endswith('BaseEngineSpec') and not has_non_empty_engine
            ) or 'Mixin' in node.name

            # Store class info for inheritance resolution
            class_info[node.name] = {
                'bases': base_names,
                'metadata': metadata,
                'engine_name': engine_name,
                'filename': filename,
                'is_base_or_mixin': is_true_base,
            }
    except Exception as e:
        errors.append(f"{filename}: {str(e)}")

# Second pass: resolve inheritance and build final metadata
def get_inherited_metadata(class_name, visited=None):
    """Recursively get metadata from parent classes."""
    if visited is None:
        visited = set()
    if class_name in visited:
        return {}  # Prevent circular inheritance
    visited.add(class_name)

    info = class_info.get(class_name)
    if not info:
        return {}

    # Start with parent metadata
    inherited = {}
    for base_name in info['bases']:
        parent_metadata = get_inherited_metadata(base_name, visited.copy())
        if parent_metadata:
            inherited = deep_merge(inherited, parent_metadata)

    # Merge with own metadata (own takes precedence)
    if info['metadata']:
        inherited = deep_merge(inherited, info['metadata'])

    return inherited

for class_name, info in class_info.items():
    # Skip base classes and mixins
    if info['is_base_or_mixin']:
        continue

    debug_info["classes_found"] += 1

    # Get final metadata with inheritance
    final_metadata = get_inherited_metadata(class_name)

    # Remove compatible_databases if not defined by this class (it's not inheritable)
    own_metadata = info['metadata'] or {}
    if 'compatible_databases' not in own_metadata and 'compatible_databases' in final_metadata:
        del final_metadata['compatible_databases']

    # Track if we inherited anything
    if final_metadata and final_metadata != own_metadata:
        debug_info["inherited_metadata"] += 1

    # Use class name as fallback for engine_name
    display_name = info['engine_name'] or class_name.replace('EngineSpec', '').replace('_', ' ')

    if final_metadata and isinstance(final_metadata, dict) and display_name:
        debug_info["classes_with_metadata"] += 1
        databases[display_name] = {
            'engine': display_name.lower().replace(' ', '_'),
            'engine_name': display_name,
            'module': info['filename'][:-3],  # Remove .py extension
            'documentation': final_metadata,
            'time_grains': {},
            'score': 0,
            'max_score': 0,
            'joins': True,
            'subqueries': True,
            'supports_dynamic_schema': False,
            'supports_catalog': False,
            'supports_dynamic_catalog': False,
            'ssh_tunneling': False,
            'query_cancelation': False,
            'supports_file_upload': False,
            'user_impersonation': False,
            'query_cost_estimation': False,
            'sql_validation': False,
        }

if errors and not databases:
    print(json.dumps({"error": "Parse errors", "details": errors, "debug": debug_info}), file=sys.stderr)

# Print debug info to stderr for troubleshooting
print(json.dumps(debug_info), file=sys.stderr)

print(json.dumps(databases, default=str))
`;
    const result = spawnSync('python3', ['-c', pythonCode], {
      cwd: ROOT_DIR,
      encoding: 'utf-8',
      timeout: 30000,
      maxBuffer: 10 * 1024 * 1024,
    });

    // `error` is set when the process could not be spawned or was killed.
    if (result.error) {
      throw result.error;
    }
    // Log debug info from stderr
    if (result.stderr) {
      console.log('Python debug info:', result.stderr.trim());
    }
    if (result.status !== 0) {
      // The script reports a missing specs directory on stdout, so fall back
      // to stdout when stderr carries no message.
      throw new Error(
        result.stderr?.trim() || result.stdout?.trim() || 'Python script failed'
      );
    }
    const databases = JSON.parse(result.stdout);
    const databaseCount = Object.keys(databases).length;
    if (databaseCount === 0) {
      throw new Error('No metadata found in engine specs');
    }

    console.log(`Extracted metadata from ${databaseCount} engine specs`);
    return databases;
  } catch (err) {
    console.log('Engine spec metadata extraction failed:', err.message);
    return null;
  }
}
| |
// Maps category constant names to the display names used as `byCategory` keys.
const CATEGORY_DISPLAY_NAMES = Object.freeze({
  'CLOUD_AWS': 'Cloud - AWS',
  'CLOUD_GCP': 'Cloud - Google',
  'CLOUD_AZURE': 'Cloud - Azure',
  'CLOUD_DATA_WAREHOUSES': 'Cloud Data Warehouses',
  'APACHE_PROJECTS': 'Apache Projects',
  'TRADITIONAL_RDBMS': 'Traditional RDBMS',
  'ANALYTICAL_DATABASES': 'Analytical Databases',
  'SEARCH_NOSQL': 'Search & NoSQL',
  'QUERY_ENGINES': 'Query Engines',
  'TIME_SERIES': 'Time Series Databases',
  'OTHER': 'Other Databases',
  'OPEN_SOURCE': 'Open Source',
  'HOSTED_OPEN_SOURCE': 'Hosted Open Source',
  'PROPRIETARY': 'Proprietary',
});

/**
 * Build statistics from the database data.
 *
 * @param {object} databases - Map of database name to database entry. Each
 *   entry may carry `documentation`, `joins`, `subqueries`,
 *   `supports_dynamic_schema`, `supports_catalog`, `score` and `max_score`.
 * @returns {object} Counters for documentation and feature coverage, the
 *   rounded average score (0 when there are no databases), the highest
 *   `max_score`, and `byCategory`, a map of category display name to the
 *   names of the databases in that category.
 */
function buildStatistics(databases) {
  const stats = {
    totalDatabases: Object.keys(databases).length,
    withDocumentation: 0,
    withConnectionString: 0,
    withDrivers: 0,
    withAuthMethods: 0,
    supportsJoins: 0,
    supportsSubqueries: 0,
    supportsDynamicSchema: 0,
    supportsCatalog: 0,
    averageScore: 0,
    maxScore: 0,
    byCategory: {},
  };

  let totalScore = 0;

  for (const [name, db] of Object.entries(databases)) {
    const docs = db.documentation || {};

    if (Object.keys(docs).length > 0) stats.withDocumentation++;
    if (docs.connection_string || docs.drivers?.length > 0) {
      stats.withConnectionString++;
    }
    if (docs.drivers?.length > 0) stats.withDrivers++;
    if (docs.authentication_methods?.length > 0) stats.withAuthMethods++;
    if (db.joins) stats.supportsJoins++;
    if (db.subqueries) stats.supportsSubqueries++;
    if (db.supports_dynamic_schema) stats.supportsDynamicSchema++;
    if (db.supports_catalog) stats.supportsCatalog++;

    totalScore += db.score || 0;
    if (db.max_score > stats.maxScore) stats.maxScore = db.max_score;

    // A database can belong to several categories; entries without a
    // `categories` list are filed under OTHER.
    const categories = docs.categories || ['OTHER'];
    for (const cat of categories) {
      // Unknown category names are used verbatim as their display name.
      const displayName = CATEGORY_DISPLAY_NAMES[cat] || cat;
      if (!stats.byCategory[displayName]) {
        stats.byCategory[displayName] = [];
      }
      stats.byCategory[displayName].push(name);
    }
  }

  // Guard the division: with no databases 0 / 0 would yield NaN.
  stats.averageScore =
    stats.totalDatabases > 0 ? Math.round(totalScore / stats.totalDatabases) : 0;

  return stats;
}
| |
/**
 * Turn a database name into a URL-friendly slug: lower-cased runs of
 * `a-z`/`0-9` joined by single hyphens, with no leading or trailing hyphen.
 *
 * @param {string} name - Database display name.
 * @returns {string} The slug (empty when the name has no `a-z`/`0-9` characters).
 */
function toSlug(name) {
  const words = name
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((word) => word !== '');
  return words.join('-');
}
| |
/**
 * Generate MDX content for a single database page.
 *
 * The database name is emitted as a JSON string literal wherever it is
 * embedded (YAML front matter, JSX attribute, object key), so names that
 * contain characters such as `:`, `#` or `"` cannot break the generated file.
 *
 * @param {string} name - Database display name; also the key into
 *   `databaseData.databases` on the generated page.
 * @param {object} db - Database entry; `db.documentation.description` is used
 *   for the page description when present.
 * @returns {string} The MDX source of the page.
 */
function generateDatabaseMDX(name, db) {
  const description =
    db.documentation?.description || `Documentation for ${name} database connection.`;
  // Truncate first, then escape, so an escape sequence is never cut in half.
  const shortDesc = description
    .slice(0, 160)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"');
  // A JSON string literal is valid both as a YAML double-quoted scalar and as
  // a JavaScript string expression.
  const quotedName = JSON.stringify(name);

  return `---
title: ${quotedName}
sidebar_label: ${quotedName}
description: "${shortDesc}"
hide_title: true
---

{/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/}

import { DatabasePage } from '@site/src/components/databases';
import databaseData from '@site/src/data/databases.json';

<DatabasePage name={${quotedName}} database={databaseData.databases[${quotedName}]} />
`;
}
| |
/**
 * Build the MDX source of the databases overview (index) page.
 *
 * @param {object} statistics - Statistics object; only `totalDatabases` is
 *   read here.
 * @param {boolean} [usedFlaskContext=true] - When false, a developer note is
 *   appended explaining that the docs were built without Flask context.
 * @returns {string} The MDX source of the overview page.
 */
function generateIndexMDX(statistics, usedFlaskContext = true) {
  // The notice is only emitted for builds that ran without Flask context.
  let fallbackNotice = '';
  if (!usedFlaskContext) {
    fallbackNotice = `
:::info Developer Note
This documentation was built without Flask context, so feature diagnostics (scores, time grain support, etc.)
may not reflect actual database capabilities. For full diagnostics, build docs locally with:

\`\`\`bash
cd docs && npm run gen-db-docs
\`\`\`

This requires a working Superset development environment.
:::

`;
  }

  const frontMatter = `---
title: Connecting to Databases
sidebar_label: Overview
sidebar_position: 1
---`;

  const licenseComment = `{/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/}`;

  return `${frontMatter}

${licenseComment}

import { DatabaseIndex } from '@site/src/components/databases';
import databaseData from '@site/src/data/databases.json';

# Connecting to Databases

Superset does not ship bundled with connectivity to databases. The main step in connecting
Superset to a database is to **install the proper database driver(s)** in your environment.

:::note
You'll need to install the required packages for the database you want to use as your metadata database
as well as the packages needed to connect to the databases you want to access through Superset.
For information about setting up Superset's metadata database, please refer to
installation documentations ([Docker Compose](/docs/installation/docker-compose), [Kubernetes](/docs/installation/kubernetes))
:::

## Supported Databases

Superset supports **${statistics.totalDatabases} databases** with varying levels of feature support.
Click on any database name to see detailed documentation including connection strings,
authentication methods, and configuration options.

<DatabaseIndex data={databaseData} />

## Installing Database Drivers

Superset requires a Python [DB-API database driver](https://peps.python.org/pep-0249/)
and a [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/20/dialects/) to be installed for
each database engine you want to connect to.

### Installing Drivers in Docker

For Docker deployments, create a \`requirements-local.txt\` file in the \`docker\` directory:

\`\`\`bash
# Create the requirements file
touch ./docker/requirements-local.txt

# Add your driver (e.g., for PostgreSQL)
echo "psycopg2-binary" >> ./docker/requirements-local.txt
\`\`\`

Then restart your containers. The drivers will be installed automatically.

### Installing Drivers with pip

For non-Docker installations:

\`\`\`bash
pip install <driver-package>
\`\`\`

See individual database pages for the specific driver packages needed.

## Connecting Through the UI

1. Go to **Settings → Data: Database Connections**
2. Click **+ DATABASE**
3. Select your database type or enter a SQLAlchemy URI
4. Click **Test Connection** to verify
5. Click **Connect** to save

## Contributing

To add or update database documentation, add a \`metadata\` attribute to your engine spec class in
\`superset/db_engine_specs/\`. Documentation is auto-generated from these metadata attributes.

See [METADATA_STATUS.md](https://github.com/apache/superset/blob/master/superset/db_engine_specs/METADATA_STATUS.md)
for the current status of database documentation and the [README](https://github.com/apache/superset/blob/master/superset/db_engine_specs/README.md) for the metadata schema.
${fallbackNotice}`;
}
| |
// Path of the README.md file in ROOT_DIR that updateReadme() reads and rewrites.
const README_PATH = path.join(ROOT_DIR, 'README.md');
// HTML comment markers; updateReadme() replaces everything between them
// (markers included) with the markers plus the generated logos HTML.
const README_START_MARKER = '<!-- SUPPORTED_DATABASES_START -->';
const README_END_MARKER = '<!-- SUPPORTED_DATABASES_END -->';
| |
| /** |
| * Read image dimensions, with fallback SVG viewBox parsing for cases where |
| * image-size can't handle SVG width/height attributes (e.g., scientific notation). |
| */ |
| function getImageDimensions(imgPath) { |
| const sizeOf = require('image-size'); |
| try { |
| const dims = sizeOf(imgPath); |
| // image-size may misparse SVG attributes (e.g. width="1e3" → 1). |
| // Fall back to viewBox parsing if a dimension looks wrong. |
| if (dims.type === 'svg' && (dims.width < 2 || dims.height < 2)) { |
| const content = fs.readFileSync(imgPath, 'utf-8'); |
| const vbMatch = content.match(/viewBox=["']([^"']+)["']/); |
| if (vbMatch) { |
| const parts = vbMatch[1].trim().split(/[\s,]+/).map(Number); |
| if (parts.length >= 4 && parts[2] > 0 && parts[3] > 0) { |
| return { width: parts[2], height: parts[3] }; |
| } |
| } |
| } |
| if (dims.width > 0 && dims.height > 0) { |
| return { width: dims.width, height: dims.height }; |
| } |
| } catch { /* fall through */ } |
| return null; |
| } |
| |
/**
 * Scale an image to fit a bounding box while keeping its aspect ratio.
 *
 * The image is first sized to `maxHeight`; if that makes it wider than
 * `maxWidth`, it is sized to `maxWidth` instead. If the resulting height is
 * below `minHeight`, the height is raised to `minHeight` and the width may
 * then exceed `maxWidth`.
 *
 * @param {number} imgWidth - Source image width.
 * @param {number} imgHeight - Source image height.
 * @param {number} maxWidth - Bounding box width.
 * @param {number} maxHeight - Bounding box height.
 * @param {number} minHeight - Smallest height to return.
 * @returns {{width: number, height: number}} Rounded display dimensions.
 */
function fitToBoundingBox(imgWidth, imgHeight, maxWidth, maxHeight, minHeight) {
  const aspect = imgWidth / imgHeight;
  const widthAtMaxHeight = maxHeight * aspect;
  const tooWide = widthAtMaxHeight > maxWidth;

  let displayWidth = tooWide ? maxWidth : widthAtMaxHeight;
  let displayHeight = tooWide ? maxWidth / aspect : maxHeight;

  // Very wide images: keep them legible even if the width budget is exceeded.
  if (displayHeight < minHeight) {
    displayHeight = minHeight;
    displayWidth = minHeight * aspect;
  }

  return { width: Math.round(displayWidth), height: Math.round(displayHeight) };
}
| |
/**
 * Generate the database logos HTML for README.md.
 *
 * Only databases that have both a logo and a homepage URL are included. They
 * are sorted by name and deduplicated by logo filename (the first name in sort
 * order wins). Image dimensions are read from IMAGES_DIR so each logo keeps its
 * aspect ratio; when they cannot be read, only a height is emitted.
 *
 * @param {object} databases - Map of database name to database entry.
 * @returns {string} A centered `<p>` containing one linked `<img>` per logo,
 *   or an empty string when no database qualifies.
 */
function generateReadmeLogos(databases) {
  // Get databases with logos and homepage URLs, sorted alphabetically,
  // deduplicated by logo filename (matches docs homepage logic in index.tsx)
  const seenLogos = new Set();
  const dbsWithLogos = Object.entries(databases)
    .filter(([, db]) => db.documentation?.logo && db.documentation?.homepage_url)
    .sort(([a], [b]) => a.localeCompare(b))
    .filter(([, db]) => {
      const logo = db.documentation.logo;
      if (seenLogos.has(logo)) return false;
      seenLogos.add(logo);
      return true;
    });

  if (dbsWithLogos.length === 0) {
    return '';
  }

  const MAX_WIDTH = 150;
  const MAX_HEIGHT = 40;
  const MIN_HEIGHT = 24;

  const DOCS_BASE = 'https://superset.apache.org/docs/databases/supported';

  // Generate linked logo tags with aspect-ratio-preserving dimensions
  const logoTags = dbsWithLogos.map(([name, db]) => {
    const logo = db.documentation.logo;
    // Same slug helper the MDX page filenames use, so links and pages agree.
    const slug = toSlug(name);
    const imgPath = path.join(IMAGES_DIR, logo);

    const dims = getImageDimensions(imgPath);
    let sizeAttrs;
    if (dims) {
      const { width, height } = fitToBoundingBox(
        dims.width,
        dims.height,
        MAX_WIDTH,
        MAX_HEIGHT,
        MIN_HEIGHT
      );
      sizeAttrs = `width="${width}" height="${height}"`;
    } else {
      console.warn(` Could not read dimensions for ${logo}, using height-only fallback`);
      sizeAttrs = `height="${MAX_HEIGHT}"`;
    }

    const img = `<img src="docs/static/img/databases/${logo}" alt="${name}" ${sizeAttrs} />`;
    return ` <a href="${DOCS_BASE}/${slug}" title="${name}">${img}</a>`;
  });

  // Use &nbsp; between logos for spacing (GitHub strips style/class attributes)
  return `<p align="center">
${logoTags.join(' &nbsp;\n')}
</p>`;
}
| |
/**
 * Update the README.md with generated database logos.
 *
 * Replaces every section delimited by README_START_MARKER and
 * README_END_MARKER with the markers plus the HTML from
 * generateReadmeLogos(). The file is only written when its content changes.
 *
 * @param {object} databases - Map of database name to database entry.
 * @returns {boolean} `true` when README.md was rewritten, `false` when the
 *   file or its markers are missing or the content is unchanged.
 */
function updateReadme(databases) {
  if (!fs.existsSync(README_PATH)) {
    console.log('README.md not found, skipping update');
    return false;
  }

  const content = fs.readFileSync(README_PATH, 'utf-8');

  // Check if markers exist
  if (!content.includes(README_START_MARKER) || !content.includes(README_END_MARKER)) {
    console.log('README.md missing database markers, skipping update');
    console.log(` Add ${README_START_MARKER} and ${README_END_MARKER} to enable auto-generation`);
    return false;
  }

  // Generate new logos section
  const logosHtml = generateReadmeLogos(databases);
  const replacement = `${README_START_MARKER}\n${logosHtml}\n${README_END_MARKER}`;

  // Replace content between markers (non-greedy, so each marker pair is
  // handled separately)
  const pattern = new RegExp(
    `${README_START_MARKER}[\\s\\S]*?${README_END_MARKER}`,
    'g'
  );
  // A replacer function inserts the text literally; a replacement *string*
  // would interpret sequences such as `$&` or `$1` in the generated HTML.
  const newContent = content.replace(pattern, () => replacement);

  if (newContent !== content) {
    fs.writeFileSync(README_PATH, newContent);
    console.log('Updated README.md database logos');
    return true;
  }

  console.log('README.md database logos unchanged');
  return false;
}
| |
/**
 * Run `extract_custom_errors.py` (located next to this script) with `python3`
 * in ROOT_DIR and parse the JSON it prints on stdout.
 *
 * The result is treated as a map of module name to a map of class name to a
 * list of custom errors; the totals are logged.
 *
 * @returns {object|null} The parsed custom errors, or `null` when the script
 *   fails or its output cannot be processed (the reason is logged).
 */
function extractCustomErrors() {
  console.log('Extracting custom_errors from engine specs...');

  try {
    const { error, status, stderr, stdout } = spawnSync(
      'python3',
      [path.join(__dirname, 'extract_custom_errors.py')],
      {
        cwd: ROOT_DIR,
        encoding: 'utf-8',
        timeout: 30000,
        maxBuffer: 10 * 1024 * 1024,
      }
    );

    // `error` is set when the process could not be spawned or was killed.
    if (error) {
      throw error;
    }
    if (status !== 0) {
      throw new Error(stderr || 'Python script failed');
    }

    const customErrors = JSON.parse(stdout);
    const moduleCount = Object.keys(customErrors).length;

    // Total number of errors across every class of every module.
    let errorCount = 0;
    for (const classes of Object.values(customErrors)) {
      let moduleTotal = 0;
      for (const errs of Object.values(classes)) {
        moduleTotal += errs.length;
      }
      errorCount += moduleTotal;
    }

    console.log(` Found ${errorCount} custom errors across ${moduleCount} modules`);
    return customErrors;
  } catch (err) {
    console.log(' Could not extract custom_errors:', err.message);
    return null;
  }
}
| |
/**
 * Attach custom errors to the matching database entries, in place.
 *
 * Entries are matched by module: the last dot-separated segment of
 * `db.module` is looked up in `customErrors` (Flask mode uses the full path,
 * e.g. superset.db_engine_specs.postgres, while customErrors is keyed by file
 * stem, e.g. postgres). The errors of every class in that module are
 * concatenated into `db.documentation.custom_errors`.
 *
 * @param {object} databases - Map of database name to database entry; mutated.
 * @param {object|null} customErrors - Map of module name to a map of class
 *   name to error list; nothing happens when it is falsy.
 * @returns {void}
 */
function mergeCustomErrors(databases, customErrors) {
  if (!customErrors) return;

  let mergedCount = 0;

  for (const db of Object.values(databases)) {
    if (!db.module) continue;

    const moduleName = db.module.split('.').pop();
    const moduleErrors = customErrors[moduleName];
    if (!moduleErrors) continue;

    // Flatten the per-class error lists of this module into one list.
    const allErrors = Object.values(moduleErrors).flatMap((classErrors) => [...classErrors]);
    if (allErrors.length === 0) continue;

    db.documentation = db.documentation || {};
    db.documentation.custom_errors = allErrors;
    mergedCount += 1;
  }

  if (mergedCount > 0) {
    console.log(` Merged custom_errors into ${mergedCount} database docs`);
  }
}
| |
/**
 * Read and parse the previously generated DATA_OUTPUT_FILE, if there is one.
 *
 * @returns {object|null} The parsed JSON, or `null` when the file does not
 *   exist or cannot be read or parsed (the reason is logged).
 */
function loadExistingData() {
  if (fs.existsSync(DATA_OUTPUT_FILE)) {
    try {
      return JSON.parse(fs.readFileSync(DATA_OUTPUT_FILE, 'utf-8'));
    } catch (error) {
      console.log('Could not load existing data:', error.message);
    }
  }
  return null;
}
| |
/**
 * Carry diagnostics over from previously generated data, in place.
 *
 * For every database in `newDatabases` that also exists in
 * `existingData.databases` with a score above 0, the score, time grains and
 * feature flags defined on the existing entry overwrite those of the new one.
 *
 * @param {object} newDatabases - Map of database name to new entry; mutated.
 * @param {object|null} existingData - Previously generated data; ignored
 *   unless it has a `databases` map.
 * @returns {object} The same `newDatabases` object.
 */
function mergeWithExistingDiagnostics(newDatabases, existingData) {
  if (!existingData?.databases) return newDatabases;

  const diagnosticFields = [
    'score',
    'max_score',
    'time_grains',
    'joins',
    'subqueries',
    'supports_dynamic_schema',
    'supports_catalog',
    'supports_dynamic_catalog',
    'ssh_tunneling',
    'query_cancelation',
    'supports_file_upload',
    'user_impersonation',
    'query_cost_estimation',
    'sql_validation',
  ];

  for (const [name, db] of Object.entries(newDatabases)) {
    const previous = existingData.databases[name];
    // Only entries that were actually scored carry usable diagnostics.
    if (!previous || !(previous.score > 0)) continue;

    for (const field of diagnosticFields) {
      if (previous[field] !== undefined) {
        db[field] = previous[field];
      }
    }
  }

  // Count the entries that now carry a positive score.
  let preserved = 0;
  for (const entry of Object.values(newDatabases)) {
    if (entry.score > 0) preserved += 1;
  }
  if (preserved > 0) {
    console.log(`Preserved diagnostics for ${preserved} databases from existing data`);
  }

  return newDatabases;
}
| |
/**
 * Entry point: collect the database data, write `databases.json`, regenerate
 * the per-database MDX pages, the overview page and the sidebar category
 * files, and optionally refresh the README logos.
 *
 * Data comes from the Flask-backed script when it works, otherwise from the
 * engine spec metadata. The process exits with status 1 when neither source
 * yields any database.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Create a directory (and its parents) unless it already exists.
  const ensureDir = (dir) => {
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }
  };
  // Serialize with 2-space indentation and a trailing newline.
  const toJsonText = (value) => JSON.stringify(value, null, 2) + '\n';
  const supportedDirLabel = path.relative(DOCS_DIR, MDX_SUPPORTED_DIR);

  console.log('Generating database documentation...\n');

  ensureDir(DATA_OUTPUT_DIR);
  ensureDir(MDX_OUTPUT_DIR);

  // Previously generated data, kept around for a potential diagnostics merge.
  const existingData = loadExistingData();

  // Sources in order of preference:
  // 1. Full script with Flask context (richest data with diagnostics)
  // 2. Engine spec metadata files (works in CI without Flask)
  let databases = tryRunFullScript();
  const usedFlaskContext = Boolean(databases);
  databases = databases || extractEngineSpecMetadata();

  if (!databases || Object.keys(databases).length === 0) {
    console.error('Failed to generate database documentation data.');
    console.error('Could not extract from Flask app or engine spec metadata.');
    process.exit(1);
  }

  console.log(`Processed ${Object.keys(databases).length} databases\n`);

  // Without fresh scores, keep the diagnostics from the existing data.
  const lacksNewScores = Object.values(databases).every((db) => !(db.score > 0));
  if (lacksNewScores && existingData) {
    databases = mergeWithExistingDiagnostics(databases, existingData);
  }

  // Custom errors feed the troubleshooting documentation.
  mergeCustomErrors(databases, extractCustomErrors());

  const statistics = buildStatistics(databases);

  // Final JSON structure consumed by the docs components.
  const output = {
    generated: new Date().toISOString(),
    statistics,
    databases,
  };
  fs.writeFileSync(DATA_OUTPUT_FILE, toJsonText(output));
  console.log(`Generated: ${path.relative(DOCS_DIR, DATA_OUTPUT_FILE)}`);

  ensureDir(MDX_SUPPORTED_DIR);

  // Remove MDX pages whose database is no longer in the list.
  console.log(`\nCleaning up old MDX files in ${supportedDirLabel}/`);
  const mdxFilesOnDisk = fs
    .readdirSync(MDX_SUPPORTED_DIR)
    .filter((file) => file.endsWith('.mdx'));
  const expectedFiles = new Set(
    Object.keys(databases).map((name) => `${toSlug(name)}.mdx`)
  );
  let removedCount = 0;
  for (const file of mdxFilesOnDisk) {
    if (expectedFiles.has(file)) continue;
    fs.unlinkSync(path.join(MDX_SUPPORTED_DIR, file));
    removedCount += 1;
  }
  if (removedCount > 0) {
    console.log(` Removed ${removedCount} outdated MDX files`);
  }

  // One MDX page per database in the supported/ subdirectory.
  console.log(`\nGenerating MDX files in ${supportedDirLabel}/`);
  let mdxCount = 0;
  for (const [name, db] of Object.entries(databases)) {
    const pagePath = path.join(MDX_SUPPORTED_DIR, `${toSlug(name)}.mdx`);
    fs.writeFileSync(pagePath, generateDatabaseMDX(name, db));
    mdxCount += 1;
  }
  console.log(` Generated ${mdxCount} database pages`);

  // Overview page in the parent databases/ directory.
  fs.writeFileSync(
    path.join(MDX_OUTPUT_DIR, 'index.mdx'),
    generateIndexMDX(statistics, usedFlaskContext)
  );
  console.log(` Generated index page`);

  // _category_.json for the databases/ directory.
  fs.writeFileSync(
    path.join(MDX_OUTPUT_DIR, '_category_.json'),
    toJsonText({
      label: 'Databases',
      position: 1,
      link: {
        type: 'doc',
        id: 'databases/index',
      },
    })
  );

  // _category_.json for the supported/ subdirectory (collapsible).
  fs.writeFileSync(
    path.join(MDX_SUPPORTED_DIR, '_category_.json'),
    toJsonText({
      label: 'Supported Databases',
      position: 2,
      collapsed: true,
      collapsible: true,
    })
  );
  console.log(` Generated _category_.json files`);

  // README logos are only refreshed when explicitly requested.
  const readmeRequested =
    process.env.UPDATE_README === 'true' || process.argv.includes('--update-readme');
  if (readmeRequested) {
    console.log('');
    updateReadme(databases);
  }

  console.log(`\nStatistics:`);
  console.log(` Total databases: ${statistics.totalDatabases}`);
  console.log(` With documentation: ${statistics.withDocumentation}`);
  console.log(` With connection strings: ${statistics.withConnectionString}`);
  console.log(` Categories: ${Object.keys(statistics.byCategory).length}`);

  console.log('\nDone!');
}
| |
// Run the generator. An unexpected failure is reported and turned into a
// non-zero exit status so that callers (CI, npm scripts) can detect it.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});