blob: df6b9e3c052709ce61bc780a1bd0f78809293740 [file] [log] [blame]
"""
Generate a unified page of documentation for all error conditions.
"""
import json
import os
import re
from itertools import chain
from pathlib import Path
from textwrap import dedent
# To avoid adding new direct dependencies, we import from within mkdocs.
# This is not ideal as unrelated updates to mkdocs may break this script.
from mkdocs.structure.pages import markdown
THIS_DIR = Path(__file__).parent
SPARK_PROJECT_ROOT = THIS_DIR.parents[1]
DOCS_ROOT = SPARK_PROJECT_ROOT / "docs"
ERROR_CONDITIONS_PATH = (
SPARK_PROJECT_ROOT / "common/utils/src/main/resources/error/error-conditions.json"
)
def assemble_message(message_parts):
message = " ".join(message_parts)
cleaned_message = re.sub(r"(<.*?>)", lambda x: f"`{x.group(1)}`", message)
return markdown.markdown(cleaned_message)
def load_error_conditions(path):
with open(path) as f:
raw_error_conditions = json.load(f)
error_conditions = dict()
for name, details in raw_error_conditions.items():
if name.startswith("_LEGACY_ERROR") or name.startswith("INTERNAL_ERROR"):
continue
if "subClass" in details:
for sub_name in details["subClass"]:
details["subClass"][sub_name]["message"] = (
assemble_message(details["subClass"][sub_name]["message"])
)
details["message"] = assemble_message(details["message"])
error_conditions[name] = details
return error_conditions
def anchor_name(condition_name: str, sub_condition_name: str = None):
"""
URLs can, in practice, be up to 2,000 characters long without causing any issues. So we preserve
the condition name mostly as-is for use in the anchor, even when that name is very long.
See: https://stackoverflow.com/a/417184
"""
parts = [
part for part in (condition_name, sub_condition_name)
if part
]
anchor = "-".join(parts).lower().replace("_", "-")
return anchor
def generate_doc_rows(condition_name, condition_details):
condition_row = [
"""
<tr id="{anchor}">
<td>{sql_state}</td>
<td>
<span class="error-condition-name">
<code>
<a href="#{anchor}">#</a>
</code>
{condition_name}
</span>
</td>
<td>{message}</td>
</tr>
"""
.format(
anchor=anchor_name(condition_name),
sql_state=condition_details["sqlState"],
# This inserts soft break opportunities so that if a long name needs to be wrapped
# it will wrap in a visually pleasing manner.
# See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/wbr
condition_name=condition_name.replace("_", "<wbr />_"),
message=condition_details["message"],
)
]
sub_condition_rows = []
if "subClass" in condition_details:
for sub_condition_name in sorted(condition_details["subClass"]):
sub_condition_rows.append(
"""
<tr id="{anchor}">
<td></td>
<td class="error-sub-condition">
<span class="error-condition-name">
<code>
<a href="#{anchor}">#</a>
</code>
{sub_condition_name}
</span>
</td>
<td class="error-sub-condition">{message}</td>
</tr>
"""
.format(
anchor=anchor_name(condition_name, sub_condition_name),
# See comment above for explanation of `<wbr />`.
sub_condition_name=sub_condition_name.replace("_", "<wbr />_"),
message=condition_details["subClass"][sub_condition_name]["message"],
)
)
doc_rows = condition_row + sub_condition_rows
return [
dedent(row).strip()
for row in doc_rows
]
def generate_doc_table(error_conditions):
doc_rows = chain.from_iterable([
generate_doc_rows(condition_name, condition_details)
for condition_name, condition_details
in sorted(
error_conditions.items(),
key=lambda x: (x[1]["sqlState"], x[0]),
)
])
table_html = (
"""
<table id="error-conditions">
<tr>
<th>Error State / SQLSTATE</th>
<th>Error Condition & Sub-Condition</th>
<th>Message</th>
</tr>
{rows}
</table>
"""
)
# We dedent here rather than above so that the interpolated rows (which are not
# indented) don't prevent the dedent from working.
table_html = dedent(table_html).strip().format(rows="\n".join(list(doc_rows)))
return table_html
if __name__ == "__main__":
error_conditions = load_error_conditions(ERROR_CONDITIONS_PATH)
doc_table = generate_doc_table(error_conditions)
(DOCS_ROOT / "_generated").mkdir(exist_ok=True)
html_table_path = DOCS_ROOT / "_generated" / "error-conditions.html"
with open(html_table_path, "w") as f:
f.write(doc_table)
print("Generated:", os.path.relpath(html_table_path, start=SPARK_PROJECT_ROOT))