feat: make sure to quote formulas on Excel export
diff --git a/superset/utils/excel.py b/superset/utils/excel.py
index 8609be5..6025499 100644
--- a/superset/utils/excel.py
+++ b/superset/utils/excel.py
@@ -22,9 +22,30 @@
from superset.utils.core import GenericDataType
def quote_formulas(df: pd.DataFrame) -> pd.DataFrame:
    """
    Make sure to quote any formulas for security reasons.

    String cells starting with ``=``, ``+``, ``-`` or ``@`` are prefixed with a
    single quote so they are written as literal text. Non-string cells, empty
    strings and columns that are neither object nor string typed are returned
    as they are.

    :param df: the frame about to be exported; it is left unmodified
    :returns: a new frame in which every formula-like string is quoted
    """
    formula_prefixes = ("=", "+", "-", "@")

    def quote(value: object) -> object:
        # ``str.startswith`` accepts a tuple and is False for the empty string.
        if isinstance(value, str) and value.startswith(formula_prefixes):
            return f"'{value}"
        return value

    # Shallow copy: untouched columns keep sharing their data with ``df``,
    # while ``isetitem`` below always inserts a new array instead of writing
    # in place, so the caller's frame is never modified.
    quoted = df.copy(deep=False)

    # Walk the columns by position rather than by label: with duplicated
    # labels ``df[label]`` returns a DataFrame and its cells would be skipped.
    for position, dtype in enumerate(quoted.dtypes):
        holds_text = pd.api.types.is_object_dtype(dtype) or isinstance(
            dtype, pd.StringDtype
        )
        if holds_text:
            quoted.isetitem(position, quoted.iloc[:, position].map(quote))

    return quoted
+
+
def df_to_excel(df: pd.DataFrame, **kwargs: Any) -> Any:
output = io.BytesIO()
+ # make sure formulas are quoted, to prevent malicious injections
+ df = quote_formulas(df)
+
# pylint: disable=abstract-class-instantiated
with pd.ExcelWriter(output, engine="xlsxwriter") as writer:
df.to_excel(writer, **kwargs)
diff --git a/tests/unit_tests/utils/excel_tests.py b/tests/unit_tests/utils/excel_tests.py
index 745beff..deb6d3d 100644
--- a/tests/unit_tests/utils/excel_tests.py
+++ b/tests/unit_tests/utils/excel_tests.py
@@ -34,6 +34,19 @@
assert pd.read_excel(contents)["dt"][0] == "2023-01-01 00:00:00+00:00"
def test_quote_formulas() -> None:
    """
    Test that formulas are quoted in Excel.

    Every prefix handled by the exporter (``=``, ``+``, ``-`` and ``@``) is
    covered, together with values that must come back unchanged.
    """
    df = pd.DataFrame(
        {
            "formula": [
                "=SUM(A1:A2)",
                "+SUM(A1:A2)",
                "-SUM(A1:A2)",
                "@SUM(A1:A2)",
                "normal",
                # a prefix character that is not in first position is harmless
                "a=b+c-d@e",
            ]
        }
    )
    contents = df_to_excel(df)
    assert pd.read_excel(contents)["formula"].tolist() == [
        "'=SUM(A1:A2)",
        "'+SUM(A1:A2)",
        "'-SUM(A1:A2)",
        "'@SUM(A1:A2)",
        "normal",
        "a=b+c-d@e",
    ]
+
+
def test_column_data_types_with_one_numeric_column():
df = pd.DataFrame(
{