| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| |
| import pandas as pd |
| import pytest |
| from flask_babel import lazy_gettext as _ |
| from sqlalchemy.orm.session import Session |
| |
| from superset.charts.client_processing import apply_client_processing, pivot_df, table |
| from superset.common.chart_data import ChartDataResultFormat |
| from superset.utils.core import GenericDataType |
| |
| |
| def test_pivot_df_no_cols_no_rows_single_metric(): |
| """ |
| Pivot table when no cols/rows and 1 metric are selected. |
| """ |
| # when no cols/rows are selected there are no groupbys in the query, |
| # and the data has only the metric(s) |
| df = pd.DataFrame.from_dict({"SUM(num)": {0: 80679663}}) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | SUM(num) | |
| |---:|------------:| |
| | 0 | 8.06797e+07 | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('SUM(num)',) | |
| |:-----------------|----------------:| |
| | ('{_("Total")} (Sum)',) | 8.06797e+07 | |
| """.strip() |
| ) |
| |
| # transpose_pivot and combine_metrics do nothing in this case |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | |
| |:-----------------|----------------:| |
| | ('Total (Sum)',) | 8.06797e+07 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows will pivot the table, moving the metrics |
| # to rows |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('{_("Total")} (Sum)',) | |
| |:--------------|-------------------:| |
| | ('SUM(num)',) | 8.06797e+07 | |
| """.strip() |
| ) |
| |
| # showing totals |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('SUM(num)',) | ('Total (Sum)',) | |
| |:-----------------|----------------:|-------------------:| |
| | ('{_("Total")} (Sum)',) | 8.06797e+07 | 8.06797e+07 | |
| """.strip() |
| ) |
| |
| |
| def test_pivot_df_no_cols_no_rows_two_metrics(): |
| """ |
| Pivot table when no cols/rows and 2 metrics are selected. |
| """ |
| # when no cols/rows are selected there are no groupbys in the query, |
| # and the data has only the metrics |
| df = pd.DataFrame.from_dict({"SUM(num)": {0: 80679663}, "MAX(num)": {0: 37296}}) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | SUM(num) | MAX(num) | |
| |---:|------------:|-----------:| |
| | 0 | 8.06797e+07 | 37296 | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:-----------------|----------------:|----------------:| |
| | ('{_("Total")} (Sum)',) | 8.06797e+07 | 37296 | |
| """.strip() |
| ) |
| |
| # transpose_pivot and combine_metrics do nothing in this case |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:-----------------|----------------:|----------------:| |
| | ('Total (Sum)',) | 8.06797e+07 | 37296 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows will pivot the table, moving the metrics |
| # to rows |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('{_("Total")} (Sum)',) | |
| |:--------------|-------------------:| |
| | ('SUM(num)',) | 8.06797e+07 | |
| | ('MAX(num)',) | 37296 | |
| """.strip() |
| ) |
| |
| # when showing totals we only add a column, since adding a row |
| # would be redundant |
| pivoted = pivot_df( |
| df, |
| rows=[], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('SUM(num)',) | ('MAX(num)',) | ('{_("Total")} (Sum)',) | |
| |:-----------------|----------------:|----------------:|-------------------:| |
| | ('{_("Total")} (Sum)',) | 8.06797e+07 | 37296 | 8.0717e+07 | |
| """.strip() |
| ) |
| |
| |
| def test_pivot_df_single_row_two_metrics(): |
| """ |
| Pivot table when a single column and 2 metrics are selected. |
| """ |
| df = pd.DataFrame.from_dict( |
| { |
| "gender": {0: "girl", 1: "boy"}, |
| "SUM(num)": {0: 118065, 1: 47123}, |
| "MAX(num)": {0: 2588, 1: 1280}, |
| } |
| ) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | gender | SUM(num) | MAX(num) | |
| |---:|:---------|-----------:|-----------:| |
| | 0 | girl | 118065 | 2588 | |
| | 1 | boy | 47123 | 1280 | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|----------------:|----------------:| |
| | ('boy',) | 47123 | 1280 | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # transpose_pivot |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | |
| |:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:| |
| | ('{_("Total")} (Sum)',) | 47123 | 118065 | 1280 | 2588 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # combine_metrics does nothing in this case |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|----------------:|----------------:| |
| | ('boy',) | 47123 | 1280 | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # show totals |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('SUM(num)',) | ('MAX(num)',) | ('{_("Total")} (Sum)',) | |
| |:-----------------|----------------:|----------------:|-------------------:| |
| | ('boy',) | 47123 | 1280 | 48403 | |
| | ('girl',) | 118065 | 2588 | 120653 | |
| | ('{_("Total")} (Sum)',) | 165188 | 3868 | 169056 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('{_("Total")} (Sum)',) | |
| |:-------------------------|-------------------:| |
| | ('SUM(num)', 'boy') | 47123 | |
| | ('SUM(num)', 'girl') | 118065 | |
| | ('SUM(num)', 'Subtotal') | 165188 | |
| | ('MAX(num)', 'boy') | 1280 | |
| | ('MAX(num)', 'girl') | 2588 | |
| | ('MAX(num)', 'Subtotal') | 3868 | |
| | ('{_("Total")} (Sum)', '') | 169056 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows with combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('{_("Total")} (Sum)',) | |
| |:---------------------|-------------------:| |
| | ('boy', 'SUM(num)') | 47123 | |
| | ('boy', 'MAX(num)') | 1280 | |
| | ('boy', 'Subtotal') | 48403 | |
| | ('girl', 'SUM(num)') | 118065 | |
| | ('girl', 'MAX(num)') | 2588 | |
| | ('girl', 'Subtotal') | 120653 | |
| | ('{_("Total")} (Sum)', '') | 169056 | |
| """.strip() |
| ) |
| |
| |
| def test_pivot_df_single_row_null_values(): |
| """ |
| Pivot table when a single column and 2 metrics are selected. |
| """ |
| df = pd.DataFrame.from_dict( |
| { |
| "gender": {0: "girl", 1: "boy"}, |
| "SUM(num)": {0: 118065, 1: None}, |
| "MAX(num)": {0: 2588, 1: None}, |
| } |
| ) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | gender | SUM(num) | MAX(num) | |
| |---:|:---------|-----------:|-----------:| |
| | 0 | girl | 118065 | 2588 | |
| | 1 | boy | nan | nan | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|----------------:|----------------:| |
| | ('boy',) | nan | nan | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # transpose_pivot |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | |
| |:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:| |
| | ('Total (Sum)',) | nan | 118065 | nan | 2588 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # combine_metrics does nothing in this case |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|----------------:|----------------:| |
| | ('boy',) | nan | nan | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # show totals |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | ('Total (Sum)',) | |
| |:-----------------|----------------:|----------------:|:-------------------| |
| | ('boy',) | nan | nan | nannan | |
| | ('girl',) | 118065 | 2588 | 120653.0 | |
| | ('Total (Sum)',) | 118065 | 2588 | 120653.0 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('{_("Total")} (Sum)',) | |
| |:-------------------------|-------------------:| |
| | ('SUM(num)', 'boy') | nan | |
| | ('SUM(num)', 'girl') | 118065 | |
| | ('SUM(num)', 'Subtotal') | 118065 | |
| | ('MAX(num)', 'boy') | nan | |
| | ('MAX(num)', 'girl') | 2588 | |
| | ('MAX(num)', 'Subtotal') | 2588 | |
| | ('{_("Total")} (Sum)', '') | 120653 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows with combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('{_("Total")} (Sum)',) | |
| |:---------------------|-------------------:| |
| | ('boy', 'SUM(num)') | nan | |
| | ('boy', 'MAX(num)') | nan | |
| | ('boy', 'Subtotal') | 0 | |
| | ('girl', 'SUM(num)') | 118065 | |
| | ('girl', 'MAX(num)') | 2588 | |
| | ('girl', 'Subtotal') | 120653 | |
| | ('{_("Total")} (Sum)', '') | 120653 | |
| """.strip() |
| ) |
| |
| |
| def test_pivot_df_single_row_null_mix_values_strings(): |
| """ |
| Pivot table when a single column and 2 metrics are selected. |
| """ |
| df = pd.DataFrame.from_dict( |
| { |
| "gender": {0: "girl", 1: "boy"}, |
| "SUM(num)": {0: 118065, 1: "NULL"}, |
| "MAX(num)": {0: 2588, 1: None}, |
| } |
| ) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | gender | SUM(num) | MAX(num) | |
| |---:|:---------|:-----------|-----------:| |
| | 0 | girl | 118065 | 2588 | |
| | 1 | boy | NULL | nan | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|:----------------|----------------:| |
| | ('boy',) | NULL | nan | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # transpose_pivot |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | |
| |:-----------------|:----------------------|-----------------------:|----------------------:|-----------------------:| |
| | ('Total (Sum)',) | NULL | 118065 | nan | 2588 | |
| |
| """.strip() # noqa: E501 |
| ) |
| |
| # combine_metrics does nothing in this case |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|:----------------|----------------:| |
| | ('boy',) | NULL | nan | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # show totals |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | ('Total (Sum)',) | |
| |:-----------------|:----------------|----------------:|:-------------------| |
| | ('boy',) | NULL | nan | NULLnan | |
| | ('girl',) | 118065 | 2588 | 120653.0 | |
| | ('Total (Sum)',) | 118065.0 | 2588 | 120653.0 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows with combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('Total (Sum)',) | |
| |:---------------------|:-------------------| |
| | ('boy', 'SUM(num)') | NULL | |
| | ('boy', 'MAX(num)') | nan | |
| | ('girl', 'SUM(num)') | 118065 | |
| | ('girl', 'MAX(num)') | 2588.0 | |
| """.strip() |
| ) |
| |
| |
| def test_pivot_df_single_row_null_mix_values_numbers(): |
| """ |
| Pivot table when a single column and 2 metrics are selected. |
| """ |
| df = pd.DataFrame.from_dict( |
| { |
| "gender": {0: "girl", 1: "boy"}, |
| "SUM(num)": {0: 118065, 1: 21}, |
| "MAX(num)": {0: 2588, 1: None}, |
| } |
| ) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | gender | SUM(num) | MAX(num) | |
| |---:|:---------|-----------:|-----------:| |
| | 0 | girl | 118065 | 2588 | |
| | 1 | boy | 21 | nan | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|----------------:|----------------:| |
| | ('boy',) | 21 | nan | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # transpose_pivot |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | |
| |:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:| |
| | ('Total (Sum)',) | 21 | 118065 | nan | 2588 | """.strip() # noqa: E501 |
| ) |
| |
| # combine_metrics does nothing in this case |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:----------|----------------:|----------------:| |
| | ('boy',) | 21 | nan | |
| | ('girl',) | 118065 | 2588 | |
| """.strip() |
| ) |
| |
| # show totals |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:-----------------|----------------:|----------------:| |
| | ('boy',) | 21 | nan | |
| | ('girl',) | 118065 | 2588 | |
| | ('Total (Sum)',) | 118086 | 2588 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('Total (Sum)',) | |
| |:---------------------|-------------------:| |
| | ('SUM(num)', 'boy') | 21 | |
| | ('SUM(num)', 'girl') | 118065 | |
| | ('MAX(num)', 'boy') | nan | |
| | ('MAX(num)', 'girl') | 2588 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows with combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender"], |
| columns=[], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == f""" |
| | | ('{_("Total")} (Sum)',) | |
| |:---------------------|-------------------:| |
| | ('boy', 'SUM(num)') | 21 | |
| | ('boy', 'MAX(num)') | nan | |
| | ('girl', 'SUM(num)') | 118065 | |
| | ('girl', 'MAX(num)') | 2588 | |
| """.strip() |
| ) |
| |
| |
| def test_pivot_df_complex(): |
| """ |
| Pivot table when a column, rows and 2 metrics are selected. |
| """ |
| df = pd.DataFrame.from_dict( |
| { |
| "state": { |
| 0: "CA", |
| 1: "CA", |
| 2: "CA", |
| 3: "FL", |
| 4: "CA", |
| 5: "CA", |
| 6: "FL", |
| 7: "FL", |
| 8: "FL", |
| 9: "CA", |
| 10: "FL", |
| 11: "FL", |
| }, |
| "gender": { |
| 0: "girl", |
| 1: "boy", |
| 2: "girl", |
| 3: "girl", |
| 4: "girl", |
| 5: "girl", |
| 6: "boy", |
| 7: "girl", |
| 8: "girl", |
| 9: "boy", |
| 10: "boy", |
| 11: "girl", |
| }, |
| "name": { |
| 0: "Amy", |
| 1: "Edward", |
| 2: "Sophia", |
| 3: "Amy", |
| 4: "Cindy", |
| 5: "Dawn", |
| 6: "Edward", |
| 7: "Sophia", |
| 8: "Dawn", |
| 9: "Tony", |
| 10: "Tony", |
| 11: "Cindy", |
| }, |
| "SUM(num)": { |
| 0: 45426, |
| 1: 31290, |
| 2: 18859, |
| 3: 14740, |
| 4: 14149, |
| 5: 11403, |
| 6: 9395, |
| 7: 7181, |
| 8: 5089, |
| 9: 3765, |
| 10: 2673, |
| 11: 1218, |
| }, |
| "MAX(num)": { |
| 0: 2227, |
| 1: 1280, |
| 2: 2588, |
| 3: 854, |
| 4: 842, |
| 5: 1157, |
| 6: 389, |
| 7: 1187, |
| 8: 461, |
| 9: 598, |
| 10: 247, |
| 11: 217, |
| }, |
| } |
| ) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | state | gender | name | SUM(num) | MAX(num) | |
| |---:|:--------|:---------|:-------|-----------:|-----------:| |
| | 0 | CA | girl | Amy | 45426 | 2227 | |
| | 1 | CA | boy | Edward | 31290 | 1280 | |
| | 2 | CA | girl | Sophia | 18859 | 2588 | |
| | 3 | FL | girl | Amy | 14740 | 854 | |
| | 4 | CA | girl | Cindy | 14149 | 842 | |
| | 5 | CA | girl | Dawn | 11403 | 1157 | |
| | 6 | FL | boy | Edward | 9395 | 389 | |
| | 7 | FL | girl | Sophia | 7181 | 1187 | |
| | 8 | FL | girl | Dawn | 5089 | 461 | |
| | 9 | CA | boy | Tony | 3765 | 598 | |
| | 10 | FL | boy | Tony | 2673 | 247 | |
| | 11 | FL | girl | Cindy | 1218 | 217 | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| |
| # Sort the pivoted DataFrame to ensure deterministic output |
| pivoted_sorted = pivoted.sort_index() |
| |
| assert ( |
| pivoted_sorted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'CA') | ('SUM(num)', 'FL') | ('MAX(num)', 'CA') | ('MAX(num)', 'FL') | |
| |:-------------------|---------------------:|---------------------:|---------------------:|---------------------:| |
| | ('boy', 'Edward') | 31290 | 9395 | 1280 | 389 | |
| | ('boy', 'Tony') | 3765 | 2673 | 598 | 247 | |
| | ('girl', 'Amy') | 45426 | 14740 | 2227 | 854 | |
| | ('girl', 'Cindy') | 14149 | 1218 | 842 | 217 | |
| | ('girl', 'Dawn') | 11403 | 5089 | 1157 | 461 | |
| | ('girl', 'Sophia') | 18859 | 7181 | 2588 | 1187 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # transpose_pivot |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') | |
| |:--------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:| |
| | ('CA',) | 31290 | 3765 | 45426 | 14149 | 11403 | 18859 | 1280 | 598 | 2227 | 842 | 1157 | 2588 | |
| | ('FL',) | 9395 | 2673 | 14740 | 1218 | 5089 | 7181 | 389 | 247 | 854 | 217 | 461 | 1187 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('CA', 'SUM(num)') | ('CA', 'MAX(num)') | ('FL', 'SUM(num)') | ('FL', 'MAX(num)') | |
| |:-------------------|---------------------:|---------------------:|---------------------:|---------------------:| |
| | ('boy', 'Edward') | 31290 | 1280 | 9395 | 389 | |
| | ('boy', 'Tony') | 3765 | 598 | 2673 | 247 | |
| | ('girl', 'Amy') | 45426 | 2227 | 14740 | 854 | |
| | ('girl', 'Cindy') | 14149 | 842 | 1218 | 217 | |
| | ('girl', 'Dawn') | 11403 | 1157 | 5089 | 461 | |
| | ('girl', 'Sophia') | 18859 | 2588 | 7181 | 1187 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # show totals |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'CA') | ('SUM(num)', 'FL') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'CA') | ('MAX(num)', 'FL') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') | |
| |:---------------------|---------------------:|---------------------:|---------------------------:|---------------------:|---------------------:|---------------------------:|----------------------:| |
| | ('boy', 'Edward') | 31290 | 9395 | 40685 | 1280 | 389 | 1669 | 42354 | |
| | ('boy', 'Tony') | 3765 | 2673 | 6438 | 598 | 247 | 845 | 7283 | |
| | ('boy', 'Subtotal') | 35055 | 12068 | 47123 | 1878 | 636 | 2514 | 49637 | |
| | ('girl', 'Amy') | 45426 | 14740 | 60166 | 2227 | 854 | 3081 | 63247 | |
| | ('girl', 'Cindy') | 14149 | 1218 | 15367 | 842 | 217 | 1059 | 16426 | |
| | ('girl', 'Dawn') | 11403 | 5089 | 16492 | 1157 | 461 | 1618 | 18110 | |
| | ('girl', 'Sophia') | 18859 | 7181 | 26040 | 2588 | 1187 | 3775 | 29815 | |
| | ('girl', 'Subtotal') | 89837 | 28228 | 118065 | 6814 | 2719 | 9533 | 127598 | |
| | ('Total (Sum)', '') | 124892 | 40296 | 165188 | 8692 | 3355 | 12047 | 177235 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # apply_metrics_on_rows |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('CA',) | ('FL',) | |
| |:-------------------------------|----------:|----------:| |
| | ('SUM(num)', 'boy', 'Edward') | 31290 | 9395 | |
| | ('SUM(num)', 'boy', 'Tony') | 3765 | 2673 | |
| | ('SUM(num)', 'girl', 'Amy') | 45426 | 14740 | |
| | ('SUM(num)', 'girl', 'Cindy') | 14149 | 1218 | |
| | ('SUM(num)', 'girl', 'Dawn') | 11403 | 5089 | |
| | ('SUM(num)', 'girl', 'Sophia') | 18859 | 7181 | |
| | ('MAX(num)', 'boy', 'Edward') | 1280 | 389 | |
| | ('MAX(num)', 'boy', 'Tony') | 598 | 247 | |
| | ('MAX(num)', 'girl', 'Amy') | 2227 | 854 | |
| | ('MAX(num)', 'girl', 'Cindy') | 842 | 217 | |
| | ('MAX(num)', 'girl', 'Dawn') | 1157 | 461 | |
| | ('MAX(num)', 'girl', 'Sophia') | 2588 | 1187 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows with combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('CA',) | ('FL',) | |
| |:-------------------------------|----------:|----------:| |
| | ('boy', 'Edward', 'SUM(num)') | 31290 | 9395 | |
| | ('boy', 'Edward', 'MAX(num)') | 1280 | 389 | |
| | ('boy', 'Tony', 'SUM(num)') | 3765 | 2673 | |
| | ('boy', 'Tony', 'MAX(num)') | 598 | 247 | |
| | ('girl', 'Amy', 'SUM(num)') | 45426 | 14740 | |
| | ('girl', 'Amy', 'MAX(num)') | 2227 | 854 | |
| | ('girl', 'Cindy', 'SUM(num)') | 14149 | 1218 | |
| | ('girl', 'Cindy', 'MAX(num)') | 842 | 217 | |
| | ('girl', 'Dawn', 'SUM(num)') | 11403 | 5089 | |
| | ('girl', 'Dawn', 'MAX(num)') | 1157 | 461 | |
| | ('girl', 'Sophia', 'SUM(num)') | 18859 | 7181 | |
| | ('girl', 'Sophia', 'MAX(num)') | 2588 | 1187 | |
| """.strip() |
| ) |
| |
| # everything |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('boy', 'Edward') | ('boy', 'Tony') | ('boy', 'Subtotal') | ('girl', 'Amy') | ('girl', 'Cindy') | ('girl', 'Dawn') | ('girl', 'Sophia') | ('girl', 'Subtotal') | ('Total (Sum)', '') | |
| |:--------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:| |
| | ('CA', 'SUM(num)') | 31290 | 3765 | 35055 | 45426 | 14149 | 11403 | 18859 | 89837 | 124892 | |
| | ('CA', 'MAX(num)') | 1280 | 598 | 1878 | 2227 | 842 | 1157 | 2588 | 6814 | 8692 | |
| | ('CA', 'Subtotal') | 32570 | 4363 | 36933 | 47653 | 14991 | 12560 | 21447 | 96651 | 133584 | |
| | ('FL', 'SUM(num)') | 9395 | 2673 | 12068 | 14740 | 1218 | 5089 | 7181 | 28228 | 40296 | |
| | ('FL', 'MAX(num)') | 389 | 247 | 636 | 854 | 217 | 461 | 1187 | 2719 | 3355 | |
| | ('FL', 'Subtotal') | 9784 | 2920 | 12704 | 15594 | 1435 | 5550 | 8368 | 30947 | 43651 | |
| | ('Total (Sum)', '') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # fraction |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum as Fraction of Columns", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'CA') | ('SUM(num)', 'FL') | ('MAX(num)', 'CA') | ('MAX(num)', 'FL') | |
| |:-------------------------------------------|---------------------:|---------------------:|---------------------:|---------------------:| |
| | ('boy', 'Edward') | 0.250536 | 0.23315 | 0.147262 | 0.115946 | |
| | ('boy', 'Tony') | 0.030146 | 0.0663341 | 0.0687989 | 0.0736215 | |
| | ('boy', 'Subtotal') | 0.280683 | 0.299484 | 0.216061 | 0.189568 | |
| | ('girl', 'Amy') | 0.363722 | 0.365793 | 0.256213 | 0.254545 | |
| | ('girl', 'Cindy') | 0.11329 | 0.0302263 | 0.0968707 | 0.0646796 | |
| | ('girl', 'Dawn') | 0.0913029 | 0.12629 | 0.133111 | 0.137407 | |
| | ('girl', 'Sophia') | 0.151002 | 0.178206 | 0.297745 | 0.3538 | |
| | ('girl', 'Subtotal') | 0.719317 | 0.700516 | 0.783939 | 0.810432 | |
| | ('Total (Sum as Fraction of Columns)', '') | 1 | 1 | 1 | 1 | |
| """.strip() # noqa: E501 |
| ) |
| |
| |
| def test_pivot_df_multi_column(): |
| """ |
| Pivot table when 2 columns, no rows and 2 metrics are selected. |
| """ |
| df = pd.DataFrame.from_dict( |
| { |
| "state": { |
| 0: "CA", |
| 1: "CA", |
| 2: "CA", |
| 3: "FL", |
| 4: "CA", |
| 5: "CA", |
| 6: "FL", |
| 7: "FL", |
| 8: "FL", |
| 9: "CA", |
| 10: "FL", |
| 11: "FL", |
| }, |
| "gender": { |
| 0: "girl", |
| 1: "boy", |
| 2: "girl", |
| 3: "girl", |
| 4: "girl", |
| 5: "girl", |
| 6: "boy", |
| 7: "girl", |
| 8: "girl", |
| 9: "boy", |
| 10: "boy", |
| 11: "girl", |
| }, |
| "SUM(num)": { |
| 0: 45426, |
| 1: 31290, |
| 2: 18859, |
| 3: 14740, |
| 4: 14149, |
| 5: 11403, |
| 6: 9395, |
| 7: 7181, |
| 8: 5089, |
| 9: 3765, |
| 10: 2673, |
| 11: 1218, |
| }, |
| "MAX(num)": { |
| 0: 2227, |
| 1: 1280, |
| 2: 2588, |
| 3: 854, |
| 4: 842, |
| 5: 1157, |
| 6: 389, |
| 7: 1187, |
| 8: 461, |
| 9: 598, |
| 10: 247, |
| 11: 217, |
| }, |
| } |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | |
| |:-----------------|----------------------:|-----------------------:|----------------------:|-----------------------:| |
| | ('CA',) | 35055 | 89837 | 1878 | 6814 | |
| | ('Total (Sum)',) | 12068 | 28228 | 636 | 2719 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # transpose_pivot |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)',) | ('MAX(num)',) | |
| |:---------------|----------------:|----------------:| |
| | ('CA', 'boy') | 35055 | 1878 | |
| | ('CA', 'girl') | 89837 | 6814 | |
| | ('FL', 'boy') | 12068 | 636 | |
| | ('FL', 'girl') | 28228 | 2719 | |
| """.strip() |
| ) |
| |
| # combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('boy', 'SUM(num)') | ('boy', 'MAX(num)') | ('girl', 'SUM(num)') | ('girl', 'MAX(num)') | |
| |:-----------------|----------------------:|----------------------:|-----------------------:|-----------------------:| |
| | ('CA',) | 35055 | 1878 | 89837 | 6814 | |
| | ('Total (Sum)',) | 12068 | 636 | 28228 | 2719 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # show totals |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('SUM(num)', 'Subtotal') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') | |
| |:-----------------|----------------------:|-----------------------:|---------------------------:|----------------------:|-----------------------:|---------------------------:|----------------------:| |
| | ('CA',) | 35055 | 89837 | 124892 | 1878 | 6814 | 8692 | 133584 | |
| | ('Total (Sum)',) | 12068 | 28228 | 40296 | 636 | 2719 | 3355 | 43651 | |
| |
| """.strip() # noqa: E501 |
| ) |
| |
| # apply_metrics_on_rows |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') | |
| |:--------------|----------------:|-----------------:|----------------:|-----------------:| |
| | ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 | |
| | ('MAX(num)',) | 1878 | 6814 | 636 | 2719 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # apply_metrics_on_rows with combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('CA', 'boy') | ('CA', 'girl') | ('FL', 'boy') | ('FL', 'girl') | |
| |:--------------|----------------:|-----------------:|----------------:|-----------------:| |
| | ('SUM(num)',) | 35055 | 89837 | 12068 | 28228 | |
| | ('MAX(num)',) | 1878 | 6814 | 636 | 2719 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # everything |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('CA',) | ('Total (Sum)',) | |
| |:---------------------|----------:|-------------------:| |
| | ('boy', 'SUM(num)') | 35055 | 12068 | |
| | ('boy', 'MAX(num)') | 1878 | 636 | |
| | ('boy', 'Subtotal') | 36933 | 12704 | |
| | ('girl', 'SUM(num)') | 89837 | 28228 | |
| | ('girl', 'MAX(num)') | 6814 | 2719 | |
| | ('girl', 'Subtotal') | 96651 | 30947 | |
| | ('Total (Sum)', '') | 133584 | 43651 | |
| """.strip() |
| ) |
| |
| # fraction |
| pivoted = pivot_df( |
| df, |
| rows=None, |
| columns=["state", "gender"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum as Fraction of Columns", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy') | ('SUM(num)', 'girl') | ('MAX(num)', 'boy') | ('MAX(num)', 'girl') | |
| |:----------------------------------------|----------------------:|-----------------------:|----------------------:|-----------------------:| |
| | ('CA',) | 0.743904 | 0.760911 | 0.747017 | 0.71478 | |
| | ('Total (Sum as Fraction of Columns)',) | 0.256096 | 0.239089 | 0.252983 | 0.28522 | |
| """.strip() # noqa: E501 |
| ) |
| |
| |
| def test_pivot_df_complex_null_values(): |
| """ |
| Pivot table when a column, rows and 2 metrics are selected. |
| """ |
| df = pd.DataFrame.from_dict( |
| { |
| "state": { |
| 0: None, |
| 1: None, |
| 2: None, |
| 3: None, |
| 4: None, |
| 5: None, |
| 6: None, |
| 7: None, |
| 8: None, |
| 9: None, |
| 10: None, |
| 11: None, |
| }, |
| "gender": { |
| 0: "girl", |
| 1: "boy", |
| 2: "girl", |
| 3: "girl", |
| 4: "girl", |
| 5: "girl", |
| 6: "boy", |
| 7: "girl", |
| 8: "girl", |
| 9: "boy", |
| 10: "boy", |
| 11: "girl", |
| }, |
| "name": { |
| 0: "Amy", |
| 1: "Edward", |
| 2: "Sophia", |
| 3: "Amy", |
| 4: "Cindy", |
| 5: "Dawn", |
| 6: "Edward", |
| 7: "Sophia", |
| 8: "Dawn", |
| 9: "Tony", |
| 10: "Tony", |
| 11: "Cindy", |
| }, |
| "SUM(num)": { |
| 0: 45426, |
| 1: 31290, |
| 2: 18859, |
| 3: 14740, |
| 4: 14149, |
| 5: 11403, |
| 6: 9395, |
| 7: 7181, |
| 8: 5089, |
| 9: 3765, |
| 10: 2673, |
| 11: 1218, |
| }, |
| "MAX(num)": { |
| 0: 2227, |
| 1: 1280, |
| 2: 2588, |
| 3: 854, |
| 4: 842, |
| 5: 1157, |
| 6: 389, |
| 7: 1187, |
| 8: 461, |
| 9: 598, |
| 10: 247, |
| 11: 217, |
| }, |
| } |
| ) |
| assert ( |
| df.to_markdown() |
| == """ |
| | | state | gender | name | SUM(num) | MAX(num) | |
| |---:|:--------|:---------|:-------|-----------:|-----------:| |
| | 0 | | girl | Amy | 45426 | 2227 | |
| | 1 | | boy | Edward | 31290 | 1280 | |
| | 2 | | girl | Sophia | 18859 | 2588 | |
| | 3 | | girl | Amy | 14740 | 854 | |
| | 4 | | girl | Cindy | 14149 | 842 | |
| | 5 | | girl | Dawn | 11403 | 1157 | |
| | 6 | | boy | Edward | 9395 | 389 | |
| | 7 | | girl | Sophia | 7181 | 1187 | |
| | 8 | | girl | Dawn | 5089 | 461 | |
| | 9 | | boy | Tony | 3765 | 598 | |
| | 10 | | boy | Tony | 2673 | 247 | |
| | 11 | | girl | Cindy | 1218 | 217 | |
| """.strip() |
| ) |
| |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', nan) | ('MAX(num)', nan) | |
| |:-------------------|--------------------:|--------------------:| |
| | ('boy', 'Edward') | 40685 | 1669 | |
| | ('boy', 'Tony') | 6438 | 845 | |
| | ('girl', 'Amy') | 60166 | 3081 | |
| | ('girl', 'Cindy') | 15367 | 1059 | |
| | ('girl', 'Dawn') | 16492 | 1618 | |
| | ('girl', 'Sophia') | 26040 | 3775 | |
| |
| """.strip() |
| ) |
| |
| # transpose_pivot |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', 'boy', 'Edward') | ('SUM(num)', 'boy', 'Tony') | ('SUM(num)', 'girl', 'Amy') | ('SUM(num)', 'girl', 'Cindy') | ('SUM(num)', 'girl', 'Dawn') | ('SUM(num)', 'girl', 'Sophia') | ('MAX(num)', 'boy', 'Edward') | ('MAX(num)', 'boy', 'Tony') | ('MAX(num)', 'girl', 'Amy') | ('MAX(num)', 'girl', 'Cindy') | ('MAX(num)', 'girl', 'Dawn') | ('MAX(num)', 'girl', 'Sophia') | |
| |:-------|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:|--------------------------------:|------------------------------:|------------------------------:|--------------------------------:|-------------------------------:|---------------------------------:| |
| | (nan,) | 40685 | 6438 | 60166 | 15367 | 16492 | 26040 | 1669 | 845 | 3081 | 1059 | 1618 | 3775 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | (nan, 'SUM(num)') | (nan, 'MAX(num)') | |
| |:-------------------|--------------------:|--------------------:| |
| | ('boy', 'Edward') | 40685 | 1669 | |
| | ('boy', 'Tony') | 6438 | 845 | |
| | ('girl', 'Amy') | 60166 | 3081 | |
| | ('girl', 'Cindy') | 15367 | 1059 | |
| | ('girl', 'Dawn') | 16492 | 1618 | |
| | ('girl', 'Sophia') | 26040 | 3775 | |
| """.strip() |
| ) |
| |
| # show totals |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', nan) | ('SUM(num)', 'Subtotal') | ('MAX(num)', nan) | ('MAX(num)', 'Subtotal') | ('Total (Sum)', '') | |
| |:---------------------|--------------------:|---------------------------:|--------------------:|---------------------------:|----------------------:| |
| | ('boy', 'Edward') | 40685 | 40685 | 1669 | 1669 | 42354 | |
| | ('boy', 'Tony') | 6438 | 6438 | 845 | 845 | 7283 | |
| | ('boy', 'Subtotal') | 47123 | 47123 | 2514 | 2514 | 49637 | |
| | ('girl', 'Amy') | 60166 | 60166 | 3081 | 3081 | 63247 | |
| | ('girl', 'Cindy') | 15367 | 15367 | 1059 | 1059 | 16426 | |
| | ('girl', 'Dawn') | 16492 | 16492 | 1618 | 1618 | 18110 | |
| | ('girl', 'Sophia') | 26040 | 26040 | 3775 | 3775 | 29815 | |
| | ('girl', 'Subtotal') | 118065 | 118065 | 9533 | 9533 | 127598 | |
| | ('Total (Sum)', '') | 165188 | 165188 | 12047 | 12047 | 177235 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # apply_metrics_on_rows |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | (nan,) | |
| |:-------------------------------|---------:| |
| | ('SUM(num)', 'boy', 'Edward') | 40685 | |
| | ('SUM(num)', 'boy', 'Tony') | 6438 | |
| | ('SUM(num)', 'girl', 'Amy') | 60166 | |
| | ('SUM(num)', 'girl', 'Cindy') | 15367 | |
| | ('SUM(num)', 'girl', 'Dawn') | 16492 | |
| | ('SUM(num)', 'girl', 'Sophia') | 26040 | |
| | ('MAX(num)', 'boy', 'Edward') | 1669 | |
| | ('MAX(num)', 'boy', 'Tony') | 845 | |
| | ('MAX(num)', 'girl', 'Amy') | 3081 | |
| | ('MAX(num)', 'girl', 'Cindy') | 1059 | |
| | ('MAX(num)', 'girl', 'Dawn') | 1618 | |
| | ('MAX(num)', 'girl', 'Sophia') | 3775 | |
| """.strip() |
| ) |
| |
| # apply_metrics_on_rows with combine_metrics |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=True, |
| show_rows_total=False, |
| show_columns_total=False, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | (nan,) | |
| |:-------------------------------|---------:| |
| | ('boy', 'Edward', 'SUM(num)') | 40685 | |
| | ('boy', 'Edward', 'MAX(num)') | 1669 | |
| | ('boy', 'Tony', 'SUM(num)') | 6438 | |
| | ('boy', 'Tony', 'MAX(num)') | 845 | |
| | ('girl', 'Amy', 'SUM(num)') | 60166 | |
| | ('girl', 'Amy', 'MAX(num)') | 3081 | |
| | ('girl', 'Cindy', 'SUM(num)') | 15367 | |
| | ('girl', 'Cindy', 'MAX(num)') | 1059 | |
| | ('girl', 'Dawn', 'SUM(num)') | 16492 | |
| | ('girl', 'Dawn', 'MAX(num)') | 1618 | |
| | ('girl', 'Sophia', 'SUM(num)') | 26040 | |
| | ('girl', 'Sophia', 'MAX(num)') | 3775 | |
| """.strip() |
| ) |
| |
| # everything |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum", |
| transpose_pivot=True, |
| combine_metrics=True, |
| show_rows_total=True, |
| show_columns_total=True, |
| apply_metrics_on_rows=True, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('boy', 'Edward') | ('boy', 'Tony') | ('boy', 'Subtotal') | ('girl', 'Amy') | ('girl', 'Cindy') | ('girl', 'Dawn') | ('girl', 'Sophia') | ('girl', 'Subtotal') | ('Total (Sum)', '') | |
| |:--------------------|--------------------:|------------------:|----------------------:|------------------:|--------------------:|-------------------:|---------------------:|-----------------------:|----------------------:| |
| | (nan, 'SUM(num)') | 40685 | 6438 | 47123 | 60166 | 15367 | 16492 | 26040 | 118065 | 165188 | |
| | (nan, 'MAX(num)') | 1669 | 845 | 2514 | 3081 | 1059 | 1618 | 3775 | 9533 | 12047 | |
| | (nan, 'Subtotal') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 | |
| | ('Total (Sum)', '') | 42354 | 7283 | 49637 | 63247 | 16426 | 18110 | 29815 | 127598 | 177235 | |
| """.strip() # noqa: E501 |
| ) |
| |
| # fraction |
| pivoted = pivot_df( |
| df, |
| rows=["gender", "name"], |
| columns=["state"], |
| metrics=["SUM(num)", "MAX(num)"], |
| aggfunc="Sum as Fraction of Columns", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| assert ( |
| pivoted.to_markdown() |
| == """ |
| | | ('SUM(num)', nan) | ('MAX(num)', nan) | |
| |:-------------------------------------------|--------------------:|--------------------:| |
| | ('boy', 'Edward') | 0.246295 | 0.138541 | |
| | ('boy', 'Tony') | 0.0389738 | 0.0701419 | |
| | ('boy', 'Subtotal') | 0.285269 | 0.208683 | |
| | ('girl', 'Amy') | 0.364227 | 0.255748 | |
| | ('girl', 'Cindy') | 0.0930273 | 0.0879057 | |
| | ('girl', 'Dawn') | 0.0998378 | 0.134307 | |
| | ('girl', 'Sophia') | 0.157639 | 0.313356 | |
| | ('girl', 'Subtotal') | 0.714731 | 0.791317 | |
| | ('Total (Sum as Fraction of Columns)', '') | 1 | 1 | |
| """.strip() # noqa: E501 |
| ) |
| |
| |
| def test_table(): |
| """ |
| Test that the table reports honor `d3NumberFormat`. |
| """ |
| df = pd.DataFrame.from_dict({"count": {0: 80679663}}) |
| form_data = { |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "NULL", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_ameaka2efjv_rfv1et5nwng", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "!=", |
| "sqlExpression": None, |
| "subject": "lang_at_home", |
| } |
| ], |
| "all_columns": [], |
| "color_pn": True, |
| "column_config": {"count": {"d3NumberFormat": ",d"}}, |
| "conditional_formatting": [], |
| "datasource": "8__table", |
| "extra_form_data": {}, |
| "granularity_sqla": "time_start", |
| "groupby": ["lang_at_home"], |
| "metrics": ["count"], |
| "order_by_cols": [], |
| "order_desc": True, |
| "percent_metrics": [], |
| "query_mode": "aggregate", |
| "row_limit": "15", |
| "server_page_length": 10, |
| "show_cell_bars": True, |
| "table_timestamp_format": "smart_date", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "url_params": {}, |
| "viz_type": "table", |
| } |
| formatted = table(df, form_data) |
| assert ( |
| formatted.to_markdown() |
| == """ |
| | | count | |
| |---:|:-----------| |
| | 0 | 80,679,663 | |
| """.strip() |
| ) |
| |
| |
| def test_apply_client_processing_no_form_invalid_viz_type(): |
| """ |
| Test with invalid viz type. It should just return the result |
| """ |
| result = {"foo": "bar"} |
| form_data = {"viz_type": "baz"} |
| assert apply_client_processing(result, form_data) == result |
| |
| |
| def test_apply_client_processing_without_result_format(): |
| """ |
| A query without result_format should raise an exception |
| """ |
| result = {"queries": [{"result_format": "foo"}]} |
| form_data = {"viz_type": "pivot_table_v2"} |
| |
| with pytest.raises(Exception) as ex: # noqa: PT011 |
| apply_client_processing(result, form_data) |
| |
| assert ex.match("Result format foo not supported") is True # noqa: E712 |
| |
| |
| def test_apply_client_processing_json_format(): |
| """ |
| It should be able to process json results |
| """ |
| |
| result = { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.JSON, |
| "data": { |
| "result": [ |
| { |
| "data": [{"COUNT(is_software_dev)": 4725}], |
| "colnames": ["COUNT(is_software_dev)"], |
| "coltypes": [0], |
| } |
| ] |
| }, |
| } |
| ] |
| } |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.JSON, |
| "data": { |
| "result": { |
| "Total (Sum)": { |
| "data": [{"COUNT(is_software_dev)": 4725}], |
| "colnames": ["COUNT(is_software_dev)"], |
| "coltypes": [0], |
| } |
| } |
| }, |
| "colnames": [("result",)], |
| "indexnames": [("Total (Sum)",)], |
| "coltypes": [GenericDataType.STRING], |
| "rowcount": 1, |
| } |
| ] |
| } |
| |
| |
| def test_apply_client_processing_csv_format(): |
| """ |
| It should be able to process csv results |
| """ |
| |
| result = { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.CSV, |
| "data": """ |
| COUNT(is_software_dev) |
| 4725 |
| """, |
| } |
| ] |
| } |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.CSV, |
| "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n", |
| "colnames": [("COUNT(is_software_dev)",)], |
| "indexnames": [("Total (Sum)",)], |
| "coltypes": [GenericDataType.NUMERIC], |
| "rowcount": 1, |
| } |
| ] |
| } |
| |
| |
| def test_apply_client_processing_csv_format_simple_table(): |
| """ |
| It should be able to process csv results |
| And not show a default column |
| """ |
| |
| result = { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.CSV, |
| "data": """ |
| COUNT(is_software_dev) |
| 4725 |
| """, |
| } |
| ] |
| } |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "table", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.CSV, |
| "data": "COUNT(is_software_dev)\n4725\n", |
| "colnames": ["COUNT(is_software_dev)"], |
| "indexnames": [0], |
| "coltypes": [GenericDataType.NUMERIC], |
| "rowcount": 1, |
| } |
| ] |
| } |
| |
| |
| def test_apply_client_processing_csv_format_empty_string(): |
| """ |
| It should be able to process csv results with no data |
| """ |
| |
| result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}]} |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [{"result_format": ChartDataResultFormat.CSV, "data": ""}] |
| } |
| |
| |
| @pytest.mark.parametrize("data", [None, "", "\n"]) |
| def test_apply_client_processing_csv_format_no_data(data): |
| """ |
| It should be able to process csv results with no data |
| """ |
| |
| result = {"queries": [{"result_format": ChartDataResultFormat.CSV, "data": data}]} |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [{"result_format": ChartDataResultFormat.CSV, "data": data}] |
| } |
| |
| |
| def test_apply_client_processing_csv_format_no_data_multiple_queries(): |
| """ |
| It should be able to process csv results multiple queries if one query has no data |
| """ |
| |
| result = { |
| "queries": [ |
| {"result_format": ChartDataResultFormat.CSV, "data": ""}, |
| { |
| "result_format": ChartDataResultFormat.CSV, |
| "data": """ |
| COUNT(is_software_dev) |
| 4725 |
| """, |
| }, |
| ] |
| } |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [ |
| {"result_format": ChartDataResultFormat.CSV, "data": ""}, |
| { |
| "result_format": ChartDataResultFormat.CSV, |
| "data": ",COUNT(is_software_dev)\nTotal (Sum),4725\n", |
| "colnames": [("COUNT(is_software_dev)",)], |
| "indexnames": [("Total (Sum)",)], |
| "coltypes": [GenericDataType.NUMERIC], |
| "rowcount": 1, |
| }, |
| ] |
| } |
| |
| |
| def test_apply_client_processing_json_format_empty_string(): |
| """ |
| It should be able to process json results with no data |
| """ |
| |
| result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}]} |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [{"result_format": ChartDataResultFormat.JSON, "data": ""}] |
| } |
| |
| |
| def test_apply_client_processing_json_format_data_is_none(): |
| """ |
| It should be able to process json results with no data |
| """ |
| |
| result = {"queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}]} |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": [ |
| { |
| "aggregate": "COUNT", |
| "column": { |
| "column_name": "is_software_dev", |
| "description": None, |
| "expression": None, |
| "filterable": True, |
| "groupby": True, |
| "id": 1463, |
| "is_dttm": False, |
| "python_date_format": None, |
| "type": "DOUBLE PRECISION", |
| "verbose_name": None, |
| }, |
| "expressionType": "SIMPLE", |
| "hasCustomLabel": False, |
| "isNew": False, |
| "label": "COUNT(is_software_dev)", |
| "optionName": "metric_9i1kctig9yr_sizo6ihd2o", |
| "sqlExpression": None, |
| } |
| ], |
| "metricsLayout": "COLUMNS", |
| "adhoc_filters": [ |
| { |
| "clause": "WHERE", |
| "comparator": "Currently A Developer", |
| "expressionType": "SIMPLE", |
| "filterOptionName": "filter_fvi0jg9aii_2lekqrhy7qk", |
| "isExtra": False, |
| "isNew": False, |
| "operator": "==", |
| "sqlExpression": None, |
| "subject": "developer_type", |
| } |
| ], |
| "row_limit": 10000, |
| "order_desc": True, |
| "aggregateFunction": "Sum", |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data) == { |
| "queries": [{"result_format": ChartDataResultFormat.JSON, "data": None}] |
| } |
| |
| |
| def test_apply_client_processing_verbose_map(session: Session): |
| from superset import db |
| from superset.connectors.sqla.models import SqlaTable, SqlMetric |
| from superset.models.core import Database |
| |
| engine = db.session.get_bind() |
| SqlaTable.metadata.create_all(engine) # pylint: disable=no-member |
| database = Database(database_name="my_database", sqlalchemy_uri="sqlite://") |
| sqla_table = SqlaTable( |
| table_name="my_sqla_table", |
| columns=[], |
| metrics=[ |
| SqlMetric( |
| metric_name="count", |
| verbose_name="COUNT(*)", |
| metric_type="count", |
| expression="COUNT(*)", |
| ) |
| ], |
| database=database, |
| ) |
| |
| result = { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.JSON, |
| "data": [{"count": 4725}], |
| } |
| ] |
| } |
| form_data = { |
| "datasource": "19__table", |
| "viz_type": "pivot_table_v2", |
| "slice_id": 69, |
| "url_params": {}, |
| "granularity_sqla": "time_start", |
| "time_grain_sqla": "P1D", |
| "time_range": "No filter", |
| "groupbyColumns": [], |
| "groupbyRows": [], |
| "metrics": ["COUNT(*)"], |
| "metricsLayout": "COLUMNS", |
| "row_limit": 10000, |
| "order_desc": True, |
| "valueFormat": "SMART_NUMBER", |
| "date_format": "smart_date", |
| "rowOrder": "key_a_to_z", |
| "colOrder": "key_a_to_z", |
| "extra_form_data": {}, |
| "force": False, |
| "result_format": "json", |
| "result_type": "results", |
| } |
| |
| assert apply_client_processing(result, form_data, datasource=sqla_table) == { |
| "queries": [ |
| { |
| "result_format": ChartDataResultFormat.JSON, |
| "data": {"COUNT(*)": {"Total (Sum)": 4725}}, |
| "colnames": [("COUNT(*)",)], |
| "indexnames": [("Total (Sum)",)], |
| "coltypes": [GenericDataType.NUMERIC], |
| "rowcount": 1, |
| } |
| ] |
| } |
| |
| |
| def test_pivot_multi_level_index(): |
| """ |
| Pivot table with multi-level indexing. |
| """ |
| arrays = [ |
| ["Region1", "Region1", "Region1", "Region2", "Region2", "Region2"], |
| ["State1", "State1", "State2", "State3", "State3", "State4"], |
| ["City1", "City2", "City3", "City4", "City5", "City6"], |
| ] |
| index = pd.MultiIndex.from_tuples( |
| list(zip(*arrays, strict=False)), |
| names=["Region", "State", "City"], |
| ) |
| |
| data = { |
| "Metric1": [10, 20, 30, 40, 50, 60], |
| "Metric2": [5, 10, 15, 20, 25, 30], |
| "Metric3": [None, None, None, None, None, None], |
| } |
| df = pd.DataFrame(data, index=index) |
| |
| pivoted = pivot_df( |
| df, |
| rows=["Region", "State", "City"], |
| columns=[], |
| metrics=["Metric1", "Metric2", "Metric3"], |
| aggfunc="Sum", |
| transpose_pivot=False, |
| combine_metrics=False, |
| show_rows_total=False, |
| show_columns_total=True, |
| apply_metrics_on_rows=False, |
| ) |
| |
| # Sort the pivoted DataFrame to ensure deterministic output |
| pivoted_sorted = pivoted.sort_index() |
| |
| assert ( |
| pivoted_sorted.to_markdown() |
| == """ |
| | | ('Metric1',) | ('Metric2',) | ('Metric3',) | |
| |:----------------------------------|---------------:|---------------:|---------------:| |
| | ('Region1', 'State1', 'City1') | 10 | 5 | nan | |
| | ('Region1', 'State1', 'City2') | 20 | 10 | nan | |
| | ('Region1', 'State1', 'Subtotal') | 30 | 15 | 0 | |
| | ('Region1', 'State2', 'City3') | 30 | 15 | nan | |
| | ('Region1', 'State2', 'Subtotal') | 30 | 15 | 0 | |
| | ('Region1', 'Subtotal', '') | 60 | 30 | 0 | |
| | ('Region2', 'State3', 'City4') | 40 | 20 | nan | |
| | ('Region2', 'State3', 'City5') | 50 | 25 | nan | |
| | ('Region2', 'State3', 'Subtotal') | 90 | 45 | 0 | |
| | ('Region2', 'State4', 'City6') | 60 | 30 | nan | |
| | ('Region2', 'State4', 'Subtotal') | 60 | 30 | 0 | |
| | ('Region2', 'Subtotal', '') | 150 | 75 | 0 | |
| | ('Total (Sum)', '', '') | 210 | 105 | 0 | |
| """.strip() |
| ) |