blob: 70c553afde3212dde9a591f95509444e2ebc037f [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
import textwrap
from typing import Dict, Union
import pandas as pd
from sqlalchemy import DateTime, String
from sqlalchemy.sql import column
from superset import db, security_manager
from superset.connectors.sqla.models import SqlMetric, TableColumn
from superset.models.core import Database
from superset.models.dashboard import Dashboard
from superset.models.slice import Slice
from superset.utils.core import get_example_database
from .helpers import (
config,
get_example_data,
get_slice_json,
merge_slice,
misc_dash_slices,
TBL,
update_slice_ids,
)
def gen_filter(
subject: str, comparator: str, operator: str = "=="
) -> Dict[str, Union[bool, str]]:
return {
"clause": "WHERE",
"comparator": comparator,
"expressionType": "SIMPLE",
"operator": operator,
"subject": subject,
}
def load_data(tbl_name: str, database: Database) -> None:
pdf = pd.read_json(get_example_data("birth_names.json.gz"))
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),
if_exists="replace",
chunksize=500,
dtype={
"ds": DateTime,
"gender": String(16),
"state": String(10),
"name": String(255),
},
index=False,
)
print("Done loading table!")
print("-" * 80)
def load_birth_names(only_metadata: bool = False, force: bool = False) -> None:
"""Loading birth name dataset from a zip file in the repo"""
# pylint: disable=too-many-locals
tbl_name = "birth_names"
database = get_example_database()
table_exists = database.has_table_by_name(tbl_name)
if not only_metadata and (not table_exists or force):
load_data(tbl_name, database)
obj = db.session.query(TBL).filter_by(table_name=tbl_name).first()
if not obj:
print(f"Creating table [{tbl_name}] reference")
obj = TBL(table_name=tbl_name)
db.session.add(obj)
obj.main_dttm_col = "ds"
obj.database = database
obj.filter_select_enabled = True
if not any(col.column_name == "num_california" for col in obj.columns):
col_state = str(column("state").compile(db.engine))
col_num = str(column("num").compile(db.engine))
obj.columns.append(
TableColumn(
column_name="num_california",
expression=f"CASE WHEN {col_state} = 'CA' THEN {col_num} ELSE 0 END",
)
)
if not any(col.metric_name == "sum__num" for col in obj.metrics):
col = str(column("num").compile(db.engine))
obj.metrics.append(SqlMetric(metric_name="sum__num", expression=f"SUM({col})"))
db.session.commit()
obj.fetch_metadata()
tbl = obj
metrics = [
{
"expressionType": "SIMPLE",
"column": {"column_name": "num", "type": "BIGINT"},
"aggregate": "SUM",
"label": "Births",
"optionName": "metric_11",
}
]
metric = "sum__num"
defaults = {
"compare_lag": "10",
"compare_suffix": "o10Y",
"limit": "25",
"granularity_sqla": "ds",
"groupby": [],
"row_limit": config["ROW_LIMIT"],
"since": "100 years ago",
"until": "now",
"viz_type": "table",
"markup_type": "markdown",
}
admin = security_manager.find_user("admin")
print("Creating some slices")
slices = [
Slice(
slice_name="Participants",
viz_type="big_number",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
viz_type="big_number",
granularity_sqla="ds",
compare_lag="5",
compare_suffix="over 5Y",
metric=metric,
),
),
Slice(
slice_name="Genders",
viz_type="pie",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults, viz_type="pie", groupby=["gender"], metric=metric
),
),
Slice(
slice_name="Trends",
viz_type="line",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
viz_type="line",
groupby=["name"],
granularity_sqla="ds",
rich_tooltip=True,
show_legend=True,
metrics=metrics,
),
),
Slice(
slice_name="Genders by State",
viz_type="dist_bar",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
adhoc_filters=[
{
"clause": "WHERE",
"expressionType": "SIMPLE",
"filterOptionName": "2745eae5",
"comparator": ["other"],
"operator": "NOT IN",
"subject": "state",
}
],
viz_type="dist_bar",
metrics=[
{
"expressionType": "SIMPLE",
"column": {"column_name": "sum_boys", "type": "BIGINT(20)"},
"aggregate": "SUM",
"label": "Boys",
"optionName": "metric_11",
},
{
"expressionType": "SIMPLE",
"column": {"column_name": "sum_girls", "type": "BIGINT(20)"},
"aggregate": "SUM",
"label": "Girls",
"optionName": "metric_12",
},
],
groupby=["state"],
),
),
Slice(
slice_name="Girls",
viz_type="table",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
groupby=["name"],
adhoc_filters=[gen_filter("gender", "girl")],
row_limit=50,
timeseries_limit_metric="sum__num",
metrics=metrics,
),
),
Slice(
slice_name="Girl Name Cloud",
viz_type="word_cloud",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
viz_type="word_cloud",
size_from="10",
series="name",
size_to="70",
rotation="square",
limit="100",
adhoc_filters=[gen_filter("gender", "girl")],
metric=metric,
),
),
Slice(
slice_name="Boys",
viz_type="table",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
groupby=["name"],
adhoc_filters=[gen_filter("gender", "boy")],
row_limit=50,
metrics=metrics,
),
),
Slice(
slice_name="Boy Name Cloud",
viz_type="word_cloud",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
viz_type="word_cloud",
size_from="10",
series="name",
size_to="70",
rotation="square",
limit="100",
adhoc_filters=[gen_filter("gender", "boy")],
metric=metric,
),
),
Slice(
slice_name="Top 10 Girl Name Share",
viz_type="area",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
adhoc_filters=[gen_filter("gender", "girl")],
comparison_type="values",
groupby=["name"],
limit=10,
stacked_style="expand",
time_grain_sqla="P1D",
viz_type="area",
x_axis_forma="smart_date",
metrics=metrics,
),
),
Slice(
slice_name="Top 10 Boy Name Share",
viz_type="area",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
adhoc_filters=[gen_filter("gender", "boy")],
comparison_type="values",
groupby=["name"],
limit=10,
stacked_style="expand",
time_grain_sqla="P1D",
viz_type="area",
x_axis_forma="smart_date",
metrics=metrics,
),
),
]
misc_slices = [
Slice(
slice_name="Average and Sum Trends",
viz_type="dual_line",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
viz_type="dual_line",
metric={
"expressionType": "SIMPLE",
"column": {"column_name": "num", "type": "BIGINT(20)"},
"aggregate": "AVG",
"label": "AVG(num)",
"optionName": "metric_vgops097wej_g8uff99zhk7",
},
metric_2="sum__num",
granularity_sqla="ds",
metrics=metrics,
),
),
Slice(
slice_name="Num Births Trend",
viz_type="line",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(defaults, viz_type="line", metrics=metrics),
),
Slice(
slice_name="Daily Totals",
viz_type="table",
datasource_type="table",
datasource_id=tbl.id,
created_by=admin,
params=get_slice_json(
defaults,
groupby=["ds"],
since="40 years ago",
until="now",
viz_type="table",
metrics=metrics,
),
),
Slice(
slice_name="Number of California Births",
viz_type="big_number_total",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
metric={
"expressionType": "SIMPLE",
"column": {
"column_name": "num_california",
"expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
},
"aggregate": "SUM",
"label": "SUM(num_california)",
},
viz_type="big_number_total",
granularity_sqla="ds",
),
),
Slice(
slice_name="Top 10 California Names Timeseries",
viz_type="line",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
metrics=[
{
"expressionType": "SIMPLE",
"column": {
"column_name": "num_california",
"expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
},
"aggregate": "SUM",
"label": "SUM(num_california)",
}
],
viz_type="line",
granularity_sqla="ds",
groupby=["name"],
timeseries_limit_metric={
"expressionType": "SIMPLE",
"column": {
"column_name": "num_california",
"expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
},
"aggregate": "SUM",
"label": "SUM(num_california)",
},
limit="10",
),
),
Slice(
slice_name="Names Sorted by Num in California",
viz_type="table",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
metrics=metrics,
groupby=["name"],
row_limit=50,
timeseries_limit_metric={
"expressionType": "SIMPLE",
"column": {
"column_name": "num_california",
"expression": "CASE WHEN state = 'CA' THEN num ELSE 0 END",
},
"aggregate": "SUM",
"label": "SUM(num_california)",
},
),
),
Slice(
slice_name="Number of Girls",
viz_type="big_number_total",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
metric=metric,
viz_type="big_number_total",
granularity_sqla="ds",
adhoc_filters=[gen_filter("gender", "girl")],
subheader="total female participants",
),
),
Slice(
slice_name="Pivot Table",
viz_type="pivot_table",
datasource_type="table",
datasource_id=tbl.id,
params=get_slice_json(
defaults,
viz_type="pivot_table",
groupby=["name"],
columns=["state"],
metrics=metrics,
),
),
]
for slc in slices:
merge_slice(slc)
for slc in misc_slices:
merge_slice(slc)
misc_dash_slices.add(slc.slice_name)
print("Creating a dashboard")
dash = db.session.query(Dashboard).filter_by(slug="births").first()
if not dash:
dash = Dashboard()
db.session.add(dash)
dash.published = True
dash.json_metadata = textwrap.dedent(
"""\
{
"label_colors": {
"Girls": "#FF69B4",
"Boys": "#ADD8E6",
"girl": "#FF69B4",
"boy": "#ADD8E6"
}
}"""
)
js = textwrap.dedent(
# pylint: disable=line-too-long
"""\
{
"CHART-6GdlekVise": {
"children": [],
"id": "CHART-6GdlekVise",
"meta": {
"chartId": 5547,
"height": 50,
"sliceName": "Top 10 Girl Name Share",
"width": 5
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-eh0w37bWbR"
],
"type": "CHART"
},
"CHART-6n9jxb30JG": {
"children": [],
"id": "CHART-6n9jxb30JG",
"meta": {
"chartId": 5540,
"height": 36,
"sliceName": "Genders by State",
"width": 5
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW--EyBZQlDi"
],
"type": "CHART"
},
"CHART-Jj9qh1ol-N": {
"children": [],
"id": "CHART-Jj9qh1ol-N",
"meta": {
"chartId": 5545,
"height": 50,
"sliceName": "Boy Name Cloud",
"width": 4
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-kzWtcvo8R1"
],
"type": "CHART"
},
"CHART-ODvantb_bF": {
"children": [],
"id": "CHART-ODvantb_bF",
"meta": {
"chartId": 5548,
"height": 50,
"sliceName": "Top 10 Boy Name Share",
"width": 5
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-kzWtcvo8R1"
],
"type": "CHART"
},
"CHART-PAXUUqwmX9": {
"children": [],
"id": "CHART-PAXUUqwmX9",
"meta": {
"chartId": 5538,
"height": 34,
"sliceName": "Genders",
"width": 3
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-2n0XgiHDgs"
],
"type": "CHART"
},
"CHART-_T6n_K9iQN": {
"children": [],
"id": "CHART-_T6n_K9iQN",
"meta": {
"chartId": 5539,
"height": 36,
"sliceName": "Trends",
"width": 7
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW--EyBZQlDi"
],
"type": "CHART"
},
"CHART-eNY0tcE_ic": {
"children": [],
"id": "CHART-eNY0tcE_ic",
"meta": {
"chartId": 5537,
"height": 34,
"sliceName": "Participants",
"width": 3
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-2n0XgiHDgs"
],
"type": "CHART"
},
"CHART-g075mMgyYb": {
"children": [],
"id": "CHART-g075mMgyYb",
"meta": {
"chartId": 5541,
"height": 50,
"sliceName": "Girls",
"width": 3
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-eh0w37bWbR"
],
"type": "CHART"
},
"CHART-n-zGGE6S1y": {
"children": [],
"id": "CHART-n-zGGE6S1y",
"meta": {
"chartId": 5542,
"height": 50,
"sliceName": "Girl Name Cloud",
"width": 4
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-eh0w37bWbR"
],
"type": "CHART"
},
"CHART-vJIPjmcbD3": {
"children": [],
"id": "CHART-vJIPjmcbD3",
"meta": {
"chartId": 5543,
"height": 50,
"sliceName": "Boys",
"width": 3
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-kzWtcvo8R1"
],
"type": "CHART"
},
"DASHBOARD_VERSION_KEY": "v2",
"GRID_ID": {
"children": [
"ROW-2n0XgiHDgs",
"ROW--EyBZQlDi",
"ROW-eh0w37bWbR",
"ROW-kzWtcvo8R1"
],
"id": "GRID_ID",
"parents": [
"ROOT_ID"
],
"type": "GRID"
},
"HEADER_ID": {
"id": "HEADER_ID",
"meta": {
"text": "Births"
},
"type": "HEADER"
},
"MARKDOWN-zaflB60tbC": {
"children": [],
"id": "MARKDOWN-zaflB60tbC",
"meta": {
"code": "<div style=\\"text-align:center\\"> <h1>Birth Names Dashboard</h1> <img src=\\"/static/assets/images/babies.png\\" style=\\"width:50%;\\"></div>",
"height": 34,
"width": 6
},
"parents": [
"ROOT_ID",
"GRID_ID",
"ROW-2n0XgiHDgs"
],
"type": "MARKDOWN"
},
"ROOT_ID": {
"children": [
"GRID_ID"
],
"id": "ROOT_ID",
"type": "ROOT"
},
"ROW--EyBZQlDi": {
"children": [
"CHART-_T6n_K9iQN",
"CHART-6n9jxb30JG"
],
"id": "ROW--EyBZQlDi",
"meta": {
"background": "BACKGROUND_TRANSPARENT"
},
"parents": [
"ROOT_ID",
"GRID_ID"
],
"type": "ROW"
},
"ROW-2n0XgiHDgs": {
"children": [
"CHART-eNY0tcE_ic",
"MARKDOWN-zaflB60tbC",
"CHART-PAXUUqwmX9"
],
"id": "ROW-2n0XgiHDgs",
"meta": {
"background": "BACKGROUND_TRANSPARENT"
},
"parents": [
"ROOT_ID",
"GRID_ID"
],
"type": "ROW"
},
"ROW-eh0w37bWbR": {
"children": [
"CHART-g075mMgyYb",
"CHART-n-zGGE6S1y",
"CHART-6GdlekVise"
],
"id": "ROW-eh0w37bWbR",
"meta": {
"background": "BACKGROUND_TRANSPARENT"
},
"parents": [
"ROOT_ID",
"GRID_ID"
],
"type": "ROW"
},
"ROW-kzWtcvo8R1": {
"children": [
"CHART-vJIPjmcbD3",
"CHART-Jj9qh1ol-N",
"CHART-ODvantb_bF"
],
"id": "ROW-kzWtcvo8R1",
"meta": {
"background": "BACKGROUND_TRANSPARENT"
},
"parents": [
"ROOT_ID",
"GRID_ID"
],
"type": "ROW"
}
}
""" # pylint: enable=line-too-long
)
pos = json.loads(js)
# dashboard v2 doesn't allow add markup slice
dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
update_slice_ids(pos, dash.slices)
dash.dashboard_title = "USA Births Names"
dash.position_json = json.dumps(pos, indent=4)
dash.slug = "births"
db.session.commit()