| """This module contains the "Viz" objects |
| |
| These objects represent the backend of all the visualizations that |
| Superset can render. |
| """ |
| from __future__ import absolute_import |
| from __future__ import division |
| from __future__ import print_function |
| from __future__ import unicode_literals |
| |
| import copy |
| import hashlib |
| import logging |
| import uuid |
| import zlib |
| |
| from collections import OrderedDict, defaultdict |
| from datetime import datetime, timedelta |
| |
| import pandas as pd |
| import numpy as np |
| from flask import request |
| from flask_babel import lazy_gettext as _ |
| from markdown import markdown |
| import simplejson as json |
| from six import string_types, PY3 |
| from werkzeug.datastructures import ImmutableMultiDict, MultiDict |
| from werkzeug.urls import Href |
| from dateutil import relativedelta as rdelta |
| |
| from superset import app, utils, cache |
| from superset.forms import FormFactory |
| from superset.utils import flasher, DTTM_ALIAS |
| |
# Module-level alias for the Superset app's configuration; the viz classes
# in this module read settings from it via ``config.get(...)``.
config = app.config
| |
| |
class BaseViz(object):

    """All visualizations derive this base class

    A viz takes a datasource plus the submitted form data, builds a query
    object out of it, fetches the result as a pandas dataframe and
    serializes a (cached) json payload for the js layer.
    """

    viz_type = None
    verbose_name = "Base Viz"
    credits = ""
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics', 'groupby',
        )
    },)
    form_overrides = {}

    def __init__(self, datasource, form_data, slice_=None):
        """Validate the form data and derive the viz state from it

        :param datasource: the datasource the viz queries; must be truthy
        :param form_data: the submitted form values, either a plain dict
            or a werkzeug ``MultiDict``
        :param slice_: optional slice, consulted by ``cache_timeout``
        :raises Exception: when no datasource is provided
        """
        self.orig_form_data = form_data
        if not datasource:
            raise Exception("Viz is missing a datasource")
        self.datasource = datasource
        self.request = request
        self.viz_type = form_data.get("viz_type")
        self.slice = slice_

        # TODO refactor all form related logic out of here and into forms.py
        ff = FormFactory(self)
        form_class = ff.get_form()
        defaults = form_class().data.copy()
        previous_viz_type = form_data.get('previous_viz_type')
        if isinstance(form_data, (MultiDict, ImmutableMultiDict)):
            form = form_class(form_data)
        else:
            form = form_class(**form_data)
        data = form.data.copy()

        if not form.validate():
            for k, v in form.errors.items():
                # Validation errors are only flashed for regular page loads
                if not data.get('json') and not data.get('async'):
                    flasher("{}: {}".format(k, " ".join(v)), 'danger')
        if previous_viz_type != self.viz_type:
            # The viz type changed: only keep the values that were
            # explicitly submitted, everything else falls back to defaults
            data = {
                k: form.data[k]
                for k in form_data.keys()
                if k in form.data}
        defaults.update(data)
        self.form_data = defaults
        self.query = ""
        self.form_data['previous_viz_type'] = self.viz_type
        self.token = self.form_data.get(
            'token', 'token_' + uuid.uuid4().hex[:8])
        self.metrics = self.form_data.get('metrics') or []
        self.groupby = self.form_data.get('groupby') or []
        self.reassignments()

    @classmethod
    def flat_form_fields(cls):
        """Returns the field names used in ``fieldsets`` as a flat tuple

        Nested tuples/lists are flattened and falsy placeholders (``None``)
        are dropped. The order of the result is not defined.
        """
        l = set()
        for d in cls.fieldsets:
            for obj in d['fields']:
                if obj and isinstance(obj, (tuple, list)):
                    l |= {a for a in obj if a}
                elif obj:
                    l.add(obj)
        return tuple(l)

    def reassignments(self):
        """Hook called at the end of ``__init__``, does nothing by default"""
        pass

    def get_url(self, for_cache_key=False, json_endpoint=False, **kwargs):
        """Returns the URL for the viz

        :param for_cache_key: when getting the url as the identifier to hash
            for the cache key
        :type for_cache_key: boolean
        :param json_endpoint: point at the ``explore_json`` endpoint instead
            of ``explore``
        :type json_endpoint: boolean
        :param kwargs: extra query string parameters, they override the
            values coming from the original form data
        """
        d = self.orig_form_data.copy()
        if 'json' in d:
            del d['json']
        if 'action' in d:
            del d['action']
        if 'slice_id' in d:
            del d['slice_id']
        d.update(kwargs)
        # Remove unchecked checkboxes because HTML is weird like that
        od = MultiDict()
        for key in sorted(d.keys()):
            # if MultiDict is initialized with MD({key:[emptyarray]}),
            # key is included in d.keys() but accessing it throws
            try:
                if d[key] is False:
                    del d[key]
                    continue
            except IndexError:
                pass

            if isinstance(d, (MultiDict, ImmutableMultiDict)):
                v = d.getlist(key)
            else:
                v = d.get(key)
                if not isinstance(v, list):
                    v = [v]
            for item in v:
                od.add(key, item)

        base_endpoint = '/superset/explore'
        if json_endpoint:
            base_endpoint = '/superset/explore_json'

        href = Href('{}/{}/{}/'.format(
            base_endpoint, self.datasource.type, self.datasource.id))
        if for_cache_key and 'force' in od:
            del od['force']
        return href(od)

    def get_df(self, query_obj=None):
        """Returns a pandas dataframe based on the query object

        :param query_obj: the query object to run, defaults to
            ``self.query_obj()``
        :raises utils.NoDataException: when the query returns no rows
        """
        if not query_obj:
            query_obj = self.query_obj()

        self.error_msg = ""
        self.results = None

        timestamp_format = None
        if self.datasource.type == 'table':
            dttm_col = self.datasource.get_col(query_obj['granularity'])
            if dttm_col:
                timestamp_format = dttm_col.python_date_format

        # The datasource here can be different backend but the interface is
        # common
        self.results = self.datasource.query(**query_obj)
        self.query = self.results.query
        df = self.results.df
        # Transform the timestamp we received from database to pandas
        # supported datetime format. If no python_date_format is specified,
        # the pattern will be considered as the default ISO date format
        # If the datetime format is unix, the parse will use the
        # corresponding parsing logic.
        if df is None or df.empty:
            raise utils.NoDataException("No data.")
        else:
            if DTTM_ALIAS in df.columns:
                if timestamp_format in ("epoch_s", "epoch_ms"):
                    df[DTTM_ALIAS] = pd.to_datetime(df[DTTM_ALIAS], utc=False)
                else:
                    df[DTTM_ALIAS] = pd.to_datetime(
                        df[DTTM_ALIAS], utc=False, format=timestamp_format)
                if self.datasource.offset:
                    df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
            # ``replace`` is not in place: the result has to be kept for the
            # infinities to end up as 0 along with the other missing values
            df = df.replace([np.inf, -np.inf], np.nan)
            df = df.fillna(0)
        return df

    @property
    def form(self):
        """The form instantiated with the current form data"""
        return self.form_class(**self.form_data)

    @property
    def form_class(self):
        """The form class built by the ``FormFactory`` for this viz"""
        return FormFactory(self).get_form()

    def get_extra_filters(self):
        """Returns the json-decoded ``extra_filters``, ``{}`` when unset"""
        extra_filters = self.form_data.get('extra_filters')
        if not extra_filters:
            return {}
        return json.loads(extra_filters)

    def query_filters(self, is_having_filter=False):
        """Processes the filters for the query

        :param is_having_filter: read the ``having_*`` fields instead of
            the ``flt_*`` ones, extra filters are then ignored
        :returns: a list of ``(column, operator, value)`` tuples
        """
        form_data = self.form_data
        # Building filters
        filters = []
        field_prefix = 'flt' if not is_having_filter else 'having'
        for i in range(1, 10):
            col = form_data.get(field_prefix + "_col_" + str(i))
            op = form_data.get(field_prefix + "_op_" + str(i))
            eq = form_data.get(field_prefix + "_eq_" + str(i))
            if col and op and eq is not None:
                filters.append((col, op, eq))

        if is_having_filter:
            return filters

        # Extra filters (coming from dashboard)
        for col, vals in self.get_extra_filters().items():
            if not (col and vals):
                continue
            elif col in self.datasource.filterable_column_names:
                # Quote values with comma to avoid conflict
                vals = ["'{}'".format(x) if "," in x else x for x in vals]
                filters += [(col, 'in', ",".join(vals))]
        return filters

    def query_obj(self):
        """Building a query object

        :returns: a dict of keyword arguments for ``datasource.query``
        """
        form_data = self.form_data
        groupby = form_data.get("groupby") or []
        metrics = form_data.get("metrics") or ['count']
        extra_filters = self.get_extra_filters()
        granularity = (
            form_data.get("granularity") or form_data.get("granularity_sqla")
        )
        limit = int(form_data.get("limit", 0))
        timeseries_limit_metric = form_data.get("timeseries_limit_metric")
        row_limit = int(
            form_data.get("row_limit", config.get("ROW_LIMIT")))
        since = (
            extra_filters.get('__from') or form_data.get("since", "1 year ago")
        )
        from_dttm = utils.parse_human_datetime(since)
        now = datetime.now()
        if from_dttm > now:
            # A start date in the future is mirrored around "now"
            from_dttm = now - (from_dttm - now)
        until = extra_filters.get('__to') or form_data.get("until", "now")
        to_dttm = utils.parse_human_datetime(until)
        if from_dttm > to_dttm:
            flasher("The date range doesn't seem right.", "danger")
            from_dttm = to_dttm  # Making them identical to not raise

        # extras are used to query elements specific to a datasource type
        # for instance the extra where clause that applies only to Tables
        extras = {
            'where': form_data.get("where", ''),
            'having': form_data.get("having", ''),
            'having_druid': self.query_filters(is_having_filter=True),
            'time_grain_sqla': form_data.get("time_grain_sqla", ''),
            'druid_time_origin': form_data.get("druid_time_origin", ''),
        }
        d = {
            'granularity': granularity,
            'from_dttm': from_dttm,
            'to_dttm': to_dttm,
            'is_timeseries': self.is_timeseries,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'filter': self.query_filters(),
            'timeseries_limit': limit,
            'extras': extras,
            'timeseries_limit_metric': timeseries_limit_metric,
        }
        return d

    @property
    def cache_timeout(self):
        """The first timeout set on the slice, datasource or database

        Falls back to the ``CACHE_DEFAULT_TIMEOUT`` configuration value.
        """
        if self.slice and self.slice.cache_timeout:
            return self.slice.cache_timeout
        if self.datasource.cache_timeout:
            return self.datasource.cache_timeout
        if (
                hasattr(self.datasource, 'database') and
                self.datasource.database.cache_timeout):
            return self.datasource.database.cache_timeout
        return config.get("CACHE_DEFAULT_TIMEOUT")

    def get_json(self, force=False):
        """Handles caching around the json payload retrieval

        :param force: bypass the cache lookup and recompute the payload,
            also triggered by ``force=true`` in the form data
        :returns: the json-serialized payload
        """
        cache_key = self.cache_key
        payload = None
        force = force if force else self.form_data.get('force') == 'true'
        if not force:
            payload = cache.get(cache_key)

        if payload:
            is_cached = True
            try:
                cached_data = zlib.decompress(payload)
                if PY3:
                    cached_data = cached_data.decode('utf-8')
                payload = json.loads(cached_data)
            except Exception as e:
                # An unreadable cache entry is treated as a cache miss
                logging.error("Error reading cache: " +
                              utils.error_msg_from_exception(e))
                payload = None
            else:
                logging.info("Serving from cache")

        if not payload:
            is_cached = False
            cache_timeout = self.cache_timeout

            payload = {
                'cache_timeout': cache_timeout,
                'cache_key': cache_key,
                'csv_endpoint': self.csv_endpoint,
                'data': self.get_data(),
                'form_data': self.form_data,
                'json_endpoint': self.json_endpoint,
                'query': self.query,
                'standalone_endpoint': self.standalone_endpoint,
                'column_formats': self.data['column_formats'],
            }
            payload['cached_dttm'] = datetime.now().isoformat().split('.')[0]
            logging.info("Caching for the next {} seconds".format(
                cache_timeout))
            try:
                data = self.json_dumps(payload)
                if PY3:
                    data = bytes(data, 'utf-8')
                cache.set(
                    cache_key,
                    zlib.compress(data),
                    timeout=cache_timeout)
            except Exception as e:
                # cache.set call can fail if the backend is down or if
                # the key is too large or whatever other reasons
                logging.warning("Could not cache key {}".format(cache_key))
                logging.exception(e)
                cache.delete(cache_key)
        payload['is_cached'] = is_cached
        return self.json_dumps(payload)

    def json_dumps(self, obj):
        """Used by get_json, can be overridden to use specific switches"""
        return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)

    @property
    def data(self):
        """This is the data object serialized to the js layer"""
        content = {
            'csv_endpoint': self.csv_endpoint,
            'form_data': self.form_data,
            'json_endpoint': self.json_endpoint,
            'standalone_endpoint': self.standalone_endpoint,
            'token': self.token,
            'viz_name': self.viz_type,
            # Only the metrics that define a d3 format are listed
            'column_formats': {
                m.metric_name: m.d3format
                for m in self.datasource.metrics
                if m.d3format
            },
        }
        return content

    def get_csv(self):
        """Returns the dataframe as csv, keeping any non-default index"""
        df = self.get_df()
        include_index = not isinstance(df.index, pd.RangeIndex)
        return df.to_csv(index=include_index, encoding="utf-8")

    def get_data(self):
        """The viz specific payload, meant to be overridden"""
        return []

    @property
    def json_endpoint(self):
        """URL of the json endpoint for this viz"""
        return self.get_url(json_endpoint=True)

    @property
    def cache_key(self):
        """md5 of the viz URL, computed without the ``force`` parameter"""
        url = self.get_url(for_cache_key=True, json="true", force="false")
        return hashlib.md5(url.encode('utf-8')).hexdigest()

    @property
    def csv_endpoint(self):
        """URL of the viz with ``csv=true``"""
        return self.get_url(csv="true")

    @property
    def standalone_endpoint(self):
        """URL of the viz with ``standalone=true``"""
        return self.get_url(standalone="true")

    @property
    def json_data(self):
        """The ``data`` object serialized as json"""
        return json.dumps(self.data)
| |
| |
class TableViz(BaseViz):

    """Sortable, searchable html table

    Either aggregates (group by + metrics) or lists atomic rows (columns),
    never both at once.
    """

    viz_type = "table"
    verbose_name = _("Table View")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False
    fieldsets = (
        {
            'label': _("GROUP BY"),
            'description': _('Use this section if you want a query that aggregates'),
            'fields': ('groupby', 'metrics'),
        },
        {
            'label': _("NOT GROUPED BY"),
            'description': _('Use this section if you want to query atomic rows'),
            'fields': ('all_columns', 'order_by_cols'),
        },
        {
            'label': _("Options"),
            'fields': (
                'table_timestamp_format',
                'row_limit',
                'page_length',
                ('include_search', None),
            ),
        },
    )
    form_overrides = {
        'metrics': {
            'default': [],
        },
    }

    def query_obj(self):
        """Extend the base query with the raw columns and their ordering

        :raises Exception: when raw columns are mixed with group by/metrics
        """
        query = super(TableViz, self).query_obj()
        form_data = self.form_data
        raw_columns = form_data.get('all_columns')
        aggregates = form_data.get('groupby') or form_data.get('metrics')
        if raw_columns and aggregates:
            raise Exception(
                "Choose either fields to [Group By] and [Metrics] or "
                "[Columns], not both")
        if raw_columns:
            query['columns'] = raw_columns
            query['groupby'] = []
            # Each ordering entry is submitted as a json-encoded string
            ordering = form_data.get('order_by_cols') or []
            query['orderby'] = [json.loads(entry) for entry in ordering]
        return query

    def get_df(self, query_obj=None):
        """Fetch the dataframe, without the time column for "all" granularity"""
        frame = super(TableViz, self).get_df(query_obj)
        no_time_grain = self.form_data.get("granularity") == "all"
        if no_time_grain and DTTM_ALIAS in frame:
            del frame[DTTM_ALIAS]
        return frame

    def get_data(self):
        """Return the rows as records along with the ordered column names"""
        frame = self.get_df()
        return {
            'records': frame.to_dict(orient="records"),
            'columns': list(frame.columns),
        }

    def json_dumps(self, obj):
        """Serialize datetimes through ``utils.json_iso_dttm_ser``"""
        return json.dumps(obj, default=utils.json_iso_dttm_ser)
| |
| |
class PivotTableViz(BaseViz):

    """Pivot table rendered as html: pick rows, columns and metrics"""

    viz_type = "pivot_table"
    verbose_name = _("Pivot Table")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False
    fieldsets = (
        {
            'label': None,
            'fields': (
                'groupby',
                'columns',
                'metrics',
                'pandas_aggfunc',
            ),
        },
    )

    def query_obj(self):
        """Group the base query by both the row and the column fields

        :raises Exception: when group by or metrics are missing, or when
            the group by and columns fields overlap
        """
        query = super(PivotTableViz, self).query_obj()
        form_data = self.form_data
        groupby = form_data.get('groupby') or []
        columns = form_data.get('columns') or []
        metrics = form_data.get('metrics')
        if not groupby:
            raise Exception("Please choose at least one \"Group by\" field ")
        if not metrics:
            raise Exception("Please choose at least one metric")
        # Membership is symmetric, a single pass detects any shared field
        if any(field in groupby for field in columns):
            raise Exception("groupby and columns can't overlap")

        query['groupby'] = list(set(groupby) | set(columns))
        return query

    def get_df(self, query_obj=None):
        """Fetch the dataframe and pivot it, margins included"""
        form_data = self.form_data
        frame = super(PivotTableViz, self).get_df(query_obj)
        no_time_grain = form_data.get("granularity") == "all"
        if no_time_grain and DTTM_ALIAS in frame:
            del frame[DTTM_ALIAS]
        return frame.pivot_table(
            index=form_data.get('groupby'),
            columns=form_data.get('columns'),
            values=form_data.get('metrics'),
            aggfunc=form_data.get('pandas_aggfunc'),
            margins=True,
        )

    def get_data(self):
        """Render the pivoted dataframe as a styled html table"""
        css_classes = (
            "dataframe table table-striped table-bordered "
            "table-condensed table-hover").split(" ")
        pivoted = self.get_df()
        return pivoted.to_html(na_rep='', classes=css_classes)
| |
| |
class MarkupViz(BaseViz):

    """Free form widget fed with html or markdown"""

    viz_type = "markup"
    verbose_name = _("Markup")
    is_timeseries = False
    fieldsets = (
        {
            'label': None,
            'fields': ('markup_type', 'code'),
        },
    )

    def rendered(self):
        """Return the html for the submitted code

        Markdown is converted, html is passed through untouched and any
        other markup type yields ``None``.
        """
        form_data = self.form_data
        kind = form_data.get("markup_type")
        source = form_data.get("code", '')
        if kind == "markdown":
            return markdown(source)
        if kind == "html":
            return source
        return None

    def get_data(self):
        """Wrap the rendered markup under the ``html`` key"""
        return {'html': self.rendered()}
| |
| |
class SeparatorViz(MarkupViz):

    """Use to create section headers in a dashboard, similar to `Markup`"""

    viz_type = "separator"
    verbose_name = _("Separator")
    form_overrides = {
        'code': {
            # Adjacent literals are concatenated as is: each fragment has to
            # carry its own trailing space or newline
            'default': (
                "####Section Title\n"
                "A paragraph describing the section "
                "of the dashboard, right before the separator line "
                "\n\n"
                "---------------"
            ),
        }
    }
| |
| |
class WordCloudViz(BaseViz):

    """Colorful word cloud

    Rendered with the d3-cloud library:
    https://github.com/jasondavies/d3-cloud
    """

    viz_type = "word_cloud"
    verbose_name = _("Word Cloud")
    is_timeseries = False
    fieldsets = (
        {
            'label': None,
            'fields': (
                'series', 'metric', 'limit',
                ('size_from', 'size_to'),
                'rotation',
            ),
        },
    )

    def query_obj(self):
        """Query the single metric grouped by the series field"""
        form_data = self.form_data
        query = super(WordCloudViz, self).query_obj()
        query['metrics'] = [form_data.get('metric')]
        query['groupby'] = [form_data.get('series')]
        return query

    def get_data(self):
        """Return ``text``/``size`` records, one per word"""
        form_data = self.form_data
        frame = self.get_df()
        # Series first, metric second, so that the generic labels below
        # land on the right columns
        ordered = [form_data.get('series'), form_data.get('metric')]
        frame = frame[ordered]
        frame.columns = ['text', 'size']
        return frame.to_dict(orient="records")
| |
| |
class TreemapViz(BaseViz):

    """Treemap, a visualisation for hierarchical data."""

    viz_type = "treemap"
    verbose_name = _("Treemap")
    credits = '<a href="https://d3js.org">d3.js</a>'
    is_timeseries = False
    fieldsets = (
        {
            'label': None,
            'fields': (
                'metrics',
                'groupby',
            ),
        },
        {
            'label': _('Chart Options'),
            'fields': (
                'treemap_ratio',
                'number_format',
            ),
        },
    )

    def get_df(self, query_obj=None):
        """Fetch the dataframe indexed by the group by fields"""
        frame = super(TreemapViz, self).get_df(query_obj)
        return frame.set_index(self.form_data.get("groupby"))

    def _nest(self, metric, df):
        """Recursively turn the index levels into name/children nodes

        Leaves carry the metric under ``value``.
        """
        index = df.index
        if index.nlevels == 1:
            return [
                {"name": name, "value": value}
                for name, value in zip(index, df[metric])
            ]
        return [
            {"name": level, "children": self._nest(metric, df.loc[level])}
            for level in index.levels[0]
        ]

    def get_data(self):
        """Return one tree per column of the dataframe"""
        frame = self.get_df()
        trees = []
        for metric in frame.columns:
            trees.append({
                "name": metric,
                "children": self._nest(metric, frame),
            })
        return trees
| |
| |
class CalHeatmapViz(BaseViz):

    """Calendar heatmap."""

    viz_type = "cal_heatmap"
    verbose_name = _("Calendar Heatmap")
    credits = (
        '<a href=https://github.com/wa0x6e/cal-heatmap>cal-heatmap</a>')
    is_timeseries = True
    fieldsets = ({
        'label': None,
        'fields': (
            'metric',
            'domain_granularity',
            'subdomain_granularity',
        ),
    },)

    def get_data(self):
        """Returns the metric per timestamp and the calendar settings

        ``range`` is the number of domains (years, months, weeks, days or
        hours) needed to cover the ``since``/``until`` period.
        """
        df = self.get_df()
        form_data = self.form_data

        df.columns = ["timestamp", "metric"]
        # Keyed by the timestamp in seconds (``value`` is in nanoseconds)
        timestamps = {str(obj["timestamp"].value / 10**9):
                      obj.get("metric") for obj in df.to_dict("records")}

        start = utils.parse_human_datetime(form_data.get("since"))
        end = utils.parse_human_datetime(form_data.get("until"))
        domain = form_data.get("domain_granularity")
        diff_delta = rdelta.relativedelta(end, start)
        diff_secs = (end - start).total_seconds()

        if domain == "year":
            range_ = diff_delta.years + 1
        elif domain == "month":
            range_ = diff_delta.years * 12 + diff_delta.months + 1
        elif domain == "week":
            # Counted from the elapsed time like days and hours: the weeks
            # of a relativedelta only account for the days left over once
            # whole years and months have been taken out
            range_ = diff_secs // (7*24*60*60) + 1
        elif domain == "day":
            range_ = diff_secs // (24*60*60) + 1
        else:
            range_ = diff_secs // (60*60) + 1

        return {
            "timestamps": timestamps,
            "start": start,
            "domain": domain,
            "subdomain": form_data.get("subdomain_granularity"),
            "range": range_,
        }

    def query_obj(self):
        """Query the single metric picked in the form"""
        qry = super(CalHeatmapViz, self).query_obj()
        qry["metrics"] = [self.form_data["metric"]]
        return qry
| |
| |
class NVD3Viz(BaseViz):

    """Common ancestor of the nvd3 based visualizations"""

    viz_type = None
    verbose_name = "Base NVD3 Viz"
    is_timeseries = False
    credits = '<a href="http://nvd3.org/">NVD3.org</a>'
| |
| |
class BoxPlotViz(NVD3Viz):

    """Box plot viz from ND3"""

    viz_type = "box_plot"
    verbose_name = _("Box Plot")
    sort_series = False
    is_timeseries = True
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics',
            'groupby', 'limit',
        ),
    }, {
        'label': _('Chart Options'),
        'fields': (
            'whisker_options',
        )
    },)

    def get_df(self, query_obj=None):
        """Returns the box plot statistics for each group

        Every metric is aggregated into Q1, median, Q3, both whiskers and
        the set of outliers, following the ``whisker_options`` field.

        :raises ValueError: when the whisker option is missing or unknown
        """
        form_data = self.form_data
        df = super(BoxPlotViz, self).get_df(query_obj)

        df = df.fillna(0)

        # conform to NVD3 names
        def Q1(series):  # need to be named functions - can't use lambdas
            return np.percentile(series, 25)

        def Q3(series):
            return np.percentile(series, 75)

        whisker_type = form_data.get('whisker_options')
        if whisker_type == "Tukey":

            def whisker_high(series):
                upper_outer_lim = Q3(series) + 1.5 * (Q3(series) - Q1(series))
                # the closest value below the upper outer limit is the
                # largest of the values that do not exceed it
                return series[series <= upper_outer_lim].max()

            def whisker_low(series):
                lower_outer_lim = Q1(series) - 1.5 * (Q3(series) - Q1(series))
                # find the closest value above the lower outer limit
                return series[series >= lower_outer_lim].min()

        elif whisker_type == "Min/max (no outliers)":

            def whisker_high(series):
                return series.max()

            def whisker_low(series):
                return series.min()

        elif whisker_type and " percentiles" in whisker_type:
            low, high = whisker_type.replace(" percentiles", "").split("/")

            def whisker_high(series):
                return np.percentile(series, int(high))

            def whisker_low(series):
                return np.percentile(series, int(low))

        else:
            raise ValueError("Unknown whisker type: {}".format(whisker_type))

        def outliers(series):
            above = series[series > whisker_high(series)]
            below = series[series < whisker_low(series)]
            # pandas sometimes doesn't like getting lists back here
            return set(above.tolist() + below.tolist())

        aggregate = [Q1, np.median, Q3, whisker_high, whisker_low, outliers]
        df = df.groupby(form_data.get('groupby')).agg(aggregate)
        return df

    def to_series(self, df, classed='', title_suffix=''):
        """Returns one labelled box per group and metric

        ``classed`` and ``title_suffix`` are accepted but not used here.
        """
        label_sep = " - "
        chart_data = []
        for index_value, row in zip(df.index, df.to_dict(orient="records")):
            if isinstance(index_value, tuple):
                index_value = label_sep.join(index_value)
            boxes = defaultdict(dict)
            # Columns are (metric, statistic) pairs
            for (label, key), value in row.items():
                if key == "median":
                    key = "Q2"
                boxes[label][key] = value
            for label, box in boxes.items():
                if len(self.form_data.get("metrics")) > 1:
                    # need to render data labels with metrics
                    chart_label = label_sep.join([index_value, label])
                else:
                    chart_label = index_value
                chart_data.append({
                    "label": chart_label,
                    "values": box,
                })
        return chart_data

    def get_data(self):
        """Returns the boxes computed from the aggregated dataframe"""
        df = self.get_df()
        chart_data = self.to_series(df)
        return chart_data
| |
| |
class BubbleViz(NVD3Viz):

    """Bubble chart built on top of NVD3"""

    viz_type = "bubble"
    verbose_name = _("Bubble Chart")
    is_timeseries = False
    fieldsets = (
        {
            'label': None,
            'fields': (
                'series', 'entity',
                'x', 'y',
                'size', 'limit',
            ),
        },
        {
            'label': _('Chart Options'),
            'fields': (
                ('x_log_scale', 'y_log_scale'),
                ('show_legend', None),
                'max_bubble_size',
                ('x_axis_label', 'y_axis_label'),
            ),
        },
    )

    def query_obj(self):
        """Query the size, x and y metrics grouped by series and entity

        The picked fields are also kept on the instance for ``get_df``.

        :raises Exception: when a metric, the entity or the series is unset
        """
        fd = self.form_data
        query = super(BubbleViz, self).query_obj()
        # A set literal, so that series == entity is only grouped on once
        query['groupby'] = list({
            fd.get('series'),
            fd.get('entity')
        })
        self.x_metric = fd.get('x')
        self.y_metric = fd.get('y')
        self.z_metric = fd.get('size')
        self.entity = fd.get('entity')
        self.series = fd.get('series')

        query['metrics'] = [self.z_metric, self.x_metric, self.y_metric]
        required = query['metrics'] + [self.entity, self.series]
        if not all(required):
            raise Exception("Pick a metric for x, y and size")
        return query

    def get_df(self, query_obj=None):
        """Fetch the dataframe and add the columns NVD3 expects"""
        frame = super(BubbleViz, self).get_df(query_obj)
        frame = frame.fillna(0)
        frame['x'] = frame[[self.x_metric]]
        frame['y'] = frame[[self.y_metric]]
        frame['size'] = frame[[self.z_metric]]
        frame['shape'] = 'circle'
        frame['group'] = frame[[self.series]]
        return frame

    def get_data(self):
        """Return the records bucketed by their ``group`` value"""
        buckets = defaultdict(list)
        for record in self.get_df().to_dict(orient='records'):
            buckets[record['group']].append(record)
        return [
            {'key': group, 'values': records}
            for group, records in buckets.items()
        ]
| |
| |
class BigNumberViz(BaseViz):

    """A single metric displayed as a big number, with its trendline"""

    viz_type = "big_number"
    verbose_name = _("Big Number with Trendline")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = True
    fieldsets = (
        {
            'label': None,
            'fields': (
                'metric',
                'compare_lag',
                'compare_suffix',
                'y_axis_format',
            ),
        },
    )
    form_overrides = {
        'y_axis_format': {
            'label': _('Number format'),
        },
    }

    def reassignments(self):
        """Fall back on the submitted ``metrics`` when ``metric`` is unset"""
        if not self.form_data.get('metric'):
            self.form_data['metric'] = self.orig_form_data.get('metrics')

    def query_obj(self):
        """Query the single picked metric

        :raises Exception: when no metric is picked
        """
        query = super(BigNumberViz, self).query_obj()
        picked = self.form_data.get('metric')
        if not picked:
            raise Exception("Pick a metric!")
        query['metrics'] = [self.form_data.get('metric')]
        self.form_data['metric'] = picked
        return query

    def get_data(self):
        """Return the rows sorted on the first column and the compare options"""
        fd = self.form_data
        frame = self.get_df()
        frame.sort_values(by=frame.columns[0], inplace=True)
        # Anything but a string of digits means no comparison
        lag = fd.get("compare_lag", "")
        if lag and lag.isdigit():
            lag = int(lag)
        else:
            lag = 0
        return {
            'data': frame.values.tolist(),
            'compare_lag': lag,
            'compare_suffix': fd.get('compare_suffix', ''),
        }
| |
| |
class BigNumberTotalViz(BaseViz):

    """A single metric displayed as a big number, with a subheader"""

    viz_type = "big_number_total"
    verbose_name = _("Big Number")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False
    fieldsets = (
        {
            'label': None,
            'fields': (
                'metric',
                'subheader',
                'y_axis_format',
            ),
        },
    )
    form_overrides = {
        'y_axis_format': {
            'label': _('Number format'),
        },
    }

    def reassignments(self):
        """Fall back on the submitted ``metrics`` when ``metric`` is unset"""
        if not self.form_data.get('metric'):
            self.form_data['metric'] = self.orig_form_data.get('metrics')

    def query_obj(self):
        """Query the single picked metric

        :raises Exception: when no metric is picked
        """
        query = super(BigNumberTotalViz, self).query_obj()
        picked = self.form_data.get('metric')
        if not picked:
            raise Exception("Pick a metric!")
        query['metrics'] = [self.form_data.get('metric')]
        self.form_data['metric'] = picked
        return query

    def get_data(self):
        """Return the rows sorted on the first column and the subheader"""
        frame = self.get_df()
        frame.sort_values(by=frame.columns[0], inplace=True)
        return {
            'data': frame.values.tolist(),
            'subheader': self.form_data.get('subheader', ''),
        }
| |
| |
class NVD3TimeSeriesViz(NVD3Viz):

    """A rich line chart component with tons of options"""

    viz_type = "line"
    verbose_name = _("Time Series - Line Chart")
    sort_series = False
    is_timeseries = True
    fieldsets = ({
        'label': None,
        'fields': (
            'metrics',
            'groupby',
            ('limit', 'timeseries_limit_metric'),
        ),
    }, {
        'label': _('Chart Options'),
        'fields': (
            ('show_brush', 'show_legend'),
            ('rich_tooltip', 'y_axis_zero'),
            ('y_log_scale', 'contribution'),
            ('show_markers', 'x_axis_showminmax'),
            ('line_interpolation', None),
            ('x_axis_format', 'y_axis_format'),
            ('x_axis_label', 'y_axis_label'),
        ),
    }, {
        'label': _('Advanced Analytics'),
        'description': _(
            "This section contains options "
            "that allow for advanced analytical post processing "
            "of query results"),
        'fields': (
            ('rolling_type', 'rolling_periods'),
            'time_compare',
            ('num_period_compare', 'period_ratio_type'),
            None,
            ('resample_how', 'resample_rule',), 'resample_fillmethod'
        ),
    },)

    def get_df(self, query_obj=None):
        """Returns the dataframe pivoted on time, post processing applied

        In order: resampling, sorting of the series, contribution, period
        comparison and rolling window / cumulative sum, each one driven by
        the matching form fields.

        :raises Exception: when the granularity is "all"
        """
        form_data = self.form_data
        df = super(NVD3TimeSeriesViz, self).get_df(query_obj)
        df = df.fillna(0)
        if form_data.get("granularity") == "all":
            raise Exception("Pick a time granularity for your time series")

        df = df.pivot_table(
            index=DTTM_ALIAS,
            columns=form_data.get('groupby'),
            values=form_data.get('metrics'))

        fm = form_data.get("resample_fillmethod")
        if not fm:
            fm = None
        how = form_data.get("resample_how")
        rule = form_data.get("resample_rule")
        if how and rule:
            # NOTE(review): ``how`` and ``fill_method`` belong to the legacy
            # resample API, left as is since the replacement is not a
            # drop-in - confirm against the pandas version in use
            df = df.resample(rule, how=how, fill_method=fm)
            if not fm:
                df = df.fillna(0)

        if self.sort_series:
            # Biggest series first
            dfs = df.sum()
            dfs.sort_values(ascending=False, inplace=True)
            df = df[dfs.index]

        if form_data.get("contribution"):
            # Each value as a share of the total of its timestamp
            dft = df.T
            df = (dft / dft.sum()).T

        num_period_compare = form_data.get("num_period_compare")
        if num_period_compare:
            num_period_compare = int(num_period_compare)
            prt = form_data.get('period_ratio_type')
            if prt and prt == 'growth':
                df = (df / df.shift(num_period_compare)) - 1
            elif prt and prt == 'value':
                df = df - df.shift(num_period_compare)
            else:
                df = df / df.shift(num_period_compare)

            # The first periods have nothing to be compared against
            df = df[num_period_compare:]

        rolling_periods = form_data.get("rolling_periods")
        rolling_type = form_data.get("rolling_type")

        if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
            # The rolling type is the name of the aggregation to apply on
            # the window, safe to look up given the check above
            window = df.rolling(int(rolling_periods), min_periods=0)
            df = getattr(window, rolling_type)()
        elif rolling_type == 'cumsum':
            df = df.cumsum()
        return df

    def to_series(self, df, classed='', title_suffix=''):
        """Returns one NVD3 series per numeric column of the dataframe

        :param df: dataframe indexed by time, note that it gets a
            ``DTTM_ALIAS`` column added and its columns relabelled
        :param classed: value set under ``classed`` on every series
        :param title_suffix: appended to the key of every series
        """
        cols = []
        for col in df.columns:
            if col == '':
                cols.append('N/A')
            elif col is None:
                cols.append('NULL')
            else:
                cols.append(col)
        df.columns = cols
        series = df.to_dict('series')

        chart_data = []
        for name in df.T.index.tolist():
            ys = series[name]
            # Only numeric (bool, int, uint, float, complex) columns are
            # charted
            if df[name].dtype.kind not in "biufc":
                continue
            df[DTTM_ALIAS] = pd.to_datetime(df.index, utc=False)
            if isinstance(name, string_types):
                series_title = name
            else:
                name = ["{}".format(s) for s in name]
                if len(self.form_data.get('metrics')) > 1:
                    series_title = ", ".join(name)
                else:
                    # Single metric: its name is left out of the title
                    series_title = ", ".join(name[1:])
            if title_suffix:
                series_title += title_suffix

            d = {
                "key": series_title,
                "classed": classed,
                "values": [
                    {'x': ds, 'y': ys[ds] if ds in ys else None}
                    for ds in df[DTTM_ALIAS]
                ],
            }
            chart_data.append(d)
        return chart_data

    def get_data(self):
        """Returns the series, plus the time shifted ones when comparing

        With ``time_compare`` set, the query is run a second time over the
        shifted period and the resulting series, suffixed with "---", are
        merged in and the whole sorted by key.
        """
        df = self.get_df()
        chart_data = self.to_series(df)

        time_compare = self.form_data.get('time_compare')
        if time_compare:
            query_object = self.query_obj()
            delta = utils.parse_human_timedelta(time_compare)
            query_object['inner_from_dttm'] = query_object['from_dttm']
            query_object['inner_to_dttm'] = query_object['to_dttm']
            query_object['from_dttm'] -= delta
            query_object['to_dttm'] -= delta

            df2 = self.get_df(query_object)
            # Shift the index back so that both periods overlap on the chart
            df2.index += delta
            chart_data += self.to_series(
                df2, classed='superset', title_suffix="---")
            chart_data = sorted(chart_data, key=lambda x: x['key'])
        return chart_data
| |
| |
class NVD3TimeSeriesBarViz(NVD3TimeSeriesViz):

    """Time series rendered as bars, with time on the x axis"""

    verbose_name = _("Time Series - Bar Chart")
    viz_type = "bar"
    sort_series = True
    # The first and third fieldsets are taken from the parent class; only
    # the chart options in the middle are specific to the bar chart.
    fieldsets = [
        NVD3TimeSeriesViz.fieldsets[0],
        {
            'label': _('Chart Options'),
            'fields': (
                ('show_brush', 'show_legend', 'show_bar_value'),
                ('rich_tooltip', 'y_axis_zero'),
                ('y_log_scale', 'contribution'),
                ('x_axis_format', 'y_axis_format'),
                ('line_interpolation', 'bar_stacked'),
                ('x_axis_showminmax', 'bottom_margin'),
                ('x_axis_label', 'y_axis_label'),
                ('reduce_x_ticks', 'show_controls'),
            ),
        },
        NVD3TimeSeriesViz.fieldsets[2],
    ]
| |
| |
class NVD3CompareTimeSeriesViz(NVD3TimeSeriesViz):

    """Line chart used to compare the % change of series over time

    Everything but the type identifier and display name is inherited
    from the parent time series class.
    """

    verbose_name = _("Time Series - Percent Change")
    viz_type = "compare"
| |
| |
class NVD3TimeSeriesStackedViz(NVD3TimeSeriesViz):

    """Stacked area chart over time"""

    verbose_name = _("Time Series - Stacked")
    viz_type = "area"
    sort_series = True
    # The first and third fieldsets are taken from the parent class; only
    # the chart options in the middle are specific to the area chart.
    fieldsets = [
        NVD3TimeSeriesViz.fieldsets[0],
        {
            'label': _('Chart Options'),
            'fields': (
                ('show_brush', 'show_legend'),
                ('rich_tooltip', 'y_axis_zero'),
                ('y_log_scale', 'contribution'),
                ('x_axis_format', 'y_axis_format'),
                ('x_axis_showminmax', 'show_controls'),
                ('line_interpolation', 'stacked_style'),
            ),
        },
        NVD3TimeSeriesViz.fieldsets[2],
    ]
| |
| |
class DistributionPieViz(NVD3Viz):

    """Pie chart showing how the first metric is split across groups"""

    verbose_name = _("Distribution - NVD3 - Pie Chart")
    viz_type = "pie"
    is_timeseries = False
    fieldsets = (
        {
            'label': None,
            'fields': (
                'metrics', 'groupby',
                'limit',
                'pie_label_type',
                ('donut', 'show_legend'),
                'labels_outside',
            ),
        },
    )

    def query_obj(self):
        """Return the parent query object with the time series flag off"""
        query = super(DistributionPieViz, self).query_obj()
        query['is_timeseries'] = False
        return query

    def get_df(self, query_obj=None):
        """Return the first metric pivoted by group, largest value first"""
        raw = super(DistributionPieViz, self).get_df(query_obj)
        pivoted = raw.pivot_table(
            index=self.groupby,
            values=[self.metrics[0]])
        return pivoted.sort_values(by=self.metrics[0], ascending=False)

    def get_data(self):
        """Return one ``{'x': ..., 'y': ...}`` record per row"""
        records = self.get_df().reset_index()
        # The rename expects exactly two columns after the index reset.
        records.columns = ['x', 'y']
        return records.to_dict(orient="records")
| |
| |
class HistogramViz(BaseViz):

    """Histogram of the values of a single numeric column"""

    viz_type = "histogram"
    verbose_name = _("Histogram")
    is_timeseries = False
    fieldsets = ({
        'label': None,
        'fields': (
            ('all_columns_x',),
            'row_limit',
        )
    }, {
        'label': _("Histogram Options"),
        'fields': (
            'link_length',
        )
    },)

    # The generic ``link_length`` field is relabelled here to hold the
    # number of bins.
    form_overrides = {
        'all_columns_x': {
            'label': _('Numeric Column'),
            'description': _("Select the numeric column to draw the histogram"),
        },
        'link_length': {
            'label': _("No of Bins"),
            'description': _("Select number of bins for the histogram"),
            'default': 5
        }
    }

    def query_obj(self):
        """Returns the query object for this visualization

        Raises an Exception when no numeric column has been selected.
        """
        d = super(HistogramViz, self).query_obj()
        d['row_limit'] = self.form_data.get(
            'row_limit', int(config.get('ROW_LIMIT')))
        numeric_column = self.form_data.get('all_columns_x')
        if numeric_column is None:
            raise Exception("Must have one numeric column specified")
        d['columns'] = [numeric_column]
        return d

    def get_df(self, query_obj=None):
        """Returns a pandas dataframe based on the query object

        Infinite values are turned into NaN and every NaN is then
        replaced with 0. Raises an Exception when the query returns no
        rows.
        """
        if not query_obj:
            query_obj = self.query_obj()

        self.results = self.datasource.query(**query_obj)
        self.query = self.results.query
        df = self.results.df

        if df is None or df.empty:
            raise Exception("No data, to build histogram")

        # ``replace`` returns a new frame; the result has to be kept,
        # otherwise the infinities survive into the chart data.
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(0)
        return df

    def get_data(self):
        """Returns the chart data: the first column's values as a list"""
        df = self.get_df()
        chart_data = df[df.columns[0]].values.tolist()
        return chart_data
| |
| |
class DistributionBarViz(DistributionPieViz):

    """A good old bar chart

    The ``groupby`` field defines the series and the optional
    ``columns`` field breaks each series down further.
    """

    viz_type = "dist_bar"
    verbose_name = _("Distribution - Bar Chart")
    is_timeseries = False
    fieldsets = ({
        'label': _('Chart Options'),
        'fields': (
            'groupby',
            'columns',
            'metrics',
            'row_limit',
            ('show_legend', 'show_bar_value', 'bar_stacked'),
            ('y_axis_format', 'bottom_margin'),
            ('x_axis_label', 'y_axis_label'),
            ('reduce_x_ticks', 'contribution'),
            ('show_controls', 'order_bars'),
        )
    },)
    form_overrides = {
        'groupby': {
            'label': _('Series'),
        },
        'columns': {
            'label': _('Breakdowns'),
            'description': _("Defines how each series is broken down"),
        },
    }

    def query_obj(self):
        """Return the query object grouped by series and breakdowns

        Raises an Exception when a column is used more than once across
        Series and Breakdowns, when no metric is picked or when no
        series field is picked.
        """
        # Deliberately skips DistributionPieViz.query_obj.
        d = super(DistributionPieViz, self).query_obj()  # noqa
        fd = self.form_data
        d['is_timeseries'] = False
        gb = fd.get('groupby') or []
        cols = fd.get('columns') or []
        # Keep an ordered list (like the other visualizations do) so the
        # generated query is deterministic; the set is only used to
        # detect duplicates.
        combined = list(gb) + list(cols)
        if len(set(combined)) < len(combined):
            raise Exception("Can't have overlap between Series and Breakdowns")
        d['groupby'] = combined
        if not self.metrics:
            raise Exception("Pick at least one metric")
        if not self.groupby:
            raise Exception("Pick at least one field for [Series]")
        return d

    def get_df(self, query_obj=None):
        """Return the metrics pivoted by series (rows) and breakdowns

        Rows are ordered by the per-series total of the first metric,
        largest first. With the ``contribution`` option every row is
        divided by its own sum.
        """
        # Deliberately skips DistributionPieViz.get_df.
        df = super(DistributionPieViz, self).get_df(query_obj)  # noqa
        fd = self.form_data

        row = df.groupby(self.groupby).sum()[self.metrics[0]].copy()
        row.sort_values(ascending=False, inplace=True)
        columns = fd.get('columns') or []
        pt = df.pivot_table(
            index=self.groupby,
            columns=columns,
            values=self.metrics)
        if fd.get("contribution"):
            pt = pt.fillna(0)
            # Transpose so the division by the sum is applied per row.
            pt = pt.T
            pt = (pt / pt.sum()).T
        pt = pt.reindex(row.index)
        return pt

    def get_data(self):
        """Return one ``{"key": title, "values": [...]}`` dict per column

        Columns whose dtype kind is not in "biufc" are skipped. Each
        value is a ``{'x': str(index label), 'y': cell}`` dict.
        """
        df = self.get_df()
        several_metrics = len(self.metrics) > 1
        chart_data = []
        for name, ys in df.iteritems():
            if ys.dtype.kind not in "biufc":
                continue
            if isinstance(name, string_types):
                series_title = name
            else:
                # With a single metric the leading label part is left
                # out of the title.
                parts = name if several_metrics else name[1:]
                # Label parts can be non-string breakdown values, so they
                # are formatted before joining in both cases.
                series_title = ", ".join("{}".format(s) for s in parts)
            chart_data.append({
                "key": series_title,
                "values": [
                    {'x': str(i), 'y': v}
                    for i, v in ys.iteritems()]
            })
        return chart_data
| |
| |
class SunburstViz(BaseViz):

    """A multi level sunburst chart"""

    viz_type = "sunburst"
    verbose_name = _("Sunburst")
    is_timeseries = False
    credits = (
        'Kerry Rodden '
        '@<a href="https://bl.ocks.org/kerryrodden/7090426">bl.ocks.org</a>')
    fieldsets = ({
        'label': None,
        'fields': (
            'groupby',
            'metric', 'secondary_metric',
            'row_limit',
        )
    },)
    form_overrides = {
        'metric': {
            'label': _('Primary Metric'),
            'description': _(
                "The primary metric is used to "
                "define the arc segment sizes"),
        },
        'secondary_metric': {
            'label': _('Secondary Metric'),
            'description': _(
                "This secondary metric is used to "
                "define the color as a ratio against the primary metric. "
                "If the two metrics match, color is mapped level groups"),
        },
        'groupby': {
            'label': _('Hierarchy'),
            'description': _("This defines the level of the hierarchy"),
        },
    }

    def get_df(self, query_obj=None):
        """Return the parent class' dataframe unchanged"""
        df = super(SunburstViz, self).get_df(query_obj)
        return df

    def get_data(self):
        """Return the rows as a list of lists: hierarchy values, m1, m2

        When both metrics are the same the whole frame is used and its
        columns are relabelled; otherwise the hierarchy columns followed
        by the two metric columns are selected.
        """
        df = self.get_df()

        # Work on a copy: extending the list stored in form_data would
        # silently append the metric names to the 'groupby' field.
        cols = list(self.form_data.get('groupby') or [])
        metric = self.form_data.get('metric')
        secondary_metric = self.form_data.get('secondary_metric')
        if metric == secondary_metric:
            # if m1 == m2 the metric column is duplicated
            ndf = df
            ndf.columns = cols + ['m1', 'm2']
        else:
            cols += [
                self.form_data['metric'], self.form_data['secondary_metric']]
            ndf = df[cols]
        return json.loads(ndf.to_json(orient="values"))  # TODO fix this nonsense

    def query_obj(self):
        """Return the query object with the two metrics set"""
        qry = super(SunburstViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric'], self.form_data['secondary_metric']]
        return qry
| |
| |
class SankeyViz(BaseViz):

    """A Sankey diagram that requires a parent-child dataset"""

    viz_type = "sankey"
    verbose_name = _("Sankey")
    is_timeseries = False
    credits = '<a href="https://www.npmjs.com/package/d3-sankey">d3-sankey on npm</a>'
    fieldsets = ({
        'label': None,
        'fields': (
            'groupby',
            'metric',
            'row_limit',
        )
    },)
    form_overrides = {
        'groupby': {
            'label': _('Source / Target'),
            'description': _("Choose a source and a target"),
        },
    }

    def query_obj(self):
        """Return the query object for the single selected metric

        Raises an Exception unless exactly two group by columns (source
        and target) are present.
        """
        qry = super(SankeyViz, self).query_obj()
        if len(qry['groupby']) != 2:
            raise Exception("Pick exactly 2 columns as [Source / Target]")
        qry['metrics'] = [
            self.form_data['metric']]
        return qry

    def get_data(self):
        """Return the links as ``{'source', 'target', 'value'}`` records

        Raises an Exception naming a faulty link when the links contain
        a loop.
        """
        df = self.get_df()
        df.columns = ['source', 'target', 'value']
        recs = df.to_dict(orient='records')

        hierarchy = defaultdict(set)
        for row in recs:
            hierarchy[row['source']].add(row['target'])

        def find_cycle(g):
            """Return a (vertex, neighbour) link on a cycle, or None"""
            path = set()
            # Vertices whose whole subtree is known to be loop free.
            # Skipping them keeps the search linear instead of walking
            # shared subtrees again for every path that reaches them.
            cleared = set()

            def visit(vertex):
                if vertex in cleared:
                    return None
                path.add(vertex)
                for neighbour in g.get(vertex, ()):
                    if neighbour in path or visit(neighbour):
                        return (vertex, neighbour)
                path.remove(vertex)
                cleared.add(vertex)
                return None

            for v in g:
                cycle = visit(v)
                if cycle:
                    return cycle
            return None

        cycle = find_cycle(hierarchy)
        if cycle:
            raise Exception(
                "There's a loop in your Sankey, please provide a tree. "
                "Here's a faulty link: {}".format(cycle))
        return recs
| |
| |
class DirectedForceViz(BaseViz):

    """Animated force layout graph built from source / target pairs"""

    verbose_name = _("Directed Force Layout")
    viz_type = "directed_force"
    is_timeseries = False
    credits = 'd3noob @<a href="http://bl.ocks.org/d3noob/5141278">bl.ocks.org</a>'
    fieldsets = (
        {
            'label': None,
            'fields': (
                'groupby',
                'metric',
                'row_limit',
            ),
        },
        {
            'label': _('Force Layout'),
            'fields': (
                'link_length',
                'charge',
            ),
        },
    )
    form_overrides = {
        'groupby': {
            'label': _('Source / Target'),
            'description': _("Choose a source and a target"),
        },
    }

    def query_obj(self):
        """Return the query object for the single selected metric

        Raises an Exception unless the form holds exactly two group by
        columns.
        """
        query = super(DirectedForceViz, self).query_obj()
        form = self.form_data
        if len(form['groupby']) != 2:
            raise Exception("Pick exactly 2 columns to 'Group By'")
        query['metrics'] = [form['metric']]
        return query

    def get_data(self):
        """Return the links as ``{'source', 'target', 'value'}`` records"""
        links = self.get_df()
        links.columns = ['source', 'target', 'value']
        return links.to_dict(orient='records')
| |
| |
class WorldMapViz(BaseViz):

    """A country centric world map"""

    viz_type = "world_map"
    verbose_name = _("World Map")
    is_timeseries = False
    credits = 'datamaps on <a href="https://www.npmjs.com/package/datamaps">npm</a>'
    fieldsets = ({
        'label': None,
        'fields': (
            'entity',
            'country_fieldtype',
            'metric',
        )
    }, {
        'label': _('Bubbles'),
        'fields': (
            ('show_bubbles', None),
            'secondary_metric',
            'max_bubble_size',
        )
    })
    form_overrides = {
        'entity': {
            'label': _('Country Field'),
            'description': _("3 letter code of the country"),
        },
        'metric': {
            'label': _('Metric for color'),
            'description': _("Metric that defines the color of the country"),
        },
        'secondary_metric': {
            'label': _('Bubble size'),
            'description': _("Metric that defines the size of the bubble"),
        },
    }

    def query_obj(self):
        """Return the query object: both metrics grouped by the entity"""
        qry = super(WorldMapViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric'], self.form_data['secondary_metric']]
        qry['groupby'] = [self.form_data['entity']]
        return qry

    def get_data(self):
        """Return one record per row with country, m1 and m2

        Rows whose country value is a string found by ``countries.get``
        also receive 'latitude', 'longitude' and 'name', and their
        'country' is replaced by the matched entry's 'cca3' value. Every
        other row gets the placeholder country "XXX".
        """
        from superset.data import countries
        df = self.get_df()
        cols = [self.form_data.get('entity')]
        metric = self.form_data.get('metric')
        secondary_metric = self.form_data.get('secondary_metric')
        if metric == secondary_metric:
            # Take an explicit copy: adding columns to a selection of
            # ``df`` is chained assignment and triggers pandas'
            # SettingWithCopyWarning.
            ndf = df[cols].copy()
            # df[metric] will be a DataFrame
            # because there are duplicate column names
            ndf['m1'] = df[metric].iloc[:, 0]
            ndf['m2'] = ndf['m1']
        else:
            cols += [metric, secondary_metric]
            ndf = df[cols]
        df = ndf
        df.columns = ['country', 'm1', 'm2']
        d = df.to_dict(orient='records')
        for row in d:
            country = None
            if isinstance(row['country'], string_types):
                country = countries.get(
                    self.form_data.get('country_fieldtype'), row['country'])

            if country:
                row['country'] = country['cca3']
                row['latitude'] = country['lat']
                row['longitude'] = country['lng']
                row['name'] = country['name']
            else:
                row['country'] = "XXX"
        return d
| |
| |
class FilterBoxViz(BaseViz):

    """Multi-field, multi-choice filter box that drives dashboards"""

    verbose_name = _("Filters")
    viz_type = "filter_box"
    is_timeseries = False
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    fieldsets = (
        {
            'label': None,
            'fields': (
                ('date_filter', None),
                'groupby',
                'metric',
            ),
        },
    )
    form_overrides = {
        'groupby': {
            'label': _('Filter fields'),
            'description': _("The fields you want to filter on"),
            'default': [],
        },
    }

    def query_obj(self):
        """Return the query object for the single selected metric

        Raises an Exception when neither a filter field nor the date
        filter option is selected.
        """
        query = super(FilterBoxViz, self).query_obj()
        form = self.form_data
        fields = form.get('groupby')
        if len(fields) == 0 and not form.get('date_filter'):
            raise Exception("Pick at least one filter field")
        query['metrics'] = [form['metric']]
        return query

    def get_data(self):
        """Return a dict mapping each filter field to its choices

        One query is run per field; each choice is a dict built from the
        first two values of a result row.
        """
        query = self.query_obj()
        payload = {}
        for column in list(self.form_data['groupby']):
            # Reuse the same query object, grouped by one field at a time.
            query['groupby'] = [column]
            frame = super(FilterBoxViz, self).get_df(query)
            choices = []
            for record in frame.itertuples(index=False):
                choices.append({
                    'id': record[0],
                    'text': record[0],
                    'filter': column,
                    'metric': record[1],
                })
            payload[column] = choices
        return payload
| |
| |
class IFrameViz(BaseViz):

    """iFrame component configured through a single 'url' field"""

    verbose_name = _("iFrame")
    viz_type = "iframe"
    is_timeseries = False
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    fieldsets = (
        {
            'label': None,
            'fields': ('url',),
        },
    )
| |
| |
class ParallelCoordinatesViz(BaseViz):

    """Interactive parallel coordinates chart

    Rendered with this javascript library:
    https://github.com/syntagmatic/parallel-coordinates
    """

    verbose_name = _("Parallel Coordinates")
    viz_type = "para"
    is_timeseries = False
    credits = (
        '<a href="https://syntagmatic.github.io/parallel-coordinates/">'
        'Syntagmatic\'s library</a>')
    fieldsets = (
        {
            'label': None,
            'fields': (
                'series',
                'metrics',
                'secondary_metric',
                'limit',
                ('show_datatable', 'include_series'),
            ),
        },
    )

    def query_obj(self):
        """Return the query object grouped by the series field

        The secondary metric is added to the metrics unless it is
        already one of them.
        """
        query = super(ParallelCoordinatesViz, self).query_obj()
        form = self.form_data
        # Shallow copy so the list held by the form data is not extended.
        metrics = copy.copy(form.get('metrics'))
        secondary = form.get('secondary_metric')
        if secondary not in metrics:
            metrics += [secondary]
        query['metrics'] = metrics
        query['groupby'] = [form.get('series')]
        return query

    def get_data(self):
        """Return the query result as a list of row dicts"""
        return self.get_df().to_dict(orient="records")
| |
| |
class HeatmapViz(BaseViz):

    """A nice heatmap visualization that support high density through canvas"""

    viz_type = "heatmap"
    verbose_name = _("Heatmap")
    is_timeseries = False
    credits = (
        'inspired from mbostock @<a href="http://bl.ocks.org/mbostock/3074470">'
        'bl.ocks.org</a>')
    fieldsets = ({
        'label': None,
        'fields': (
            'all_columns_x',
            'all_columns_y',
            'metric',
        )
    }, {
        'label': _('Heatmap Options'),
        'fields': (
            'linear_color_scheme',
            ('xscale_interval', 'yscale_interval'),
            'canvas_image_rendering',
            'normalize_across',
        )
    },)

    def query_obj(self):
        """Return the query object: one metric grouped by the x and y columns"""
        d = super(HeatmapViz, self).query_obj()
        fd = self.form_data
        d['metrics'] = [fd.get('metric')]
        d['groupby'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]
        return d

    def get_data(self):
        """Return records with 'x', 'y', 'v' and a min-max scaled 'perc'

        'perc' is ``(v - min) / (max - min)``. It is computed over the
        whole frame when ``normalize_across`` is 'heatmap' or when
        grouping by that field gives at most one group, and within each
        group otherwise.
        """
        df = self.get_df()
        fd = self.form_data
        x = fd.get('all_columns_x')
        y = fd.get('all_columns_y')
        v = fd.get('metric')
        if x != y:
            # Copy the selection: the 'perc' column is added further down
            # and assigning onto a selection of the query result is
            # chained assignment (SettingWithCopyWarning).
            df = df[[x, y, v]].copy()
        df.columns = ['x', 'y', 'v']
        norm = fd.get('normalize_across')
        overall = norm == 'heatmap'
        if not overall:
            gb = df.groupby(norm, group_keys=False)
            if len(gb) <= 1:
                overall = True
            else:
                df['perc'] = (
                    gb.apply(
                        lambda grp: (
                            (grp.v - grp.v.min()) /
                            (grp.v.max() - grp.v.min())))
                )
        if overall:
            values = df.v
            min_ = values.min()
            df['perc'] = (values - min_) / (values.max() - min_)
        return df.to_dict(orient="records")
| |
| |
class HorizonViz(NVD3TimeSeriesViz):

    """Horizon chart built on top of the time series query logic

    https://www.npmjs.com/package/d3-horizon-chart
    """

    verbose_name = _("Horizon Charts")
    viz_type = "horizon"
    credits = (
        '<a href="https://www.npmjs.com/package/d3-horizon-chart">'
        'd3-horizon-chart</a>')
    # Only the parent's first fieldset is reused, followed by the horizon
    # specific options.
    fieldsets = [
        NVD3TimeSeriesViz.fieldsets[0],
        {
            'label': _('Chart Options'),
            'fields': (
                ('series_height', 'horizon_color_scale'),
            ),
        },
    ]
| |
| |
class MapboxViz(BaseViz):

    """Point map rendered with Mapbox"""

    verbose_name = _("Mapbox")
    viz_type = "mapbox"
    is_timeseries = False
    credits = (
        '<a href=https://www.mapbox.com/mapbox-gl-js/api/>Mapbox GL JS</a>')
    fieldsets = (
        {
            'label': None,
            'fields': (
                ('all_columns_x', 'all_columns_y'),
                'clustering_radius',
                'row_limit',
                'groupby',
                'render_while_dragging',
            ),
        },
        {
            'label': _('Points'),
            'fields': (
                'point_radius',
                'point_radius_unit',
            ),
        },
        {
            'label': _('Labelling'),
            'fields': (
                'mapbox_label',
                'pandas_aggfunc',
            ),
        },
        {
            'label': _('Visual Tweaks'),
            'fields': (
                'mapbox_style',
                'global_opacity',
                'mapbox_color',
            ),
        },
        {
            'label': _('Viewport'),
            'fields': (
                'viewport_longitude',
                'viewport_latitude',
                'viewport_zoom',
            ),
        },
    )

    form_overrides = {
        'all_columns_x': {
            'label': _('Longitude'),
            'description': _("Column containing longitude data"),
        },
        'all_columns_y': {
            'label': _('Latitude'),
            'description': _("Column containing latitude data"),
        },
        'pandas_aggfunc': {
            'label': _('Cluster label aggregator'),
            'description': _(
                "Aggregate function applied to the list of points "
                "in each cluster to produce the cluster label."),
        },
        'rich_tooltip': {
            'label': _('Tooltip'),
            'description': _(
                "Show a tooltip when hovering over points and clusters "
                "describing the label"),
        },
        'groupby': {
            'description': _(
                "One or many fields to group by. If grouping, latitude "
                "and longitude columns must be present."),
        },
    }

    def query_obj(self):
        """Return the query object, validating the column choices

        Without a group by, the longitude, latitude, label and point
        radius columns are requested as plain (de-duplicated) columns.
        With a group by, those choices must all be part of it; an
        Exception is raised otherwise.
        """
        query = super(MapboxViz, self).query_obj()
        form = self.form_data
        label_col = form.get('mapbox_label')
        groupby = form.get('groupby')

        if not groupby:
            columns = [form.get('all_columns_x'), form.get('all_columns_y')]

            if label_col and len(label_col) >= 1:
                if label_col[0] == "count":
                    raise Exception(
                        "Must have a [Group By] column to have 'count' as the [Label]")
                columns.append(label_col[0])

            if form.get('point_radius') != 'Auto':
                columns.append(form.get('point_radius'))

            query['columns'] = list(set(columns))
            return query

        # Grouped query: every chosen column has to be in the group by.
        if (label_col and len(label_col) >= 1 and
                label_col[0] != "count" and
                label_col[0] not in groupby):
            raise Exception(
                "Choice of [Label] must be present in [Group By]")

        point_radius = form.get("point_radius")
        if point_radius != "Auto" and point_radius not in groupby:
            raise Exception(
                "Choice of [Point Radius] must be present in [Group By]")

        if (form.get('all_columns_x') not in groupby or
                form.get('all_columns_y') not in groupby):
            raise Exception(
                "[Longitude] and [Latitude] columns must be present in [Group By]")
        return query

    def get_data(self):
        """Return the geoJSON points plus the map rendering options

        Each row becomes a Point feature whose properties hold the label
        value ("metric") and the point radius; both are None when no
        label column is set or the radius is 'Auto'.
        """
        frame = self.get_df()
        form = self.form_data
        label_col = form.get('mapbox_label')
        custom_metric = label_col and len(label_col) >= 1
        row_count = len(frame.index)

        if custom_metric:
            metric_col = frame[label_col[0]]
        else:
            metric_col = [None] * row_count

        if form.get("point_radius") == "Auto":
            point_radius_col = [None] * row_count
        else:
            point_radius_col = frame[form.get("point_radius")]

        # using geoJSON formatting
        features = []
        rows = zip(
            frame[form.get('all_columns_x')],
            frame[form.get('all_columns_y')],
            metric_col, point_radius_col)
        for lon, lat, metric, point_radius in rows:
            features.append({
                "type": "Feature",
                "properties": {
                    "metric": metric,
                    "radius": point_radius,
                },
                "geometry": {
                    "type": "Point",
                    "coordinates": [lon, lat],
                },
            })
        geo_json = {
            "type": "FeatureCollection",
            "features": features,
        }

        return {
            "geoJSON": geo_json,
            "customMetric": custom_metric,
            "mapboxApiKey": config.get('MAPBOX_API_KEY'),
            "mapStyle": form.get("mapbox_style"),
            "aggregatorName": form.get("pandas_aggfunc"),
            "clusteringRadius": form.get("clustering_radius"),
            "pointRadiusUnit": form.get("point_radius_unit"),
            "globalOpacity": form.get("global_opacity"),
            "viewportLongitude": form.get("viewport_longitude"),
            "viewportLatitude": form.get("viewport_latitude"),
            "viewportZoom": form.get("viewport_zoom"),
            "renderWhileDragging": form.get("render_while_dragging"),
            "tooltip": form.get("rich_tooltip"),
            "color": form.get("mapbox_color"),
        }
| |
| |
# Every visualization class, in the order they are registered below.
viz_types_list = [
    TableViz,
    PivotTableViz,
    NVD3TimeSeriesViz,
    NVD3CompareTimeSeriesViz,
    NVD3TimeSeriesStackedViz,
    NVD3TimeSeriesBarViz,
    DistributionBarViz,
    DistributionPieViz,
    BubbleViz,
    MarkupViz,
    WordCloudViz,
    BigNumberViz,
    BigNumberTotalViz,
    SunburstViz,
    DirectedForceViz,
    SankeyViz,
    WorldMapViz,
    FilterBoxViz,
    IFrameViz,
    ParallelCoordinatesViz,
    HeatmapViz,
    BoxPlotViz,
    TreemapViz,
    CalHeatmapViz,
    HorizonViz,
    MapboxViz,
    HistogramViz,
    SeparatorViz,
]

# Maps each ``viz_type`` identifier to its class, leaving out the types
# listed in the VIZ_TYPE_BLACKLIST config entry.
viz_types = OrderedDict(
    (viz_class.viz_type, viz_class)
    for viz_class in viz_types_list
    if viz_class.viz_type not in config.get('VIZ_TYPE_BLACKLIST')
)