Merge branch 'master' into 0.21
diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py
index acb1951..a666253 100644
--- a/superset/connectors/druid/models.py
+++ b/superset/connectors/druid/models.py
@@ -908,6 +908,9 @@
column_name,
limit=10000):
"""Retrieve some values for the given column"""
+        logging.info(
+            'Getting values for column [{}] limited to [{}]'
+            .format(column_name, limit))
# TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
if self.fetch_values_from:
from_dttm = utils.parse_human_datetime(self.fetch_values_from)
@@ -954,6 +957,37 @@
ret = Filter(type='and', fields=[ff, dim_filter])
return ret
+ def get_aggregations(self, all_metrics):
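+        """Return an OrderedDict mapping each queried metric name to its JSON aggregation definition"""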
+ aggregations = OrderedDict()
+ for m in self.metrics:
+ if m.metric_name in all_metrics:
+ aggregations[m.metric_name] = m.json_obj
+ return aggregations
+
+ def check_restricted_metrics(self, aggregations):
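+        """Raise MetricPermException if any queried metric is restricted and the user lacks access to it"""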
+ rejected_metrics = [
+ m.metric_name for m in self.metrics
+ if m.is_restricted and
+ m.metric_name in aggregations.keys() and
+ not sm.has_access('metric_access', m.perm)
+ ]
+ if rejected_metrics:
+ raise MetricPermException(
+ 'Access to the metrics denied: ' + ', '.join(rejected_metrics),
+ )
+
+ def get_dimensions(self, groupby, columns_dict):
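+        """Expand the groupby column names into Druid dimensions, substituting a column's dimensionSpec when one is defined"""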
+ dimensions = []
+ groupby = [gb for gb in groupby if gb in columns_dict]
+ for column_name in groupby:
+ col = columns_dict.get(column_name)
+ dim_spec = col.dimension_spec if col else None
+ if dim_spec:
+ dimensions.append(dim_spec)
+ else:
+ dimensions.append(column_name)
+ return dimensions
+
def run_query( # noqa / druid
self,
groupby, metrics,
@@ -987,40 +1021,17 @@
query_str = ''
metrics_dict = {m.metric_name: m for m in self.metrics}
-
columns_dict = {c.column_name: c for c in self.columns}
all_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
metrics,
metrics_dict)
- aggregations = OrderedDict()
- for m in self.metrics:
- if m.metric_name in all_metrics:
- aggregations[m.metric_name] = m.json_obj
-
- rejected_metrics = [
- m.metric_name for m in self.metrics
- if m.is_restricted and
- m.metric_name in aggregations.keys() and
- not sm.has_access('metric_access', m.perm)
- ]
-
- if rejected_metrics:
- raise MetricPermException(
- 'Access to the metrics denied: ' + ', '.join(rejected_metrics),
- )
+ aggregations = self.get_aggregations(all_metrics)
+ self.check_restricted_metrics(aggregations)
# the dimensions list with dimensionSpecs expanded
- dimensions = []
- groupby = [gb for gb in groupby if gb in columns_dict]
- for column_name in groupby:
- col = columns_dict.get(column_name)
- dim_spec = col.dimension_spec
- if dim_spec:
- dimensions.append(dim_spec)
- else:
- dimensions.append(column_name)
+ dimensions = self.get_dimensions(groupby, columns_dict)
extras = extras or {}
qry = dict(
datasource=self.datasource_name,
@@ -1042,17 +1053,20 @@
having_filters = self.get_having_filters(extras.get('having_druid'))
if having_filters:
qry['having'] = having_filters
+
order_direction = 'descending' if order_desc else 'ascending'
+
if len(groupby) == 0 and not having_filters:
+ logging.info('Running timeseries query for no groupby values')
del qry['dimensions']
client.timeseries(**qry)
elif (
not having_filters and
len(groupby) == 1 and
- order_desc and
- not isinstance(list(qry.get('dimensions'))[0], dict)
+ order_desc
):
dim = list(qry.get('dimensions'))[0]
+ logging.info('Running two-phase topn query for dimension [{}]'.format(dim))
if timeseries_limit_metric:
order_by = timeseries_limit_metric
else:
@@ -1063,9 +1077,14 @@
pre_qry['threshold'] = min(row_limit,
timeseries_limit or row_limit)
pre_qry['metric'] = order_by
- pre_qry['dimension'] = dim
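+            # The phase-1 topn query needs a plain dimension name, so unwrap
+            # the dimensionSpec (if any) down to its `dimension` value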
+ if isinstance(dim, dict):
+ if 'dimension' in dim:
+ pre_qry['dimension'] = dim['dimension']
+ else:
+ pre_qry['dimension'] = dim
del pre_qry['dimensions']
client.topn(**pre_qry)
+ logging.info('Phase 1 Complete')
query_str += '// Two phase query\n// Phase 1\n'
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
@@ -1077,19 +1096,22 @@
df = client.export_pandas()
qry['filter'] = self._add_filter_from_pre_query_data(
df,
- qry['dimensions'], filters)
+ [pre_qry['dimension']],
+ filters)
qry['threshold'] = timeseries_limit or 1000
if row_limit and granularity == 'all':
qry['threshold'] = row_limit
- qry['dimension'] = list(qry.get('dimensions'))[0]
qry['dimension'] = dim
del qry['dimensions']
qry['metric'] = list(qry['aggregations'].keys())[0]
client.topn(**qry)
+ logging.info('Phase 2 Complete')
elif len(groupby) > 0:
# If grouping on multiple fields or using a having filter
# we have to force a groupby query
+ logging.info('Running groupby query for dimensions [{}]'.format(dimensions))
if timeseries_limit and is_timeseries:
+ logging.info('Running two-phase query for timeseries')
order_by = metrics[0] if metrics else self.metrics[0]
if timeseries_limit_metric:
order_by = timeseries_limit_metric
@@ -1107,7 +1129,18 @@
'direction': order_direction,
}],
}
+ pre_qry_dims = []
+ # Replace dimensions specs with their `dimension`
+ # values, and ignore those without
+ for dim in qry['dimensions']:
+ if isinstance(dim, dict):
+ if 'dimension' in dim:
+ pre_qry_dims.append(dim['dimension'])
+ else:
+ pre_qry_dims.append(dim)
+ pre_qry['dimensions'] = list(set(pre_qry_dims))
client.groupby(**pre_qry)
+ logging.info('Phase 1 Complete')
query_str += '// Two phase query\n// Phase 1\n'
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
@@ -1119,7 +1152,7 @@
df = client.export_pandas()
qry['filter'] = self._add_filter_from_pre_query_data(
df,
- qry['dimensions'],
+ pre_qry['dimensions'],
filters,
)
qry['limit_spec'] = None
@@ -1134,6 +1167,7 @@
}],
}
client.groupby(**qry)
+ logging.info('Query Complete')
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
return query_str
diff --git a/superset/connectors/druid/views.py b/superset/connectors/druid/views.py
index ad3664b..66b3bc5 100644
--- a/superset/connectors/druid/views.py
+++ b/superset/connectors/druid/views.py
@@ -1,4 +1,5 @@
from datetime import datetime
+import json
import logging
from flask import flash, Markup, redirect
@@ -61,9 +62,28 @@
True),
}
+ def pre_update(self, col):
+ # If a dimension spec JSON is given, ensure that it is
+ # valid JSON and that `outputName` is specified
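+        # e.g. (example spec):
+        # {"type": "default", "dimension": "user_id", "outputName": "user_id"}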
+ if col.dimension_spec_json:
+ try:
+ dimension_spec = json.loads(col.dimension_spec_json)
+ except ValueError as e:
+ raise ValueError('Invalid Dimension Spec JSON: ' + str(e))
+ if not isinstance(dimension_spec, dict):
+ raise ValueError('Dimension Spec must be a JSON object')
+ if 'outputName' not in dimension_spec:
+ raise ValueError('Dimension Spec does not contain `outputName`')
+ if 'dimension' not in dimension_spec:
+ raise ValueError('Dimension Spec is missing `dimension`')
+ # `outputName` should be the same as the `column_name`
+ if dimension_spec['outputName'] != col.column_name:
+ raise ValueError(
+ '`outputName` [{}] unequal to `column_name` [{}]'
+ .format(dimension_spec['outputName'], col.column_name))
+
def post_update(self, col):
col.generate_metrics()
- utils.validate_json(col.dimension_spec_json)
def post_add(self, col):
self.post_update(col)
diff --git a/superset/views/core.py b/superset/views/core.py
index d1219ea..8307e4f 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -735,58 +735,6 @@
class Superset(BaseSupersetView):
"""The base views for Superset!"""
- @api
- @has_access_api
- @expose('/update_role/', methods=['POST'])
- def update_role(self):
- """Assigns a list of found users to the given role."""
- data = request.get_json(force=True)
- gamma_role = sm.find_role('Gamma')
-
- username_set = set()
- user_data_dict = {}
- for user_data in data['users']:
- username = user_data['username']
- if not username:
- continue
- user_data_dict[username] = user_data
- username_set.add(username)
-
- existing_users = db.session.query(sm.user_model).filter(
- sm.user_model.username.in_(username_set)).all()
- missing_users = username_set.difference(
- set([u.username for u in existing_users]))
- logging.info('Missing users: {}'.format(missing_users))
-
- created_users = []
- for username in missing_users:
- user_data = user_data_dict[username]
- user = sm.find_user(email=user_data['email'])
- if not user:
- logging.info('Adding user: {}.'.format(user_data))
- sm.add_user(
- username=user_data['username'],
- first_name=user_data['first_name'],
- last_name=user_data['last_name'],
- email=user_data['email'],
- role=gamma_role,
- )
- sm.get_session.commit()
- user = sm.find_user(username=user_data['username'])
- existing_users.append(user)
- created_users.append(user.username)
-
- role_name = data['role_name']
- role = sm.find_role(role_name)
- role.user = existing_users
- sm.get_session.commit()
- return self.json_response({
- 'role': role_name,
- '# missing users': len(missing_users),
- '# granted': len(existing_users),
- 'created_users': created_users,
- }, status=201)
-
def json_response(self, obj, status=200):
return Response(
json.dumps(obj, default=utils.json_int_dttm_ser),
diff --git a/superset/viz.py b/superset/viz.py
index e3c1737..0eeed23 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -27,7 +27,7 @@
import pandas as pd
from pandas.tseries.frequencies import to_offset
import simplejson as json
-from six import PY3, string_types
+from six import PY3, string_types, text_type
from six.moves import reduce
from superset import app, cache, get_manifest_file, utils
@@ -1300,9 +1300,9 @@
for i, v in ys.iteritems():
x = i
if isinstance(x, (tuple, list)):
- x = ', '.join([str(s) for s in x])
+ x = ', '.join([text_type(s) for s in x])
else:
- x = str(x)
+ x = text_type(x)
values.append({
'x': x,
'y': v,
diff --git a/tests/access_tests.py b/tests/access_tests.py
index 2f8140f..d33cbc6 100644
--- a/tests/access_tests.py
+++ b/tests/access_tests.py
@@ -520,79 +520,6 @@
gamma_user.roles.remove(sm.find_role('dummy_role'))
session.commit()
- def test_update_role_do_not_exist(self):
- update_role_str = 'update_me'
- update_role = sm.find_role(update_role_str)
- if update_role:
- db.session.delete(update_role)
- db.session.commit()
- data = json.dumps({
- 'users': [{
- 'username': 'gamma',
- 'first_name': 'Gamma',
- 'last_name': 'Gamma',
- 'email': 'gamma@superset.com',
- }],
- 'role_name': update_role_str})
- r = self.client.post('/superset/update_role/', data=data,
- follow_redirects=True)
- self.assertEquals(500, r.status_code)
-
- def test_update_role(self):
- update_role_str = 'update_me'
- sm.add_role(update_role_str)
- db.session.commit()
- resp = self.client.post(
- '/superset/update_role/',
- data=json.dumps({
- 'users': [{
- 'username': 'gamma',
- 'first_name': 'Gamma',
- 'last_name': 'Gamma',
- 'email': 'gamma@superset.com',
- }],
- 'role_name': update_role_str,
- }),
- follow_redirects=True,
- )
- update_role = sm.find_role(update_role_str)
- self.assertEquals(
- update_role.user, [sm.find_user(username='gamma')])
- self.assertEquals(resp.status_code, 201)
-
- resp = self.client.post(
- '/superset/update_role/',
- data=json.dumps({
- 'users': [{
- 'username': 'alpha',
- 'first_name': 'Alpha',
- 'last_name': 'Alpha',
- 'email': 'alpha@superset.com',
- }, {
- 'username': 'unknown',
- 'first_name': 'Unknown1',
- 'last_name': 'Unknown2',
- 'email': 'unknown@superset.com',
- }],
- 'role_name': update_role_str,
- }),
- follow_redirects=True,
- )
- self.assertEquals(resp.status_code, 201)
- update_role = sm.find_role(update_role_str)
- self.assertEquals(
- update_role.user, [
- sm.find_user(username='alpha'),
- sm.find_user(username='unknown'),
- ])
- unknown = sm.find_user(username='unknown')
- self.assertEquals('Unknown2', unknown.last_name)
- self.assertEquals('Unknown1', unknown.first_name)
- self.assertEquals('unknown@superset.com', unknown.email)
- db.session.delete(update_role)
- db.session.delete(unknown)
- db.session.commit()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/druid_func_tests.py b/tests/druid_func_tests.py
index 4c047df..74da486 100644
--- a/tests/druid_func_tests.py
+++ b/tests/druid_func_tests.py
@@ -226,7 +226,8 @@
self.assertIn('dimensions', client.groupby.call_args_list[0][1])
self.assertEqual(['col1'], client.groupby.call_args_list[0][1]['dimensions'])
# order_desc but timeseries and dimension spec
- spec = {'spec': 1}
+        # a single dimension backed by a dimension spec now runs a two-phase topn query
+ spec = {'outputName': 'hello', 'dimension': 'matcho'}
spec_json = json.dumps(spec)
col3 = DruidColumn(column_name='col3', dimension_spec_json=spec_json)
ds.columns.append(col3)
@@ -238,13 +239,14 @@
client=client, order_desc=True, timeseries_limit=5,
filter=[], row_limit=100,
)
- self.assertEqual(0, len(client.topn.call_args_list))
- self.assertEqual(2, len(client.groupby.call_args_list))
+ self.assertEqual(2, len(client.topn.call_args_list))
+ self.assertEqual(0, len(client.groupby.call_args_list))
self.assertEqual(0, len(client.timeseries.call_args_list))
- self.assertIn('dimensions', client.groupby.call_args_list[0][1])
- self.assertIn('dimensions', client.groupby.call_args_list[1][1])
- self.assertEqual([spec], client.groupby.call_args_list[0][1]['dimensions'])
- self.assertEqual([spec], client.groupby.call_args_list[1][1]['dimensions'])
+ self.assertIn('dimension', client.topn.call_args_list[0][1])
+ self.assertIn('dimension', client.topn.call_args_list[1][1])
+ # uses dimension for pre query and full spec for final query
+ self.assertEqual('matcho', client.topn.call_args_list[0][1]['dimension'])
+ self.assertEqual(spec, client.topn.call_args_list[1][1]['dimension'])
def test_run_query_multiple_groupby(self):
client = Mock()
diff --git a/tests/security_tests.py b/tests/security_tests.py
index cb2ff63..5c32a97 100644
--- a/tests/security_tests.py
+++ b/tests/security_tests.py
@@ -94,7 +94,6 @@
self.assertIn(('can_sync_druid_source', 'Superset'), perm_set)
self.assertIn(('can_override_role_permissions', 'Superset'), perm_set)
self.assertIn(('can_approve', 'Superset'), perm_set)
- self.assertIn(('can_update_role', 'Superset'), perm_set)
def test_is_admin_only(self):
self.assertFalse(security.is_admin_only(