Merge branch 'master' into 0.21
diff --git a/superset/connectors/druid/models.py b/superset/connectors/druid/models.py
index acb1951..a666253 100644
--- a/superset/connectors/druid/models.py
+++ b/superset/connectors/druid/models.py
@@ -908,6 +908,9 @@
column_name,
limit=10000):
"""Retrieve some values for the given column"""
+        logging.info(
+            'Getting values for column [{}] limited to [{}]'
+            .format(column_name, limit))
# TODO: Use Lexicographic TopNMetricSpec once supported by PyDruid
if self.fetch_values_from:
from_dttm = utils.parse_human_datetime(self.fetch_values_from)
@@ -954,6 +957,37 @@
ret = Filter(type='and', fields=[ff, dim_filter])
return ret
+ def get_aggregations(self, all_metrics):
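+        """Return an OrderedDict mapping each queried metric name to its JSON aggregation definition"""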
+ aggregations = OrderedDict()
+ for m in self.metrics:
+ if m.metric_name in all_metrics:
+ aggregations[m.metric_name] = m.json_obj
+ return aggregations
+
+ def check_restricted_metrics(self, aggregations):
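+        """Raise MetricPermException if any queried metric is restricted and the user lacks access to it"""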
+ rejected_metrics = [
+ m.metric_name for m in self.metrics
+ if m.is_restricted and
+ m.metric_name in aggregations.keys() and
+ not sm.has_access('metric_access', m.perm)
+ ]
+ if rejected_metrics:
+ raise MetricPermException(
+ 'Access to the metrics denied: ' + ', '.join(rejected_metrics),
+ )
+
+ def get_dimensions(self, groupby, columns_dict):
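+        """Expand the groupby column names into Druid dimensions, substituting a column's dimensionSpec when one is defined"""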
+ dimensions = []
+ groupby = [gb for gb in groupby if gb in columns_dict]
+ for column_name in groupby:
+ col = columns_dict.get(column_name)
+ dim_spec = col.dimension_spec if col else None
+ if dim_spec:
+ dimensions.append(dim_spec)
+ else:
+ dimensions.append(column_name)
+ return dimensions
+
def run_query( # noqa / druid
self,
groupby, metrics,
@@ -987,40 +1021,17 @@
query_str = ''
metrics_dict = {m.metric_name: m for m in self.metrics}
-
columns_dict = {c.column_name: c for c in self.columns}
all_metrics, post_aggs = DruidDatasource.metrics_and_post_aggs(
metrics,
metrics_dict)
- aggregations = OrderedDict()
- for m in self.metrics:
- if m.metric_name in all_metrics:
- aggregations[m.metric_name] = m.json_obj
-
- rejected_metrics = [
- m.metric_name for m in self.metrics
- if m.is_restricted and
- m.metric_name in aggregations.keys() and
- not sm.has_access('metric_access', m.perm)
- ]
-
- if rejected_metrics:
- raise MetricPermException(
- 'Access to the metrics denied: ' + ', '.join(rejected_metrics),
- )
+ aggregations = self.get_aggregations(all_metrics)
+ self.check_restricted_metrics(aggregations)
# the dimensions list with dimensionSpecs expanded
- dimensions = []
- groupby = [gb for gb in groupby if gb in columns_dict]
- for column_name in groupby:
- col = columns_dict.get(column_name)
- dim_spec = col.dimension_spec
- if dim_spec:
- dimensions.append(dim_spec)
- else:
- dimensions.append(column_name)
+ dimensions = self.get_dimensions(groupby, columns_dict)
extras = extras or {}
qry = dict(
datasource=self.datasource_name,
@@ -1042,17 +1053,20 @@
having_filters = self.get_having_filters(extras.get('having_druid'))
if having_filters:
qry['having'] = having_filters
+
order_direction = 'descending' if order_desc else 'ascending'
+
if len(groupby) == 0 and not having_filters:
+ logging.info('Running timeseries query for no groupby values')
del qry['dimensions']
client.timeseries(**qry)
elif (
not having_filters and
len(groupby) == 1 and
- order_desc and
- not isinstance(list(qry.get('dimensions'))[0], dict)
+ order_desc
):
dim = list(qry.get('dimensions'))[0]
+ logging.info('Running two-phase topn query for dimension [{}]'.format(dim))
if timeseries_limit_metric:
order_by = timeseries_limit_metric
else:
@@ -1063,9 +1077,14 @@
pre_qry['threshold'] = min(row_limit,
timeseries_limit or row_limit)
pre_qry['metric'] = order_by
- pre_qry['dimension'] = dim
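+            # The phase-1 topn query needs a plain dimension name, so unwrap
+            # the dimensionSpec (if any) down to its `dimension` value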
+ if isinstance(dim, dict):
+ if 'dimension' in dim:
+ pre_qry['dimension'] = dim['dimension']
+ else:
+ pre_qry['dimension'] = dim
del pre_qry['dimensions']
client.topn(**pre_qry)
+ logging.info('Phase 1 Complete')
query_str += '// Two phase query\n// Phase 1\n'
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
@@ -1077,19 +1096,22 @@
df = client.export_pandas()
qry['filter'] = self._add_filter_from_pre_query_data(
df,
- qry['dimensions'], filters)
+ [pre_qry['dimension']],
+ filters)
qry['threshold'] = timeseries_limit or 1000
if row_limit and granularity == 'all':
qry['threshold'] = row_limit
- qry['dimension'] = list(qry.get('dimensions'))[0]
qry['dimension'] = dim
del qry['dimensions']
qry['metric'] = list(qry['aggregations'].keys())[0]
client.topn(**qry)
+ logging.info('Phase 2 Complete')
elif len(groupby) > 0:
# If grouping on multiple fields or using a having filter
# we have to force a groupby query
+ logging.info('Running groupby query for dimensions [{}]'.format(dimensions))
if timeseries_limit and is_timeseries:
+ logging.info('Running two-phase query for timeseries')
order_by = metrics[0] if metrics else self.metrics[0]
if timeseries_limit_metric:
order_by = timeseries_limit_metric
@@ -1107,7 +1129,18 @@
'direction': order_direction,
}],
}
+ pre_qry_dims = []
+ # Replace dimensions specs with their `dimension`
+ # values, and ignore those without
+ for dim in qry['dimensions']:
+ if isinstance(dim, dict):
+ if 'dimension' in dim:
+ pre_qry_dims.append(dim['dimension'])
+ else:
+ pre_qry_dims.append(dim)
+ pre_qry['dimensions'] = list(set(pre_qry_dims))
client.groupby(**pre_qry)
+ logging.info('Phase 1 Complete')
query_str += '// Two phase query\n// Phase 1\n'
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
@@ -1119,7 +1152,7 @@
df = client.export_pandas()
qry['filter'] = self._add_filter_from_pre_query_data(
df,
- qry['dimensions'],
+ pre_qry['dimensions'],
filters,
)
qry['limit_spec'] = None
@@ -1134,6 +1167,7 @@
}],
}
client.groupby(**qry)
+ logging.info('Query Complete')
query_str += json.dumps(
client.query_builder.last_query.query_dict, indent=2)
return query_str
diff --git a/superset/connectors/druid/views.py b/superset/connectors/druid/views.py
index ad3664b..66b3bc5 100644
--- a/superset/connectors/druid/views.py
+++ b/superset/connectors/druid/views.py
@@ -1,4 +1,5 @@
from datetime import datetime
+import json
import logging
from flask import flash, Markup, redirect
@@ -61,9 +62,28 @@
True),
}
+ def pre_update(self, col):
+ # If a dimension spec JSON is given, ensure that it is
+ # valid JSON and that `outputName` is specified
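+        # e.g. (example spec):
+        # {"type": "default", "dimension": "user_id", "outputName": "user_id"}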
+ if col.dimension_spec_json:
+ try:
+ dimension_spec = json.loads(col.dimension_spec_json)
+ except ValueError as e:
+ raise ValueError('Invalid Dimension Spec JSON: ' + str(e))
+ if not isinstance(dimension_spec, dict):
+ raise ValueError('Dimension Spec must be a JSON object')
+ if 'outputName' not in dimension_spec:
+ raise ValueError('Dimension Spec does not contain `outputName`')
+ if 'dimension' not in dimension_spec:
+ raise ValueError('Dimension Spec is missing `dimension`')
+ # `outputName` should be the same as the `column_name`
+ if dimension_spec['outputName'] != col.column_name:
+ raise ValueError(
+ '`outputName` [{}] unequal to `column_name` [{}]'
+ .format(dimension_spec['outputName'], col.column_name))
+
def post_update(self, col):
col.generate_metrics()
- utils.validate_json(col.dimension_spec_json)
def post_add(self, col):
self.post_update(col)
diff --git a/superset/views/core.py b/superset/views/core.py
index d1219ea..8307e4f 100755
--- a/superset/views/core.py
+++ b/superset/views/core.py
@@ -735,58 +735,6 @@
class Superset(BaseSupersetView):
"""The base views for Superset!"""
- @api
- @has_access_api
- @expose('/update_role/', methods=['POST'])
- def update_role(self):
- """Assigns a list of found users to the given role."""
- data = request.get_json(force=True)
- gamma_role = sm.find_role('Gamma')
-
- username_set = set()
- user_data_dict = {}
- for user_data in data['users']:
- username = user_data['username']
- if not username:
- continue
- user_data_dict[username] = user_data
- username_set.add(username)
-
- existing_users = db.session.query(sm.user_model).filter(
- sm.user_model.username.in_(username_set)).all()
- missing_users = username_set.difference(
- set([u.username for u in existing_users]))
- logging.info('Missing users: {}'.format(missing_users))
-
- created_users = []
- for username in missing_users:
- user_data = user_data_dict[username]
- user = sm.find_user(email=user_data['email'])
- if not user:
- logging.info('Adding user: {}.'.format(user_data))
- sm.add_user(
- username=user_data['username'],
- first_name=user_data['first_name'],
- last_name=user_data['last_name'],
- email=user_data['email'],
- role=gamma_role,
- )
- sm.get_session.commit()
- user = sm.find_user(username=user_data['username'])
- existing_users.append(user)
- created_users.append(user.username)
-
- role_name = data['role_name']
- role = sm.find_role(role_name)
- role.user = existing_users
- sm.get_session.commit()
- return self.json_response({
- 'role': role_name,
- '# missing users': len(missing_users),
- '# granted': len(existing_users),
- 'created_users': created_users,
- }, status=201)
-
def json_response(self, obj, status=200):
return Response(
json.dumps(obj, default=utils.json_int_dttm_ser),
diff --git a/superset/viz.py b/superset/viz.py
index e3c1737..0eeed23 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -27,7 +27,7 @@
import pandas as pd
from pandas.tseries.frequencies import to_offset
import simplejson as json
-from six import PY3, string_types
+from six import PY3, string_types, text_type
from six.moves import reduce
from superset import app, cache, get_manifest_file, utils
@@ -1300,9 +1300,9 @@
for i, v in ys.iteritems():
x = i
if isinstance(x, (tuple, list)):
- x = ', '.join([str(s) for s in x])
+ x = ', '.join([text_type(s) for s in x])
else:
- x = str(x)
+ x = text_type(x)
values.append({
'x': x,
'y': v,
diff --git a/tests/access_tests.py b/tests/access_tests.py
index 2f8140f..d33cbc6 100644
--- a/tests/access_tests.py
+++ b/tests/access_tests.py
@@ -520,79 +520,6 @@
gamma_user.roles.remove(sm.find_role('dummy_role'))
session.commit()
- def test_update_role_do_not_exist(self):
- update_role_str = 'update_me'
- update_role = sm.find_role(update_role_str)
- if update_role:
- db.session.delete(update_role)
- db.session.commit()
- data = json.dumps({
- 'users': [{
- 'username': 'gamma',
- 'first_name': 'Gamma',
- 'last_name': 'Gamma',
- 'email': 'gamma@superset.com',
- }],
- 'role_name': update_role_str})
- r = self.client.post('/superset/update_role/', data=data,
- follow_redirects=True)
- self.assertEquals(500, r.status_code)
-
- def test_update_role(self):
- update_role_str = 'update_me'
- sm.add_role(update_role_str)
- db.session.commit()
- resp = self.client.post(
- '/superset/update_role/',
- data=json.dumps({
- 'users': [{
- 'username': 'gamma',
- 'first_name': 'Gamma',
- 'last_name': 'Gamma',
- 'email': 'gamma@superset.com',
- }],
- 'role_name': update_role_str,
- }),
- follow_redirects=True,
- )
- update_role = sm.find_role(update_role_str)
- self.assertEquals(
- update_role.user, [sm.find_user(username='gamma')])
- self.assertEquals(resp.status_code, 201)
-
- resp = self.client.post(
- '/superset/update_role/',
- data=json.dumps({
- 'users': [{
- 'username': 'alpha',
- 'first_name': 'Alpha',
- 'last_name': 'Alpha',
- 'email': 'alpha@superset.com',
- }, {
- 'username': 'unknown',
- 'first_name': 'Unknown1',
- 'last_name': 'Unknown2',
- 'email': 'unknown@superset.com',
- }],
- 'role_name': update_role_str,
- }),
- follow_redirects=True,
- )
- self.assertEquals(resp.status_code, 201)
- update_role = sm.find_role(update_role_str)
- self.assertEquals(
- update_role.user, [
- sm.find_user(username='alpha'),
- sm.find_user(username='unknown'),
- ])
- unknown = sm.find_user(username='unknown')
- self.assertEquals('Unknown2', unknown.last_name)
- self.assertEquals('Unknown1', unknown.first_name)
- self.assertEquals('unknown@superset.com', unknown.email)
- db.session.delete(update_role)
- db.session.delete(unknown)
- db.session.commit()
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/druid_func_tests.py b/tests/druid_func_tests.py
index 4c047df..74da486 100644
--- a/tests/druid_func_tests.py
+++ b/tests/druid_func_tests.py
@@ -226,7 +226,8 @@
self.assertIn('dimensions', client.groupby.call_args_list[0][1])
self.assertEqual(['col1'], client.groupby.call_args_list[0][1]['dimensions'])
# order_desc but timeseries and dimension spec
- spec = {'spec': 1}
+        # a single dimension backed by a dimension spec now runs a two-phase topn query
+ spec = {'outputName': 'hello', 'dimension': 'matcho'}
spec_json = json.dumps(spec)
col3 = DruidColumn(column_name='col3', dimension_spec_json=spec_json)
ds.columns.append(col3)
@@ -238,13 +239,14 @@
client=client, order_desc=True, timeseries_limit=5,
filter=[], row_limit=100,
)
- self.assertEqual(0, len(client.topn.call_args_list))
- self.assertEqual(2, len(client.groupby.call_args_list))
+ self.assertEqual(2, len(client.topn.call_args_list))
+ self.assertEqual(0, len(client.groupby.call_args_list))
self.assertEqual(0, len(client.timeseries.call_args_list))
- self.assertIn('dimensions', client.groupby.call_args_list[0][1])
- self.assertIn('dimensions', client.groupby.call_args_list[1][1])
- self.assertEqual([spec], client.groupby.call_args_list[0][1]['dimensions'])
- self.assertEqual([spec], client.groupby.call_args_list[1][1]['dimensions'])
+ self.assertIn('dimension', client.topn.call_args_list[0][1])
+ self.assertIn('dimension', client.topn.call_args_list[1][1])
+ # uses dimension for pre query and full spec for final query
+ self.assertEqual('matcho', client.topn.call_args_list[0][1]['dimension'])
+ self.assertEqual(spec, client.topn.call_args_list[1][1]['dimension'])
def test_run_query_multiple_groupby(self):
client = Mock()
diff --git a/tests/security_tests.py b/tests/security_tests.py
index cb2ff63..5c32a97 100644
--- a/tests/security_tests.py
+++ b/tests/security_tests.py
@@ -94,7 +94,6 @@
self.assertIn(('can_sync_druid_source', 'Superset'), perm_set)
self.assertIn(('can_override_role_permissions', 'Superset'), perm_set)
self.assertIn(('can_approve', 'Superset'), perm_set)
- self.assertIn(('can_update_role', 'Superset'), perm_set)
def test_is_admin_only(self):
self.assertFalse(security.is_admin_only(