| #!/bin/env python |
| # -*- coding: utf-8 -*- |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| ################################################################################ |
| # |
| # @file: test_sys_materialized_view_shoot.py |
| # @date: 2020-09-03 11:12:43 |
| # @brief: 验证duplicate表的物化视图的命中 |
| ################################################################################ |
| |
| """ |
| 新增hll_union, count, bitmap_union聚合的物化视图 |
| """ |
| |
| import sys |
| import os |
| sys.path.append("../") |
| from data import schema as DATA |
| from data import load_file as FILE |
| from lib import palo_client |
| from lib import palo_config |
| from lib import util |
| |
| config = palo_config.config |
| LOG = palo_client.LOG |
| L = palo_client.L |
| |
| broker_info = palo_config.broker_info |
| |
| |
| def get_explain_table(client, sql): |
| """ |
| Get explain table |
| """ |
| result = client.execute('EXPLAIN ' + sql) |
| if result is None: |
| return None |
| rollup_flag = 'rollup: ' |
| explain_table = list() |
| for element in result: |
| message = element[0].lstrip() |
| if message.startswith(rollup_flag): |
| explain_table.append(message[len(rollup_flag):].rstrip(' ')) |
| return explain_table |
| |
| |
| def check2(client, sql1, sql2): |
| """check 2 sql same result""" |
| ret1 = client.execute(sql1) |
| ret2 = client.execute(sql2) |
| util.check(ret1, ret2) |
| |
| |
| def setup_module(): |
| """ |
| Set up |
| """ |
| global query_db, database_name, tb_dup, tb_dup |
| global rollup_name1, rollup_name2, rollup_name3, rollup_name4, rollup_name5 |
| global mv_name1, mv_name2, mv_name3, mv_name4, mv_name5, mv_name6, mv_name7, mv_name8, mv_name_9, mv_name10 |
| if 'FE_DB' in os.environ.keys(): |
| query_db = os.environ["FE_DB"] |
| else: |
| query_db = "test_query_qa" |
| database_name = 'test_sys_materialized_view_shoot_test_shoot_db' |
| tb_dup = 'test_shoot_tb_dup' |
| mv_name1 = 'mv1' |
| mv_name2 = 'mv2' |
| mv_name3 = 'mv3' |
| mv_name4 = 'mv4' |
| mv_name5 = 'mv5' |
| mv_name6 = 'mv6' |
| mv_name7 = 'mv7' |
| mv_name8 = 'mv8' |
| mv_name9 = 'mv9' |
| mv_name10 = 'mv10' |
| init_mv() |
| |
| |
| def init_mv(): |
| """init db, table and rollup""" |
| client = palo_client.get_client(config.fe_host, config.fe_query_port) |
| client.clean(database_name) |
| client.create_database(database_name) |
| client.use(database_name) |
| ret = client.create_table(tb_dup, DATA.baseall_column_no_agg_list, |
| distribution_info=palo_client.DistributionInfo("HASH(k2)", 5), set_null=True) |
| |
| sql = 'select k1, k2, k3, k4, max(k8), sum(k9) from %s group by k1, k2, k3, k4' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name1, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name1) |
| |
| sql = 'select k1, k3, max(k8), sum(k9) from %s group by k1, k3' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name2, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name2) |
| |
| sql = 'select k1, k2, k4, max(k8), sum(k9) from %s group by k1, k2, k4' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name3, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name3) |
| |
| sql = 'select k2, k1, max(k8), sum(k9) from %s group by k2, k1' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name4, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name4) |
| |
| sql = 'select k2, k1, k3, k4, max(k8), sum(k9) from %s group by k2, k1, k3, k4' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name5, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name5) |
| |
| sql = 'select k2, k1, k3, k4, count(k6), count(k7) from %s group by k2, k1, k3, k4' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name6, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name6) |
| |
| sql = 'select k2, k1, k3, k4, hll_union(hll_hash(k10)), hll_union(hll_hash(k11)) from %s ' \ |
| 'group by k2, k1, k3, k4' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name7, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name7) |
| |
| sql = 'select k2, k3, k4, bitmap_union(to_bitmap(k1)) from %s group by k2, k3, k4' % tb_dup |
| client.create_materialized_view(tb_dup, mv_name8, sql, is_wait=True) |
| assert client.show_tables(tb_dup) |
| assert client.get_index(tb_dup, index_name=mv_name8) |
| |
| column_name_list = ['k1', 'k2', 'k3', 'k4', 'k5', 'k6', 'k10', 'k11', 'k7', 'k8', 'k9'] |
| set_list = [''] |
| data_desc_list = palo_client.LoadDataInfo(FILE.baseall_hdfs_file, tb_dup) |
| ret = client.batch_load(util.get_label(), data_desc_list, is_wait=True, broker=broker_info) |
| assert ret |
| |
| sql1 = 'select count(*) from %s.%s' % (database_name, tb_dup) |
| sql2 = 'select count(*) from %s.baseall' % query_db |
| check2(client, sql1, sql2) |
| |
| |
| def test_shoot_1(): |
| """ |
| { |
| "title": "test_shoot_1", |
| "describe": "without where, sum命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| client.set_variables('test_materialized_view', 1) |
| assert client.show_variable('test_materialized_view')[0][1] == 'true' |
| |
| sql = 'select sum(k9) from %s.%s' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name2 in shoot_table or mv_name4 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_2(): |
| """ |
| { |
| "title": "test_shoot_2", |
| "describe": "where k1 = 1,点查询命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where k1 = 1' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name2 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where k1 = 1' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_3(): |
| """ |
| { |
| "title": "test_shoot_3", |
| "describe": "where k1 > 1,范围查询命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where k1 > 1' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name2 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where k1 > 1' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_4(): |
| """ |
| { |
| "title": "test_shoot_4", |
| "describe": "where k1=1 and k2=1,and 命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where k1 = 1 and k2 = 1' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name4 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where k1 = 1 and k2 = 1' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_5(): |
| """ |
| { |
| "title": "test_shoot_5", |
| "describe": "where k1>1 and k2=1,and命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where k1 > 1 and k2 = 1' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name4 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where k1 > 1 and k2 = 1' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_6(): |
| """ |
| { |
| "title": "test_shoot_6", |
| "describe": "where k4>1,命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where k4 > 1' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name3 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where k4 > 1' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_7(): |
| """ |
| { |
| "title": "test_shoot_7", |
| "describe": "where k1=1 and k2>1,命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where k1 = 1 and k2 > 1' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name3 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where k1 = 1 and k2 > 1' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_8(): |
| """ |
| { |
| "title": "test_shoot_8", |
| "describe": "where k1=1 and k2=1 and k3=1命中中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where k1 = 1 and k2 = 1 and k3 > 1' \ |
| % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name1 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where k1 = 1 and k2 = 1 and k3 > 1' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_9(): |
| """ |
| { |
| "title": "test_shoot_9", |
| "describe": " where cast,命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(k9) from %s.%s where cast(k2 as int) < 10000' % (database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name4 in shoot_table |
| check_sql = 'select sum(k9) from %s.baseall where cast(k2 as int) < 10000' % query_db |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_10(): |
| """ |
| { |
| "title": "test_shoot_10", |
| "describe": "where join,命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| # 左表 |
| |
| sql = 'select sum(a.k9) from %s.%s a join %s.%s b on a.k1 = b.k1' \ |
| % (database_name, tb_dup, database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name2 in shoot_table |
| check_sql = 'select sum(a.k9) from %s.baseall a join %s.baseall b on a.k1 = b.k1' \ |
| % (query_db, query_db) |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_11(): |
| """ |
| { |
| "title": "test_shoot_11", |
| "describe": "where join on,命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(a.k9) from %s.%s a join %s.%s b on a.k1 = b.k1 and a.k2 = 2' \ |
| % (database_name, tb_dup, database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name4 in shoot_table |
| check_sql = 'select sum(a.k9) from %s.baseall a join %s.baseall b on a.k1 = b.k1 ' \ |
| 'and a.k2 = 2' % (query_db, query_db) |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_12(): |
| """ |
| { |
| "title": "test_shoot_12", |
| "describe": "where join on cas,命中rollup", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql = 'select sum(a.k9) from %s.%s a join %s.%s b on a.k1 = cast(hex(b.k1) as int) ' \ |
| 'and a.k2 = 2' % (database_name, tb_dup, database_name, tb_dup) |
| shoot_table = get_explain_table(client, sql) |
| LOG.info(L('shoot table:', shoot_table=shoot_table)) |
| assert mv_name4 in shoot_table |
| check_sql = 'select sum(a.k9) from %s.baseall a join %s.baseall b on ' \ |
| 'a.k1 = cast(hex(b.k1) as int) and a.k2 = 2' % (query_db, query_db) |
| check2(client, sql, check_sql) |
| |
| |
| def test_shoot_13(): |
| """ |
| { |
| "title": "test_shoot_13", |
| "describe": "count, hll_union, bitmap_union聚合查询测试", |
| "tag": "system,p1" |
| } |
| """ |
| client = palo_client.PaloClient(config.fe_host, config.fe_query_port) |
| client.init() |
| client.use(database_name) |
| |
| sql1 = 'select bitmap_union_count(to_bitmap(%s)) from %s where 1=1' |
| sql2 = 'select bitmap_union(to_bitmap(%s)) from %s where 1=1' |
| sql3 = 'select count(distinct %s) from %s where 1=1' |
| sql4 = 'select count(%s) from %s where 1=1' |
| sql5 = 'select hll_cardinality(hll_raw_agg(hll_hash(%s))) from %s where 1=1' |
| sql6 = 'select hll_union_agg(hll_hash(%s)) from %s where 1=1' |
| sql7 = 'select ndv(%s) from %s where 1=1' |
| sql8 = 'select approx_count_distinct(%s) from %s where 1=1' |
| check(client, sql1, 'k1', mv_name8) |
| check(client, sql2, 'k1', mv_name8) |
| check(client, sql3, 'k1', mv_name8) |
| check(client, sql4, 'k1', tb_dup) |
| check(client, sql5, 'k1', tb_dup) |
| check(client, sql6, 'k1', tb_dup) |
| check(client, sql7, 'k1', tb_dup) |
| check(client, sql8, 'k1', tb_dup) |
| check(client, sql3, 'k6', tb_dup) |
| check(client, sql4, 'k6', mv_name6) |
| check(client, sql5, 'k6', tb_dup) |
| check(client, sql6, 'k6', tb_dup) |
| check(client, sql7, 'k6', tb_dup) |
| check(client, sql8, 'k6', tb_dup) |
| check(client, sql3, 'k11', tb_dup) |
| check(client, sql4, 'k11', tb_dup) |
| check(client, sql5, 'k11', mv_name7) |
| check(client, sql6, 'k11', mv_name7) |
| check(client, sql7, 'k11', mv_name7) |
| check(client, sql8, 'k11', mv_name7) |
| |
| |
| def check(client, sql, col, mv): |
| """check explain rollup and result""" |
| sql1 = sql % (col, tb_dup) |
| sql2 = sql % (col, 'test_query_qa.baseall') |
| shoot_mv = get_explain_table(client, sql1) |
| print(mv, shoot_mv) |
| check2(client, sql1, sql2) |
| assert mv in shoot_mv |
| |
| |
| def teardown_module(): |
| """tear down""" |
| pass |
| |
| |
| |