| #!/bin/env python |
| # -*- coding: utf-8 -*- |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
"""
Data set for the schema change tests on Palo.
Date: 2015/03/25 11:48:37
"""
| import sys |
| import os |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))) |
| from lib import palo_config |
# Absolute path of the directory that contains this module; the local
# expected-result file paths in this module are built from it.
file_dir = os.path.dirname(os.path.abspath(__file__))
| |
# Column definitions covering every column type used by the tests.
# The *_key entries are (name, type) pairs; the *_value entries are
# (name, type, aggregation) triples.  The mixed upper/lower case of some
# type and aggregation strings is kept exactly as written.
schema = [
    # key columns
    ("tinyint_key", "TINYINT"),
    ("smallint_key", "SMALLINT"),
    ("int_key", "INT"),
    ("bigint_key", "BIGINT"),
    ("char_50_key", "CHAR(50)"),
    ("character_key", "VARCHAR(500)"),
    ("char_key", "CHAR"),
    ("character_most_key", "VARCHAR(65533)"),
    ("decimal_key", "DECIMAL(20, 6)"),
    ("decimal_most_key", "DECIMAL(27, 9)"),
    ("date_key", "DATE"),
    ("datetime_key", "DATETIME"),
    # value columns
    ("tinyint_value", "TINYINT", "SUM"),
    ("smallint_value", "SMALLINT", "SUM"),
    ("int_value", "int", "SUM"),
    ("bigint_value", "BIGINT", "SUM"),
    ("char_50_value", "CHAR(50)", "REPLACE"),
    ("character_value", "VARCHAR(500)", "REPLACE"),
    ("char_value", "CHAR", "REPLACE"),
    ("character_most_value", "VARCHAR(65533)", "REPLACE"),
    ("decimal_value", "DECIMAL(20, 6)", "SUM"),
    ("decimal_most_value", "DECIMAL(27, 9)", "SUM"),
    ("date_value_max", "DATE", "max"),
    ("date_value_replace", "DATE", "REPLACE"),
    ("date_value_min", "DATE", "min"),
    ("datetime_value_max", "DATETIME", "MAX"),
    ("datetime_value_replace", "DATETIME", "REPLACE"),
    ("datetime_value_min", "DATETIME", "MIN"),
    ("float_value", "FLOAT", "SUM"),
    ("double_value", "DOUBLE", "SUM"),
]
| |
# Column definitions for the duplicate-key and unique-key variants: every
# entry is a plain (name, type) pair with no aggregation.
# NOTE: the date/datetime value columns are ordered replace, max, min here,
# which differs from the max, replace, min order used in `schema`.
schema_dup = [
    # key columns
    ("tinyint_key", "TINYINT"),
    ("smallint_key", "SMALLINT"),
    ("int_key", "INT"),
    ("bigint_key", "BIGINT"),
    ("char_50_key", "CHAR(50)"),
    ("character_key", "VARCHAR(500)"),
    ("char_key", "CHAR"),
    ("character_most_key", "VARCHAR(65533)"),
    ("decimal_key", "DECIMAL(20, 6)"),
    ("decimal_most_key", "DECIMAL(27, 9)"),
    ("date_key", "DATE"),
    ("datetime_key", "DATETIME"),
    # value columns
    ("tinyint_value", "TINYINT"),
    ("smallint_value", "SMALLINT"),
    ("int_value", "int"),
    ("bigint_value", "BIGINT"),
    ("char_50_value", "CHAR(50)"),
    ("character_value", "VARCHAR(500)"),
    ("char_value", "CHAR"),
    ("character_most_value", "VARCHAR(65533)"),
    ("decimal_value", "DECIMAL(20, 6)"),
    ("decimal_most_value", "DECIMAL(27, 9)"),
    ("date_value_replace", "DATE"),
    ("date_value_max", "DATE"),
    ("date_value_min", "DATE"),
    ("datetime_value_replace", "DATETIME"),
    ("datetime_value_max", "DATETIME"),
    ("datetime_value_min", "DATETIME"),
    ("float_value", "FLOAT"),
    ("double_value", "DOUBLE"),
]

# Same content, but a separate list object: a chained assignment would bind
# both names to one list, so an in-place change through either name would
# silently alter the other schema as well.
schema_uniq = list(schema_dup)
| |
# Key clauses listing the twelve *_key columns.  The pieces are joined by
# implicit literal concatenation, so there is no space after the comma at
# each line break.
key_dup = (
    "DUPLICATE KEY(tinyint_key, smallint_key, int_key, bigint_key, char_50_key,"
    "character_key, char_key, character_most_key, decimal_key, decimal_most_key,"
    "date_key, datetime_key)"
)
key_uniq = (
    "UNIQUE KEY(tinyint_key, smallint_key, int_key, bigint_key, char_50_key,"
    "character_key, char_key, character_most_key, decimal_key, decimal_most_key,"
    "date_key, datetime_key)"
)
| |
# Load source, obtained from palo_config.gen_remote_file_path
# (presumably a remote location -- confirm against palo_config).
file_path = palo_config.gen_remote_file_path("sys/all_type.txt")

# Local files, under this module's directory, holding expected results for
# the delete-key cases.
expected_data_file_list_delete_key_1 = file_dir + "/all_type_834"
expected_data_file_list_delete_key_2 = file_dir + "/SCHEMA_CHANGE/del_data"
expected_data_file_list_delete_key_2_new = file_dir + "/SCHEMA_CHANGE/del_data_new"
expected_data_file_list_delete_key_2_new_agg = file_dir + "/SCHEMA_CHANGE/del_data_new_agg"
expected_data_file_list_delete_key_3 = file_dir + "/SCHEMA_CHANGE/del_data_b"
expected_data_file_list_delete_key_1_dup = file_dir + "/all_type_834_dup"
| |
| |
storage_type = "column"

# random distribution settings
random_partition_type = "random"
random_partition_num = 13
push_partition_num = 103

# range partition settings: one (tinyint_key, int_key) value pair
range_partition_type = "range(tinyint_key, int_key)"
range_list = [("-1", "-4")]

# hash distribution settings
hash_partition_type = "hash(tinyint_key, int_key)"
hash_partition_num = 15
| |
# load: the column names of `schema`, in schema order
column_name_list = [entry[0] for entry in schema]
| |
# delete: each condition is a (column, operator, value) triple
delete_condition_list = [("tinyint_key", "=", "1")]
delete_condition_list_2 = [("tinyint_key", "=", "-128")]
# Nine single-condition lists, one per tinyint_key value "1" through "9".
delete_conditions_list = [
    [("tinyint_key", "=", str(_value))] for _value in range(1, 10)
]
| |
# rollup: columns selected for the rollup
rollup_column_name_list = [
    "tinyint_key",
    "int_key",
    "char_value",
    "tinyint_value",
]

# schema change: columns to drop; the "_new" list omits decimal_key
drop_column_name_list = [
    "decimal_key",
    "bigint_value",
    "datetime_value_min",
]
drop_column_name_list_new = [
    "bigint_value",
    "datetime_value_min",
]
| |
# START FOR BASIC SCHEMA CHANGE CASES
# Load sources obtained from palo_config.gen_remote_file_path.
file_path_1 = palo_config.gen_remote_file_path(
    "sys/schema_change/a_raw"
)
file_path_2 = palo_config.gen_remote_file_path(
    "sys/schema_change/a_k2_k1_v3_v2_del_v1_add_v4"
)
| |
# Local files for the varchar length-change cases; each name holds a
# one-element list of paths under this module's directory.
data_file_varchar_add_length100 = [file_dir + "/SCHEMA_CHANGE/basemodify100"]
expected_data_file_varchar_lenth_change1 = [file_dir + "/SCHEMA_CHANGE/wjbase"]
expected_data_file_varchar_lenth_change2 = [file_dir + "/SCHEMA_CHANGE/baseadd100"]
expected_data_file_varchar_lenth_change3 = [file_dir + "/SCHEMA_CHANGE/baseadd100_2"]
| |
# Expected-result files for the basic schema change cases.  Each name holds
# a one-element list with a path under <file_dir>/SCHEMA_CHANGE.
expected_data_file_list_1 = [file_dir + "/SCHEMA_CHANGE/a_raw"]
expected_data_file_list_2 = [file_dir + "/SCHEMA_CHANGE/a_add_col_mid_key"]
expected_data_file_list_3 = [file_dir + "/SCHEMA_CHANGE/a_add_col_after_key"]
expected_data_file_list_4 = [file_dir + "/SCHEMA_CHANGE/a_add_col_in_value"]
expected_data_file_list_5 = [file_dir + "/SCHEMA_CHANGE/a_add_col_v4_v5"]
expected_data_file_list_6 = [file_dir + "/SCHEMA_CHANGE/a_add_col_v4_v5_v6"]
expected_data_file_list_7 = [file_dir + "/SCHEMA_CHANGE/a_add_col_v4_v5_v6_v7"]
expected_data_file_list_8 = [file_dir + "/SCHEMA_CHANGE/a_add_col_k3_v4_v5_v6_v7"]
expected_data_file_list_9 = [file_dir + "/SCHEMA_CHANGE/a_drop_col_v1"]
expected_data_file_list_10 = [file_dir + "/SCHEMA_CHANGE/a_drop_col_v1_v2"]
expected_data_file_list_11 = [file_dir + "/SCHEMA_CHANGE/a_drop_col_k2"]
expected_data_file_list_12 = [file_dir + "/SCHEMA_CHANGE/a_order_k1_k2_v2_v1_v3"]
expected_data_file_list_13 = [file_dir + "/SCHEMA_CHANGE/a_order_k1_k2_v3_v2_v1"]
expected_data_file_list_14 = [file_dir + "/SCHEMA_CHANGE/a_order_k1_k2_v2_v3_v1"]
expected_data_file_list_15 = [file_dir + "/SCHEMA_CHANGE/a_order_k2_k1_v3_v2_v1"]
expected_data_file_list_16 = [file_dir + "/SCHEMA_CHANGE/a_k2_k1_v3_v2_del_v1_add_v4"]
expected_data_file_list_17 = [file_dir + "/SCHEMA_CHANGE/all_type_after_delete"]
expected_data_file_list_18 = [file_dir + "/SCHEMA_CHANGE/all_type_833"]
expected_data_file_list_19 = [file_dir + "/SCHEMA_CHANGE/all_type_after_schema_change"]
expected_data_file_list_20 = [file_dir + "/SCHEMA_CHANGE/all_data_after_sc_then_del"]
expected_data_file_list_21 = [file_dir + "/SCHEMA_CHANGE/all_data_after_sc_then_delete"]

# "_dup" / "_agg" counterparts of some of the lists above.
expected_data_file_list_18_dup = [file_dir + "/SCHEMA_CHANGE/all_type_833_dup"]
expected_data_file_list_8_agg = [file_dir + "/SCHEMA_CHANGE/a_add_col_k3_v4_v5_v6_v7_agg"]
expected_data_file_list_4_agg = [file_dir + "/SCHEMA_CHANGE/a_add_col_in_value_agg"]
expected_data_file_list_5_agg = [file_dir + "/SCHEMA_CHANGE/a_add_col_v4_v5_agg"]
expected_data_file_list_6_agg = [file_dir + "/SCHEMA_CHANGE/a_add_col_v4_v5_v6_agg"]
expected_data_file_list_7_agg = [file_dir + "/SCHEMA_CHANGE/a_add_col_v4_v5_v6_v7_agg"]
expected_data_file_list_16_agg = [file_dir + "/SCHEMA_CHANGE/a_k2_k1_v3_v2_del_v1_add_v4_agg"]
expected_data_file_list_16_agg_new = [file_dir + "/SCHEMA_CHANGE/a_k2_k1_v3_v2_del_v1_add_v4_agg_new"]
| |
# Base table for the basic schema change cases: two INT keys and three
# value columns, v1 using REPLACE and v2/v3 using SUM.
schema_1 = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)', 'REPLACE'),
    ('v2', 'FLOAT', 'SUM'),
    ('v3', 'DECIMAL(20,7)', 'SUM'),
]

# Same columns, except that v1 carries no aggregation element.
schema_1_new = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)'),
    ('v2', 'FLOAT', 'SUM'),
    ('v3', 'DECIMAL(20,7)', 'SUM'),
]
| |
# One INT key, two VARCHAR(4096) keys (k2, k3) and two SUM value columns.
schema_1_alter_modify_date = [
    ('k1', 'INT'),
    ('k2', 'VARCHAR(4096)'),
    ('k3', 'VARCHAR(4096)'),
    ('v1', 'FLOAT', 'SUM'),
    ('v2', 'DECIMAL(20,7)', 'SUM'),
]
| |
# Variant of the modify-date schema in which k2 is declared with NULL.
# The type and the NULL attribute are written as ONE string literal: two
# adjacent literals ('VARCHAR(4096)' 'NULL') are silently concatenated by
# Python into 'VARCHAR(4096)NULL', which reads like a missing comma.  The
# entry stays a (name, type) pair so that no third element is taken for an
# aggregation.
schema_1_alter_modify_date_null = [
    ('k1', 'INT'),
    ('k2', 'VARCHAR(4096) NULL'),
    ('k3', 'VARCHAR(4096)'),
    ('v1', 'FLOAT', 'SUM'),
    ('v2', 'DECIMAL(20,7)', 'SUM'),
]
| |
# One INT key, eight VARCHAR(4096) keys named k2 through k9, and two SUM
# value columns.
schema_1_alter_modify_number = (
    [('k1', 'INT')]
    + [('k%d' % _idx, 'VARCHAR(4096)') for _idx in range(2, 10)]
    + [('v1', 'FLOAT', 'SUM'), ('v2', 'DECIMAL(20,7)', 'SUM')]
)
| |
# Schema for the varchar length-change cases.  Note the column order:
# k3 (VARCHAR(20)) sits between k1 and k2.
schema_varchar_lenth_change = [
    ('k1', 'INT'),
    ('k3', 'VARCHAR(20)'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(50)', 'REPLACE'),
    ('v2', 'FLOAT', 'SUM'),
    ('v3', 'DECIMAL(20,7)', 'SUM'),
]
| |
# Aggregation-free (name, type) versions of the k1/k2/v1/v2/v3 table.
# All four names hold equal content but are separate list objects.
schema_1_dup = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)'),
    ('v2', 'FLOAT'),
    ('v3', 'DECIMAL(20,7)'),
]

schema_1_uniq = list(schema_1_dup)

schema_1_new_dup = list(schema_1_dup)

schema_1_new_uniq = list(schema_1_dup)
| |
# Key clauses for the k1/k2 tables.
key_1_dup = "DUPLICATE KEY(k1,k2)"
key_1_uniq = "UNIQUE KEY(k1,k2)"

# Range partition on k1 used by the add-key cases.
add_key_partition_type = 'range(k1)'
# The single entry is a plain string, not a 1-tuple: parentheses without a
# trailing comma do not create a tuple.
add_key_range_list = ["150"]
add_key_set_max_partition = True
# END FOR BASIC SCHEMA CHANGE CASES
| |
# Three INT keys (k1..k3) and three INT value columns (v1..v3) with SUM.
schema_2 = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('k3', 'INT'),
    ('v1', 'INT', 'SUM'),
    ('v2', 'INT', 'SUM'),
    ('v3', 'INT', 'SUM'),
]
| |
# Aggregation-free (name, type) definitions of the six INT columns, for the
# duplicate-key and unique-key variants.
schema_2_dup = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('k3', 'INT'),
    ('v1', 'INT'),
    ('v2', 'INT'),
    ('v3', 'INT'),
]

# Same content, but a separate list object: a chained assignment would bind
# both names to one list, so an in-place change through either name would
# silently alter the other schema as well.
schema_2_uniq = list(schema_2_dup)
| |
# Load sources obtained from palo_config.gen_remote_file_path.
file_path_3 = palo_config.gen_remote_file_path(
    "sys/schema_change/delete_k1"
)
file_path_4 = palo_config.gen_remote_file_path(
    "sys/schema_change/delete_k1_delta"
)
| |
# Rollup column selections and a duplicate-key clause on k1.
rollup_field_list_1 = [
    'k1',
    'v1',
]
rollup_field_list_2 = [
    'k1',
    'k2',
    'v1',
]
rollup_field_key_dul_1 = "duplicate key(k1)"
| |