blob: 1b4fbb6ac5931ef2015074e6fd790303f678b877 [file] [log] [blame]
#!/bin/env python
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Data set for the schema change tests on Palo.
Date: 2015/03/25 11:48:37
"""
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from lib import palo_config
# Absolute path of the directory that contains this module; the local data
# file paths defined in this module are built relative to it.
file_dir = os.path.abspath(os.path.dirname(__file__))
# Column definitions of the all-types table.
# Entries named *_key are (name, type) pairs; entries named *_value carry a
# third element (SUM / REPLACE / MAX / MIN in various letter cases).
# NOTE(review): the third element is presumably the aggregation type --
# confirm against the helper that turns these tuples into SQL.
schema = [
    # *_key columns
    ("tinyint_key", "TINYINT"),
    ("smallint_key", "SMALLINT"),
    ("int_key", "INT"),
    ("bigint_key", "BIGINT"),
    ("char_50_key", "CHAR(50)"),
    ("character_key", "VARCHAR(500)"),
    ("char_key", "CHAR"),
    ("character_most_key", "VARCHAR(65533)"),
    ("decimal_key", "DECIMAL(20, 6)"),
    ("decimal_most_key", "DECIMAL(27, 9)"),
    ("date_key", "DATE"),
    ("datetime_key", "DATETIME"),
    # integer *_value columns
    ("tinyint_value", "TINYINT", "SUM"),
    ("smallint_value", "SMALLINT", "SUM"),
    ("int_value", "int", "SUM"),
    ("bigint_value", "BIGINT", "SUM"),
    # string *_value columns
    ("char_50_value", "CHAR(50)", "REPLACE"),
    ("character_value", "VARCHAR(500)", "REPLACE"),
    ("char_value", "CHAR", "REPLACE"),
    ("character_most_value", "VARCHAR(65533)", "REPLACE"),
    # decimal *_value columns
    ("decimal_value", "DECIMAL(20, 6)", "SUM"),
    ("decimal_most_value", "DECIMAL(27, 9)", "SUM"),
    # date / datetime *_value columns
    ("date_value_max", "DATE", "max"),
    ("date_value_replace", "DATE", "REPLACE"),
    ("date_value_min", "DATE", "min"),
    ("datetime_value_max", "DATETIME", "MAX"),
    ("datetime_value_replace", "DATETIME", "REPLACE"),
    ("datetime_value_min", "DATETIME", "MIN"),
    # floating point *_value columns
    ("float_value", "FLOAT", "SUM"),
    ("double_value", "DOUBLE", "SUM"),
]
# Column definitions of the all-types table without a third tuple element:
# every entry is a (name, type) pair.  The same definitions are published
# under two names, schema_uniq and schema_dup.
schema_uniq = [
    ("tinyint_key", "TINYINT"),
    ("smallint_key", "SMALLINT"),
    ("int_key", "INT"),
    ("bigint_key", "BIGINT"),
    ("char_50_key", "CHAR(50)"),
    ("character_key", "VARCHAR(500)"),
    ("char_key", "CHAR"),
    ("character_most_key", "VARCHAR(65533)"),
    ("decimal_key", "DECIMAL(20, 6)"),
    ("decimal_most_key", "DECIMAL(27, 9)"),
    ("date_key", "DATE"),
    ("datetime_key", "DATETIME"),
    ("tinyint_value", "TINYINT"),
    ("smallint_value", "SMALLINT"),
    ("int_value", "int"),
    ("bigint_value", "BIGINT"),
    ("char_50_value", "CHAR(50)"),
    ("character_value", "VARCHAR(500)"),
    ("char_value", "CHAR"),
    ("character_most_value", "VARCHAR(65533)"),
    ("decimal_value", "DECIMAL(20, 6)"),
    ("decimal_most_value", "DECIMAL(27, 9)"),
    ("date_value_replace", "DATE"),
    ("date_value_max", "DATE"),
    ("date_value_min", "DATE"),
    ("datetime_value_replace", "DATETIME"),
    ("datetime_value_max", "DATETIME"),
    ("datetime_value_min", "DATETIME"),
    ("float_value", "FLOAT"),
    ("double_value", "DOUBLE"),
]
# A separate list object with equal content, so that a test mutating one of
# the two schemas (append / remove / reorder) cannot silently alter the other.
schema_dup = list(schema_uniq)
# Key clauses listing the twelve *_key columns.  The pieces are joined by
# implicit literal concatenation; note that no space follows the comma at the
# two places where the literals meet.
key_dup = (
    "DUPLICATE KEY("
    "tinyint_key, smallint_key, int_key, bigint_key, char_50_key,"
    "character_key, char_key, character_most_key, decimal_key, decimal_most_key,"
    "date_key, datetime_key"
    ")"
)
key_uniq = (
    "UNIQUE KEY("
    "tinyint_key, smallint_key, int_key, bigint_key, char_50_key,"
    "character_key, char_key, character_most_key, decimal_key, decimal_most_key,"
    "date_key, datetime_key"
    ")"
)
# Path returned by palo_config.gen_remote_file_path for the all-types data file.
file_path = palo_config.gen_remote_file_path("sys/all_type.txt")

# Data files located under file_dir, named after the delete-key cases.
expected_data_file_list_delete_key_1 = "%s/all_type_834" % file_dir
expected_data_file_list_delete_key_2 = "%s/SCHEMA_CHANGE/del_data" % file_dir
expected_data_file_list_delete_key_2_new = "%s/SCHEMA_CHANGE/del_data_new" % file_dir
expected_data_file_list_delete_key_2_new_agg = "%s/SCHEMA_CHANGE/del_data_new_agg" % file_dir
expected_data_file_list_delete_key_3 = "%s/SCHEMA_CHANGE/del_data_b" % file_dir
expected_data_file_list_delete_key_1_dup = "%s/all_type_834_dup" % file_dir
# Storage setting.
storage_type = "column"

# Random partitioning settings.
random_partition_type = "random"
random_partition_num = 13
push_partition_num = 103

# Range partitioning settings: the partition expression and a list holding
# one tuple of two string values.
range_partition_type = "range(tinyint_key, int_key)"
range_list = [("-1", "-4")]

# Hash partitioning settings.
hash_partition_type = "hash(tinyint_key, int_key)"
hash_partition_num = 15
# load: the column names of `schema`, in definition order (first element of
# every column tuple).
column_name_list = [column_def[0] for column_def in schema]
# delete: each condition is a (column, operator, value) triple and a
# condition list groups the triples that belong together.
delete_condition_list = [("tinyint_key", "=", "1")]
delete_condition_list_2 = [("tinyint_key", "=", "-128")]
# Nine single-condition lists: tinyint_key = "1" through tinyint_key = "9".
delete_conditions_list = [
    [("tinyint_key", "=", str(key_value))] for key_value in range(1, 10)
]

# rollup: column names used for the rollup.
rollup_column_name_list = [
    "tinyint_key",
    "int_key",
    "char_value",
    "tinyint_value",
]

# schema change: names of the columns to drop.
drop_column_name_list = [
    "decimal_key",
    "bigint_value",
    "datetime_value_min",
]
drop_column_name_list_new = [
    "bigint_value",
    "datetime_value_min",
]
#START FOR BASIC SCHEMA CHANGE CASES
# Paths returned by palo_config.gen_remote_file_path for the two load files.
file_path_1 = palo_config.gen_remote_file_path("sys/schema_change/a_raw")
file_path_2 = palo_config.gen_remote_file_path("sys/schema_change/a_k2_k1_v3_v2_del_v1_add_v4")

# Every name below is a one-element list holding a path under
# <file_dir>/SCHEMA_CHANGE.

# varchar length change cases
data_file_varchar_add_length100 = ["%s/SCHEMA_CHANGE/basemodify100" % file_dir]
expected_data_file_varchar_lenth_change1 = ["%s/SCHEMA_CHANGE/wjbase" % file_dir]
expected_data_file_varchar_lenth_change2 = ["%s/SCHEMA_CHANGE/baseadd100" % file_dir]
expected_data_file_varchar_lenth_change3 = ["%s/SCHEMA_CHANGE/baseadd100_2" % file_dir]

# raw data and add-column cases
expected_data_file_list_1 = ["%s/SCHEMA_CHANGE/a_raw" % file_dir]
expected_data_file_list_2 = ["%s/SCHEMA_CHANGE/a_add_col_mid_key" % file_dir]
expected_data_file_list_3 = ["%s/SCHEMA_CHANGE/a_add_col_after_key" % file_dir]
expected_data_file_list_4 = ["%s/SCHEMA_CHANGE/a_add_col_in_value" % file_dir]
expected_data_file_list_5 = ["%s/SCHEMA_CHANGE/a_add_col_v4_v5" % file_dir]
expected_data_file_list_6 = ["%s/SCHEMA_CHANGE/a_add_col_v4_v5_v6" % file_dir]
expected_data_file_list_7 = ["%s/SCHEMA_CHANGE/a_add_col_v4_v5_v6_v7" % file_dir]
expected_data_file_list_8 = ["%s/SCHEMA_CHANGE/a_add_col_k3_v4_v5_v6_v7" % file_dir]

# drop-column cases
expected_data_file_list_9 = ["%s/SCHEMA_CHANGE/a_drop_col_v1" % file_dir]
expected_data_file_list_10 = ["%s/SCHEMA_CHANGE/a_drop_col_v1_v2" % file_dir]
expected_data_file_list_11 = ["%s/SCHEMA_CHANGE/a_drop_col_k2" % file_dir]

# column-order cases
expected_data_file_list_12 = ["%s/SCHEMA_CHANGE/a_order_k1_k2_v2_v1_v3" % file_dir]
expected_data_file_list_13 = ["%s/SCHEMA_CHANGE/a_order_k1_k2_v3_v2_v1" % file_dir]
expected_data_file_list_14 = ["%s/SCHEMA_CHANGE/a_order_k1_k2_v2_v3_v1" % file_dir]
expected_data_file_list_15 = ["%s/SCHEMA_CHANGE/a_order_k2_k1_v3_v2_v1" % file_dir]
expected_data_file_list_16 = ["%s/SCHEMA_CHANGE/a_k2_k1_v3_v2_del_v1_add_v4" % file_dir]

# all-types table cases
expected_data_file_list_17 = ["%s/SCHEMA_CHANGE/all_type_after_delete" % file_dir]
expected_data_file_list_18 = ["%s/SCHEMA_CHANGE/all_type_833" % file_dir]
expected_data_file_list_19 = ["%s/SCHEMA_CHANGE/all_type_after_schema_change" % file_dir]
expected_data_file_list_20 = ["%s/SCHEMA_CHANGE/all_data_after_sc_then_del" % file_dir]
expected_data_file_list_21 = ["%s/SCHEMA_CHANGE/all_data_after_sc_then_delete" % file_dir]

# *_dup / *_agg variants of the files above
expected_data_file_list_18_dup = ["%s/SCHEMA_CHANGE/all_type_833_dup" % file_dir]
expected_data_file_list_8_agg = ["%s/SCHEMA_CHANGE/a_add_col_k3_v4_v5_v6_v7_agg" % file_dir]
expected_data_file_list_4_agg = ["%s/SCHEMA_CHANGE/a_add_col_in_value_agg" % file_dir]
expected_data_file_list_5_agg = ["%s/SCHEMA_CHANGE/a_add_col_v4_v5_agg" % file_dir]
expected_data_file_list_6_agg = ["%s/SCHEMA_CHANGE/a_add_col_v4_v5_v6_agg" % file_dir]
expected_data_file_list_7_agg = ["%s/SCHEMA_CHANGE/a_add_col_v4_v5_v6_v7_agg" % file_dir]
expected_data_file_list_16_agg = ["%s/SCHEMA_CHANGE/a_k2_k1_v3_v2_del_v1_add_v4_agg" % file_dir]
expected_data_file_list_16_agg_new = ["%s/SCHEMA_CHANGE/a_k2_k1_v3_v2_del_v1_add_v4_agg_new" % file_dir]
# Base table of the basic cases: two INT columns k1/k2 followed by v1..v3,
# each v* column carrying a third tuple element.
schema_1 = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)', 'REPLACE'),
    ('v2', 'FLOAT', 'SUM'),
    ('v3', 'DECIMAL(20,7)', 'SUM'),
]

# Same columns as schema_1, except that v1 has no third element.
schema_1_new = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)'),
    ('v2', 'FLOAT', 'SUM'),
    ('v3', 'DECIMAL(20,7)', 'SUM'),
]

# Layout with k1 as INT, k2/k3 as VARCHAR(4096) and two SUM columns v1/v2.
schema_1_alter_modify_date = [
    ('k1', 'INT'),
    ('k2', 'VARCHAR(4096)'),
    ('k3', 'VARCHAR(4096)'),
    ('v1', 'FLOAT', 'SUM'),
    ('v2', 'DECIMAL(20,7)', 'SUM'),
]
# Same layout as schema_1_alter_modify_date, except that the type string of k2
# additionally carries the NULL keyword.  The type is written as one explicit
# literal with a separating space instead of relying on two adjacent string
# literals being glued together without any separator.
# NOTE(review): if 'NULL' was meant to be a separate tuple element, confirm
# against the helper that turns these tuples into SQL before changing the
# tuple shape.
schema_1_alter_modify_date_null = [
    ('k1', 'INT'),
    ('k2', 'VARCHAR(4096) NULL'),
    ('k3', 'VARCHAR(4096)'),
    ('v1', 'FLOAT', 'SUM'),
    ('v2', 'DECIMAL(20,7)', 'SUM'),
]
# k1 as INT, then k2..k9 as VARCHAR(4096), then the two SUM columns v1/v2.
schema_1_alter_modify_number = (
    [('k1', 'INT')]
    + [('k%d' % idx, 'VARCHAR(4096)') for idx in range(2, 10)]
    + [('v1', 'FLOAT', 'SUM'), ('v2', 'DECIMAL(20,7)', 'SUM')]
)

# Layout for the varchar length change cases; note that k3 is listed before k2.
schema_varchar_lenth_change = [
    ('k1', 'INT'),
    ('k3', 'VARCHAR(20)'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(50)', 'REPLACE'),
    ('v2', 'FLOAT', 'SUM'),
    ('v3', 'DECIMAL(20,7)', 'SUM'),
]
# Four variants of the k1/k2/v1/v2/v3 layout in which every column is a plain
# (name, type) pair.  The contents are equal, but each name is bound to its
# own list object.
schema_1_dup = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)'),
    ('v2', 'FLOAT'),
    ('v3', 'DECIMAL(20,7)'),
]

schema_1_uniq = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)'),
    ('v2', 'FLOAT'),
    ('v3', 'DECIMAL(20,7)'),
]

schema_1_new_dup = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)'),
    ('v2', 'FLOAT'),
    ('v3', 'DECIMAL(20,7)'),
]

schema_1_new_uniq = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('v1', 'VARCHAR(4096)'),
    ('v2', 'FLOAT'),
    ('v3', 'DECIMAL(20,7)'),
]
# Key clauses for the k1/k2 layouts.
key_1_dup = "DUPLICATE KEY(k1,k2)"
key_1_uniq = "UNIQUE KEY(k1,k2)"

# Range partition settings used when adding a key column.
add_key_partition_type = 'range(k1)'
# NOTE(review): the single element is the plain string "150", not a 1-tuple
# (parentheses without a trailing comma do not build a tuple), whereas
# range_list elsewhere in this module holds a tuple -- confirm which shape
# the consumer expects.
add_key_range_list = ["150"]
add_key_set_max_partition = True
#END FOR BASIC SCHEMA CHANGE CASES
# Three INT columns k1..k3 followed by three INT columns v1..v3 that carry
# 'SUM' as third tuple element.
schema_2 = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('k3', 'INT'),
    ('v1', 'INT', 'SUM'),
    ('v2', 'INT', 'SUM'),
    ('v3', 'INT', 'SUM'),
]
# Six INT columns k1..k3, v1..v3 as plain (name, type) pairs, published under
# two names.
schema_2_uniq = [
    ('k1', 'INT'),
    ('k2', 'INT'),
    ('k3', 'INT'),
    ('v1', 'INT'),
    ('v2', 'INT'),
    ('v3', 'INT'),
]
# A separate list object with equal content, so that a test mutating one of
# the two schemas cannot silently alter the other.
schema_2_dup = list(schema_2_uniq)
# Paths returned by palo_config.gen_remote_file_path for the delete_k1 data
# file and its delta file.
file_path_3 = palo_config.gen_remote_file_path("sys/schema_change/delete_k1")
file_path_4 = palo_config.gen_remote_file_path("sys/schema_change/delete_k1_delta")
# Column lists for the two rollups.
rollup_field_list_1 = [
    'k1',
    'v1',
]
rollup_field_list_2 = [
    'k1',
    'k2',
    'v1',
]
# Key clause (lower-case keywords) naming k1 only.
rollup_field_key_dul_1 = "duplicate key(k1)"