| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.drill.exec.store.hive; |
| |
| import java.io.File; |
| import java.io.PrintWriter; |
| import java.nio.file.Paths; |
| import java.sql.Date; |
| import java.sql.Timestamp; |
| |
| import org.apache.commons.io.FileUtils; |
| import org.apache.commons.lang3.StringUtils; |
| import org.apache.drill.exec.hive.HiveTestUtilities; |
| import com.google.common.io.Resources; |
| import org.apache.drill.test.BaseDirTestWatcher; |
| import org.apache.drill.test.BaseTestQuery; |
| import org.apache.hadoop.hive.ql.Driver; |
| import org.apache.hadoop.hive.serde.serdeConstants; |
| |
| import static org.apache.drill.exec.hive.HiveTestUtilities.createDirWithPosixPermissions; |
| import static org.apache.drill.exec.hive.HiveTestUtilities.executeQuery; |
| |
/**
 * Generates the Hive databases, tables, views and backing data files used by
 * Drill's Hive storage plugin integration tests.
 *
 * <p>All DDL/DML is executed through the supplied Hive {@link Driver}.
 * Statement order inside {@link #generateDataInternal(Driver)} matters:
 * later statements (INSERT ... SELECT, views, partition additions) reference
 * tables and databases created by earlier statements.
 */
public class HiveTestDataGenerator {

  // Owns the test root directory; used to create sub-directories and copy classpath resources.
  private final BaseDirTestWatcher dirTestWatcher;

  // Base directory under which custom partition/table locations are created (e.g. "part1", "external").
  private final File baseDir;

  // Hive warehouse directory path; managed table directories (e.g. "empty_table", "kv_sh") live under it.
  private final String warehouseDir;

  /**
   * @param dirTestWatcher watcher providing the test root directory and resource-copy helpers
   * @param baseDir        base directory for custom table/partition locations
   * @param warehouseDir   Hive warehouse directory path
   */
  public HiveTestDataGenerator(BaseDirTestWatcher dirTestWatcher, File baseDir, String warehouseDir) {
    this.dirTestWatcher = dirTestWatcher;
    this.baseDir = baseDir;
    this.warehouseDir = warehouseDir;
  }

  /**
   * Creates all Hive test tables, views and data files.
   *
   * @param hiveDriver driver used to execute the Hive statements
   * @throws RuntimeException wrapping any failure during generation
   */
  public void generateData(Driver hiveDriver) {
    try {
      generateDataInternal(hiveDriver);
    } catch (Exception e) {
      throw new RuntimeException("Exception was thrown while generating test data", e);
    }
  }

  /**
   * Executes the full, order-dependent sequence of Hive DDL/DML statements that
   * builds the test schema. Do not reorder statements: inserts and views below
   * depend on tables created earlier in this method.
   *
   * @param hiveDriver driver used to execute the Hive statements
   * @throws Exception on any file-generation or query-execution failure
   */
  private void generateDataInternal(Driver hiveDriver) throws Exception {
    // Directory later used as a custom partition location for 'partition_pruning_test'.
    File part1Dir = createDirWithPosixPermissions(baseDir, "part1");
    // generate (key, value) test data
    String testDataFile = generateTestDataFile();

    // Create a (key, value) schema table with Text SerDe which is available in hive-serdes.jar
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS default.kv(key INT, value STRING) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "LOAD DATA LOCAL INPATH '" + testDataFile + "' OVERWRITE INTO TABLE default.kv");

    // Create a (key, value) schema table in non-default database with RegexSerDe which is available in hive-contrib.jar
    // Table with RegExSerde is expected to have columns of STRING type only.
    executeQuery(hiveDriver, "CREATE DATABASE IF NOT EXISTS db1");
    executeQuery(hiveDriver, "CREATE TABLE db1.kv_db1(key STRING, value STRING) " +
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' " +
        "WITH SERDEPROPERTIES (" +
        "  \"input.regex\" = \"([0-9]*), (.*_[0-9]*)\", " +
        "  \"output.format.string\" = \"%1$s, %2$s\"" +
        ") ");
    executeQuery(hiveDriver, "INSERT INTO TABLE db1.kv_db1 SELECT * FROM default.kv");

    // Create an Avro format based table backed by schema in a separate file
    // Backslashes are normalized to forward slashes so the file: URL is valid on Windows too.
    final String avroCreateQuery = String.format("CREATE TABLE db1.avro " +
        "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' " +
        "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' " +
        "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' " +
        "TBLPROPERTIES ('avro.schema.url'='file:///%s')",
        BaseTestQuery.getPhysicalFileFromResource("avro_test_schema.json").replace('\\', '/'));

    executeQuery(hiveDriver, avroCreateQuery);
    executeQuery(hiveDriver, "INSERT INTO TABLE db1.avro SELECT * FROM default.kv");

    executeQuery(hiveDriver, "USE default");

    // create a table with no data
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS empty_table(a INT, b STRING)");
    // delete the table location of empty table so the table exists in the metastore with no backing directory
    File emptyTableLocation = new File(warehouseDir, "empty_table");
    if (emptyTableLocation.exists()) {
      FileUtils.forceDelete(emptyTableLocation);
    }

    // create a Hive table that has columns with data types which are supported for reading in Drill.
    testDataFile = generateAllTypesDataFile();
    executeQuery(hiveDriver,
        "CREATE TABLE IF NOT EXISTS readtest (" +
        "  binary_field BINARY," +
        "  boolean_field BOOLEAN," +
        "  tinyint_field TINYINT," +
        "  decimal0_field DECIMAL," +
        "  decimal9_field DECIMAL(6, 2)," +
        "  decimal18_field DECIMAL(15, 5)," +
        "  decimal28_field DECIMAL(23, 1)," +
        "  decimal38_field DECIMAL(30, 3)," +
        "  double_field DOUBLE," +
        "  float_field FLOAT," +
        "  int_field INT," +
        "  bigint_field BIGINT," +
        "  smallint_field SMALLINT," +
        "  string_field STRING," +
        "  varchar_field VARCHAR(50)," +
        "  timestamp_field TIMESTAMP," +
        "  date_field DATE," +
        "  char_field CHAR(10)" +
        ") PARTITIONED BY (" +
        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
        // "  binary_part BINARY," +
        "  boolean_part BOOLEAN," +
        "  tinyint_part TINYINT," +
        "  decimal0_part DECIMAL," +
        "  decimal9_part DECIMAL(6, 2)," +
        "  decimal18_part DECIMAL(15, 5)," +
        "  decimal28_part DECIMAL(23, 1)," +
        "  decimal38_part DECIMAL(30, 3)," +
        "  double_part DOUBLE," +
        "  float_part FLOAT," +
        "  int_part INT," +
        "  bigint_part BIGINT," +
        "  smallint_part SMALLINT," +
        "  string_part STRING," +
        "  varchar_part VARCHAR(50)," +
        "  timestamp_part TIMESTAMP," +
        "  date_part DATE," +
        "  char_part CHAR(10)" +
        ") ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' " +
        "TBLPROPERTIES ('serialization.null.format'='') "
    );

    // Add a partition to table 'readtest'
    executeQuery(hiveDriver,
        "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " +
        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
        // "  binary_part='binary', " +
        "  boolean_part='true', " +
        "  tinyint_part='64', " +
        "  decimal0_part='36.9', " +
        "  decimal9_part='36.9', " +
        "  decimal18_part='3289379872.945645', " +
        "  decimal28_part='39579334534534.35345', " +
        "  decimal38_part='363945093845093890.9', " +
        "  double_part='8.345', " +
        "  float_part='4.67', " +
        "  int_part='123456', " +
        "  bigint_part='234235', " +
        "  smallint_part='3455', " +
        "  string_part='string', " +
        "  varchar_part='varchar', " +
        "  timestamp_part='2013-07-05 17:01:00', " +
        "  date_part='2013-07-05', " +
        "  char_part='char')"
    );

    // Add a second partition to table 'readtest' which contains the same values as the first partition except
    // for tinyint_part partition column
    executeQuery(hiveDriver,
        "ALTER TABLE readtest ADD IF NOT EXISTS PARTITION ( " +
        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
        // "  binary_part='binary', " +
        "  boolean_part='true', " +
        "  tinyint_part='65', " +
        "  decimal0_part='36.9', " +
        "  decimal9_part='36.9', " +
        "  decimal18_part='3289379872.945645', " +
        "  decimal28_part='39579334534534.35345', " +
        "  decimal38_part='363945093845093890.9', " +
        "  double_part='8.345', " +
        "  float_part='4.67', " +
        "  int_part='123456', " +
        "  bigint_part='234235', " +
        "  smallint_part='3455', " +
        "  string_part='string', " +
        "  varchar_part='varchar', " +
        "  timestamp_part='2013-07-05 17:01:00', " +
        "  date_part='2013-07-05', " +
        "  char_part='char')"
    );

    // Load data into table 'readtest' (into the first partition only; the second stays empty of this file's rows)
    executeQuery(hiveDriver,
        String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE default.readtest PARTITION (" +
        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
        // "  binary_part='binary', " +
        "  boolean_part='true', " +
        "  tinyint_part='64', " +
        "  decimal0_part='36.9', " +
        "  decimal9_part='36.9', " +
        "  decimal18_part='3289379872.945645', " +
        "  decimal28_part='39579334534534.35345', " +
        "  decimal38_part='363945093845093890.9', " +
        "  double_part='8.345', " +
        "  float_part='4.67', " +
        "  int_part='123456', " +
        "  bigint_part='234235', " +
        "  smallint_part='3455', " +
        "  string_part='string', " +
        "  varchar_part='varchar', " +
        "  timestamp_part='2013-07-05 17:01:00', " +
        "  date_part='2013-07-05'," +
        "  char_part='char'" +
        ")", testDataFile));

    // create a table that has all Hive types. This is to test how hive tables metadata is populated in
    // Drill's INFORMATION_SCHEMA.
    executeQuery(hiveDriver,
        "CREATE TABLE IF NOT EXISTS infoschematest(" +
        "booleanType BOOLEAN, " +
        "tinyintType TINYINT, " +
        "smallintType SMALLINT, " +
        "intType INT, " +
        "bigintType BIGINT, " +
        "floatType FLOAT, " +
        "doubleType DOUBLE, " +
        "dateType DATE, " +
        "timestampType TIMESTAMP, " +
        "binaryType BINARY, " +
        "decimalType DECIMAL(38, 2), " +
        "stringType STRING, " +
        "varCharType VARCHAR(20), " +
        "listType ARRAY<STRING>, " +
        "mapType MAP<STRING,INT>, " +
        "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, " +
        "uniontypeType UNIONTYPE<int, double, array<string>>, " +
        "charType CHAR(10))"
    );

    /*
     * Create a PARQUET table with all supported types.
     */
    executeQuery(hiveDriver,
        "CREATE TABLE readtest_parquet (" +
        "  binary_field BINARY, " +
        "  boolean_field BOOLEAN, " +
        "  tinyint_field TINYINT," +
        "  decimal0_field DECIMAL," +
        "  decimal9_field DECIMAL(6, 2)," +
        "  decimal18_field DECIMAL(15, 5)," +
        "  decimal28_field DECIMAL(23, 1)," +
        "  decimal38_field DECIMAL(30, 3)," +
        "  double_field DOUBLE," +
        "  float_field FLOAT," +
        "  int_field INT," +
        "  bigint_field BIGINT," +
        "  smallint_field SMALLINT," +
        "  string_field STRING," +
        "  varchar_field VARCHAR(50)," +
        "  timestamp_field TIMESTAMP," +
        "  char_field CHAR(10)" +
        ") PARTITIONED BY (" +
        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
        // "  binary_part BINARY," +
        "  boolean_part BOOLEAN," +
        "  tinyint_part TINYINT," +
        "  decimal0_part DECIMAL," +
        "  decimal9_part DECIMAL(6, 2)," +
        "  decimal18_part DECIMAL(15, 5)," +
        "  decimal28_part DECIMAL(23, 1)," +
        "  decimal38_part DECIMAL(30, 3)," +
        "  double_part DOUBLE," +
        "  float_part FLOAT," +
        "  int_part INT," +
        "  bigint_part BIGINT," +
        "  smallint_part SMALLINT," +
        "  string_part STRING," +
        "  varchar_part VARCHAR(50)," +
        "  timestamp_part TIMESTAMP," +
        "  date_part DATE," +
        "  char_part CHAR(10)" +
        ") STORED AS parquet "
    );

    // Populate the parquet table's first partition from the matching 'readtest' partition.
    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE readtest_parquet " +
        "PARTITION (" +
        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
        // "  binary_part='binary', " +
        "  boolean_part='true', " +
        "  tinyint_part='64', " +
        "  decimal0_part='36.9', " +
        "  decimal9_part='36.9', " +
        "  decimal18_part='3289379872.945645', " +
        "  decimal28_part='39579334534534.35345', " +
        "  decimal38_part='363945093845093890.9', " +
        "  double_part='8.345', " +
        "  float_part='4.67', " +
        "  int_part='123456', " +
        "  bigint_part='234235', " +
        "  smallint_part='3455', " +
        "  string_part='string', " +
        "  varchar_part='varchar', " +
        "  timestamp_part='2013-07-05 17:01:00', " +
        "  date_part='2013-07-05', " +
        "  char_part='char'" +
        ") " +
        " SELECT " +
        "  binary_field," +
        "  boolean_field," +
        "  tinyint_field," +
        "  decimal0_field," +
        "  decimal9_field," +
        "  decimal18_field," +
        "  decimal28_field," +
        "  decimal38_field," +
        "  double_field," +
        "  float_field," +
        "  int_field," +
        "  bigint_field," +
        "  smallint_field," +
        "  string_field," +
        "  varchar_field," +
        "  timestamp_field," +
        "  char_field" +
        " FROM readtest WHERE tinyint_part = 64");

    // Add a second partition to table 'readtest_parquet' which contains the same values as the first partition except
    // for tinyint_part partition column
    executeQuery(hiveDriver,
        "ALTER TABLE readtest_parquet ADD PARTITION ( " +
        // There is a regression in Hive 1.2.1 in binary type partition columns. Disable for now.
        // "  binary_part='binary', " +
        "  boolean_part='true', " +
        "  tinyint_part='65', " +
        "  decimal0_part='36.9', " +
        "  decimal9_part='36.9', " +
        "  decimal18_part='3289379872.945645', " +
        "  decimal28_part='39579334534534.35345', " +
        "  decimal38_part='363945093845093890.9', " +
        "  double_part='8.345', " +
        "  float_part='4.67', " +
        "  int_part='123456', " +
        "  bigint_part='234235', " +
        "  smallint_part='3455', " +
        "  string_part='string', " +
        "  varchar_part='varchar', " +
        "  timestamp_part='2013-07-05 17:01:00', " +
        "  date_part='2013-07-05', " +
        "  char_part='char')"
    );

    // Staging table used only to feed the dynamic-partition inserts below; dropped afterwards.
    executeQuery(hiveDriver, "CREATE TABLE IF NOT EXISTS " +
        "partition_pruning_test_loadtable(a DATE, b TIMESTAMP, c INT, d INT, e INT) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver,
        String.format("LOAD DATA LOCAL INPATH '%s' INTO TABLE partition_pruning_test_loadtable",
            generateTestDataFileForPartitionInput()));

    // create partitioned hive table to test partition pruning
    executeQuery(hiveDriver,
        "CREATE TABLE IF NOT EXISTS partition_pruning_test(a DATE, b TIMESTAMP) "+
        "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE partition_pruning_test PARTITION(c, d, e) " +
        "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable");

    // Same data, but some partitions are then altered to a different column schema (column 'a' renamed/retyped).
    executeQuery(hiveDriver,
        "CREATE TABLE IF NOT EXISTS partition_with_few_schemas(a DATE, b TIMESTAMP) "+
        "partitioned by (c INT, d INT, e INT) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
    executeQuery(hiveDriver, "INSERT OVERWRITE TABLE partition_with_few_schemas PARTITION(c, d, e) " +
        "SELECT a, b, c, d, e FROM partition_pruning_test_loadtable");
    executeQuery(hiveDriver,"alter table partition_with_few_schemas partition(c=1, d=1, e=1) change a a1 INT");
    executeQuery(hiveDriver,"alter table partition_with_few_schemas partition(c=1, d=1, e=2) change a a1 INT");
    executeQuery(hiveDriver,"alter table partition_with_few_schemas partition(c=2, d=2, e=2) change a a1 INT");

    // Add a partition with custom location
    executeQuery(hiveDriver,
        String.format("ALTER TABLE partition_pruning_test ADD PARTITION (c=99, d=98, e=97) LOCATION '%s'",
            part1Dir.getAbsolutePath()));
    executeQuery(hiveDriver,
        String.format("INSERT INTO TABLE partition_pruning_test PARTITION(c=99, d=98, e=97) " +
            "SELECT '%s', '%s' FROM kv LIMIT 1",
        new Date(System.currentTimeMillis()).toString(), new Timestamp(System.currentTimeMillis()).toString()));

    executeQuery(hiveDriver, "DROP TABLE partition_pruning_test_loadtable");

    // Create a partitioned parquet table (DRILL-3938)
    executeQuery(hiveDriver,
        "CREATE TABLE kv_parquet(key INT, value STRING) PARTITIONED BY (part1 int) STORED AS PARQUET");
    executeQuery(hiveDriver, "INSERT INTO TABLE kv_parquet PARTITION(part1) SELECT key, value, key FROM default.kv");
    executeQuery(hiveDriver, "ALTER TABLE kv_parquet ADD COLUMNS (newcol string)");

    // Create a StorageHandler based table (DRILL-3739)
    executeQuery(hiveDriver, "CREATE TABLE kv_sh(key INT, value STRING) STORED BY " +
        "'org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler'");
    // Insert fails if the table directory already exists for tables with DefaultStorageHandlers. Its a known
    // issue in Hive. So delete the table directory created as part of the CREATE TABLE
    FileUtils.deleteQuietly(new File(warehouseDir, "kv_sh"));
    //executeQuery(hiveDriver, "INSERT OVERWRITE TABLE kv_sh SELECT * FROM kv");

    // Create text tables with skip header and footer table property
    executeQuery(hiveDriver, "create database if not exists skipper");
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_small", "textfile", "1", "1", false));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_small", 5, 1, 1));

    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_large", "textfile", "2", "2", false));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_large", 5000, 2, 2));

    // Tables with deliberately non-numeric skip counts, to test error handling of the table property.
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_header", "textfile", "A", "1", false));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_header", 5, 1, 1));

    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_incorrect_skip_footer", "textfile", "1", "A", false));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_incorrect_skip_footer", 5, 1, 1));

    // Tables whose files contain only header/footer lines and no data rows.
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_header_only", "textfile", "5", "0", false));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_header_only", 0, 5, 0));

    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_footer_only", "textfile", "0", "5", false));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_footer_only", 0, 0, 5));

    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_header_footer_only", "textfile", "5", "5", false));
    executeQuery(hiveDriver, generateTestDataWithHeadersAndFooters("skipper.kv_text_header_footer_only", 0, 5, 5));

    // Partitioned variant, populated via dynamic partitioning from kv_text_large.
    executeQuery(hiveDriver, createTableWithHeaderFooterProperties("skipper.kv_text_with_part", "textfile", "5", "5", true));
    executeQuery(hiveDriver, "insert overwrite table skipper.kv_text_with_part partition (part) " +
        "select key, value, key % 2 as part from skipper.kv_text_large");

    // Create a table based on json file
    executeQuery(hiveDriver, "create table default.simple_json(json string)");
    final String loadData = "load data local inpath '" +
        Resources.getResource("simple.json") + "' into table default.simple_json";
    executeQuery(hiveDriver, loadData);

    createTestDataForDrillNativeParquetReaderTests(hiveDriver);

    createSubDirTable(hiveDriver, testDataFile);

    // hive views (kv_native_view depends on kv_native created in createTestDataForDrillNativeParquetReaderTests above)
    executeQuery(hiveDriver, "CREATE OR REPLACE VIEW readtest_view AS SELECT * FROM readtest");
    executeQuery(hiveDriver, "CREATE VIEW IF NOT EXISTS hive_view AS SELECT * FROM kv");
    executeQuery(hiveDriver, "CREATE OR REPLACE VIEW kv_native_view AS SELECT * FROM kv_native");
    // Hive 3 materialized views are rewrite-enabled by default; disable so the view behaves like earlier versions.
    String disableRewrite = HiveTestUtilities.isHive3() ? "DISABLE REWRITE" : "";
    executeQuery(hiveDriver, String.format("CREATE MATERIALIZED VIEW IF NOT EXISTS hive_view_m %s AS SELECT * FROM kv WHERE key = 1", disableRewrite));
    executeQuery(hiveDriver, "CREATE OR REPLACE VIEW view_over_hive_view AS SELECT * FROM hive_view WHERE key BETWEEN 2 AND 3");
    executeQuery(hiveDriver, "CREATE OR REPLACE VIEW db1.two_table_view AS SELECT COUNT(dk.key) dk_key_count FROM db1.avro dk " +
        "INNER JOIN kv ON kv.key = dk.key");

    createTableWithEmptyParquet(hiveDriver);
  }

  /**
   * Creates tables exercising Drill's native Parquet reader: a managed parquet
   * table with data suited for filter push-down tests, and an external
   * partitioned parquet table with partitions in three different locations
   * (inside the table dir, outside it, and an empty one).
   *
   * @param hiveDriver driver used to execute the Hive statements
   */
  private void createTestDataForDrillNativeParquetReaderTests(Driver hiveDriver) {
    // Hive managed table that has data qualified for Drill native filter push down
    executeQuery(hiveDriver, "create table kv_native(key int, int_key int, var_key varchar(10), dec_key decimal(5, 2)) stored as parquet");
    // each insert is created in separate file
    executeQuery(hiveDriver, "insert into table kv_native values (1, 1, 'var_1', 1.11), (1, 2, 'var_2', 2.22)");
    executeQuery(hiveDriver, "insert into table kv_native values (1, 3, 'var_3', 3.33), (1, 4, 'var_4', 4.44)");
    executeQuery(hiveDriver, "insert into table kv_native values (2, 5, 'var_5', 5.55), (2, 6, 'var_6', 6.66)");
    executeQuery(hiveDriver, "insert into table kv_native values (null, 7, 'var_7', 7.77), (null, 8, 'var_8', 8.88)");

    // Hive external table which has three partitions

    // copy external table with data from test resources
    dirTestWatcher.copyResourceToRoot(Paths.get("external"));

    File external = new File (baseDir, "external");
    String tableLocation = new File(external, "kv_native_ext").toURI().getPath();

    executeQuery(hiveDriver, String.format("create external table kv_native_ext(key int) " +
        "partitioned by (part_key int) " +
        "stored as parquet location '%s'",
        tableLocation));

    /*
      DATA:
      key, part_key
      1, 1
      2, 1
      3, 2
      4, 2
     */

    // add partitions

    // partition in the same location as table
    String firstPartition = new File(tableLocation, "part_key=1").toURI().getPath();
    executeQuery(hiveDriver, String.format("alter table kv_native_ext add partition (part_key = '1') " +
        "location '%s'", firstPartition));

    // partition in different location with table
    String secondPartition = new File(external, "part_key=2").toURI().getPath();
    executeQuery(hiveDriver, String.format("alter table kv_native_ext add partition (part_key = '2') " +
        "location '%s'", secondPartition));

    // add empty partition
    String thirdPartition = new File(dirTestWatcher.makeSubDir(Paths.get("empty_part")), "part_key=3").toURI().getPath();
    executeQuery(hiveDriver, String.format("alter table kv_native_ext add partition (part_key = '3') " +
        "location '%s'", thirdPartition));
  }

  /**
   * Creates an external text table whose data file lives in a sub-directory of
   * the table location, to test recursive directory reading.
   *
   * @param hiveDriver   driver used to execute the Hive statements
   * @param testDataFile path of the (key, value) data file to copy under the table's sub-directory
   */
  private void createSubDirTable(Driver hiveDriver, String testDataFile) {
    String tableName = "sub_dir_table";
    // Place the data file one level below the table location: <root>/sub_dir_table/sub_dir/data.txt
    dirTestWatcher.copyResourceToRoot(Paths.get(testDataFile), Paths.get(tableName, "sub_dir", "data.txt"));

    String tableLocation = Paths.get(dirTestWatcher.getRootDir().toURI().getPath(), tableName).toUri().getPath();

    String tableDDL = String.format("create external table sub_dir_table (key int, value string) " +
        "row format delimited fields terminated by ',' stored as textfile location '%s'", tableLocation);
    executeQuery(hiveDriver, tableDDL);
  }

  /**
   * Creates an external parquet table whose location contains only an empty
   * parquet file (copied from test resources).
   *
   * @param hiveDriver driver used to execute the Hive statements
   */
  private void createTableWithEmptyParquet(Driver hiveDriver) {
    String location = dirTestWatcher.makeSubDir(Paths.get("table_with_empty_parquet")).toURI().getPath();
    String emptyParquet = "empty.parquet";
    dirTestWatcher.copyResourceToRoot(Paths.get(Resources.getResource(emptyParquet).getPath()),
        Paths.get(location, emptyParquet));

    String tableDDL = String.format("create external table table_with_empty_parquet (id bigint, name string) " +
        "stored as parquet location '%s'", location);
    executeQuery(hiveDriver, tableDDL);
  }

  /**
   * Creates an empty temporary file for generated test data.
   *
   * @return the created temporary file
   * @throws Exception if the file cannot be created
   */
  private File getTempFile() throws Exception {
    return java.nio.file.Files.createTempFile("drill-hive-test", ".txt").toFile();
  }

  /**
   * Writes five "i, key_i" lines to a temp file. Note the space after the comma
   * is intentional: db1.kv_db1's RegexSerDe pattern expects "([0-9]*), (.*_[0-9]*)".
   *
   * @return path of the generated file
   * @throws Exception if the file cannot be created or written
   */
  private String generateTestDataFile() throws Exception {
    File file = getTempFile();
    try (PrintWriter printWriter = new PrintWriter(file)) {
      for (int i = 1; i <= 5; i++) {
        printWriter.println(String.format("%d, key_%d", i, i));
      }
    }
    return file.getPath();
  }

  /**
   * Generates input for 'partition_pruning_test_loadtable': five rows of
   * (date, timestamp) for every combination of partition values c, d, e in
   * {1, 2, null} — 135 rows total.
   *
   * @return path of the generated file
   * @throws Exception if the file cannot be created or written
   */
  private String generateTestDataFileForPartitionInput() throws Exception {
    File file = getTempFile();
    try (PrintWriter printWriter = new PrintWriter(file)) {
      String[] partValues = {"1", "2", "null"};
      for (String partValue : partValues) {
        for (String partValue1 : partValues) {
          for (String partValue2 : partValues) {
            for (int i = 1; i <= 5; i++) {
              Date date = new Date(System.currentTimeMillis());
              Timestamp ts = new Timestamp(System.currentTimeMillis());
              printWriter.printf("%s,%s,%s,%s,%s", date.toString(), ts.toString(), partValue, partValue1, partValue2);
              printWriter.println();
            }
          }
        }
      }
    }

    return file.getPath();
  }

  /**
   * Generates the data file for table 'readtest': one row with a value for
   * every column, and one row of all NULLs (empty fields; the table sets
   * serialization.null.format='').
   *
   * @return path of the generated file
   * @throws Exception if the file cannot be created or written
   */
  private String generateAllTypesDataFile() throws Exception {
    File file = getTempFile();

    try (PrintWriter printWriter = new PrintWriter(file)) {
      printWriter.println("YmluYXJ5ZmllbGQ=,false,34,65.99,2347.923,2758725827.9999,29375892739852.7689,"+
          "89853749534593985.7834783,8.345,4.67,123456,234235,3455,stringfield,varcharfield,"+
          "2013-07-05 17:01:00,2013-07-05,charfield");
      printWriter.println(",,,,,,,,,,,,,,,,");
    }

    return file.getPath();
  }

  /**
   * Builds a CREATE TABLE statement for a (key, value) table with
   * skip.header/skip.footer table properties.
   *
   * @param tableName     fully qualified table name
   * @param format        storage format (e.g. "textfile")
   * @param headerValue   value of the skip-header property (may deliberately be non-numeric)
   * @param footerValue   value of the skip-footer property (may deliberately be non-numeric)
   * @param hasPartitions whether to add a "part bigint" partition column
   * @return the CREATE TABLE statement text
   */
  private String createTableWithHeaderFooterProperties(String tableName,
                                                       String format,
                                                       String headerValue,
                                                       String footerValue,
                                                       boolean hasPartitions) {
    StringBuilder sb = new StringBuilder();
    sb.append("create table ").append(tableName);
    sb.append(" (key int, value string) ");
    if (hasPartitions) {
      sb.append("partitioned by (part bigint) ");
    }
    sb.append(" stored as ").append(format);
    sb.append(" tblproperties(");
    sb.append("'").append(serdeConstants.HEADER_COUNT).append("'='").append(headerValue).append("'");
    sb.append(",");
    sb.append("'").append(serdeConstants.FOOTER_COUNT).append("'='").append(footerValue).append("'");
    sb.append(")");

    return sb.toString();
  }

  /**
   * Builds an INSERT statement producing {@code headerLines} header rows,
   * {@code rowCount} data rows and {@code footerLines} footer rows.
   * At least one of the three counts must be positive, otherwise the
   * resulting statement would be malformed (all call sites satisfy this).
   *
   * @param tableName   fully qualified table name
   * @param rowCount    number of (i, 'key_i') data rows
   * @param headerLines number of ('key_header', 'value_header') rows
   * @param footerLines number of ('key_footer', 'value_footer') rows
   * @return the INSERT statement text
   */
  private String generateTestDataWithHeadersAndFooters(String tableName, int rowCount, int headerLines, int footerLines) {
    StringBuilder sb = new StringBuilder();
    sb.append("insert into table ").append(tableName).append(" values ");
    sb.append(StringUtils.repeat("('key_header', 'value_header')", ",", headerLines));
    if (headerLines > 0) {
      sb.append(",");
    }
    for (int i = 1; i <= rowCount; i++) {
      sb.append("(").append(i).append(",").append("'key_").append(i).append("'),");
    }
    // Data rows (and the header separator) each leave a trailing comma; strip it unless footer rows follow.
    if (footerLines <= 0) {
      sb.deleteCharAt(sb.length() - 1);
    }
    sb.append(StringUtils.repeat("('key_footer', 'value_footer')", ",", footerLines));

    return sb.toString();
  }
}