/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store;

import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.util.JsonStringArrayList;
import org.apache.drill.exec.util.Text;
import com.google.common.io.Files;
import org.apache.drill.test.BaseTestQuery;
import org.junit.BeforeClass;
import org.junit.Test;
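
/**
 * Tests Drill's implicit file metadata columns (fqn, filepath, filename, suffix)
 * and directory partition columns (dir0, dir1, ...) over CSV, JSON and Parquet inputs.
 */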
public class TestImplicitFileColumns extends BaseTestQuery {
public static final String CSV = "csv";
public static final String MAIN = "main";
public static final String NESTED = "nested";
public static final String MAIN_FILE = MAIN + "." + CSV;
public static final String NESTED_FILE = NESTED + "." + CSV;
public static final Path FILES = Paths.get("files");
public static final Path NESTED_DIR = FILES.resolve(NESTED);
public static final Path JSON_TBL = Paths.get("scan", "jsonTbl"); // 1990/1.json : {id:100, name: "John"}, 1991/2.json : {id: 1000, name : "Joe"}
public static final Path PARQUET_TBL = Paths.get("multilevel", "parquet"); // 1990/Q1/orders_1990_q1.parquet, ...
public static final Path PARQUET_CHANGE_TBL = Paths.get("multilevel", "parquetWithSchemaChange");
public static final Path CSV_TBL = Paths.get("multilevel", "csv"); // 1990/Q1/orders_1990_q1.csv, ...
@SuppressWarnings("serial")
private static final JsonStringArrayList<Text> mainColumnValues = new JsonStringArrayList<Text>() {{
add(new Text(MAIN));
}};
@SuppressWarnings("serial")
private static final JsonStringArrayList<Text> nestedColumnValues = new JsonStringArrayList<Text>() {{
add(new Text(NESTED));
}};
private static File mainFile;
private static File nestedFile;
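
  // Creates the following layout under the test root:
  //   files/main.csv          (contents: "main")
  //   files/nested/nested.csv (contents: "nested")
  // and copies the JSON, Parquet and CSV resource tables used by the remaining tests.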
@BeforeClass
public static void setup() throws Exception {
File files = dirTestWatcher.makeRootSubDir(FILES);
mainFile = new File(files, MAIN_FILE);
Files.asCharSink(mainFile, StandardCharsets.UTF_8).write(MAIN);
File nestedFolder = new File(files, NESTED);
nestedFolder.mkdirs();
nestedFile = new File(nestedFolder, NESTED_FILE);
Files.asCharSink(nestedFile, StandardCharsets.UTF_8).write(NESTED);
dirTestWatcher.copyResourceToRoot(JSON_TBL);
dirTestWatcher.copyResourceToRoot(PARQUET_TBL);
dirTestWatcher.copyResourceToRoot(CSV_TBL);
dirTestWatcher.copyResourceToRoot(PARQUET_CHANGE_TBL);
}
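
  // Implicit file columns are only returned when named explicitly, so they are requested
  // alongside `select *` here.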
@Test
public void testImplicitColumns() throws Exception {
testBuilder()
.sqlQuery("select *, filename, suffix, fqn, filepath from dfs.`%s` order by filename", FILES)
.ordered()
.baselineColumns("columns", "dir0", "filename", "suffix", "fqn", "filepath")
.baselineValues(mainColumnValues, null, mainFile.getName(), CSV, mainFile.getCanonicalPath(), mainFile.getParentFile().getCanonicalPath())
.baselineValues(nestedColumnValues, NESTED, NESTED_FILE, CSV, nestedFile.getCanonicalPath(), nestedFile.getParentFile().getCanonicalPath())
.go();
}
@Test
public void testImplicitColumnInWhereClause() throws Exception {
testBuilder()
.sqlQuery("select * from dfs.`%s` where filename = '%s'", NESTED_DIR, NESTED_FILE)
.unOrdered()
.baselineColumns("columns")
.baselineValues(nestedColumnValues)
.go();
}
@Test
public void testImplicitColumnAlone() throws Exception {
testBuilder()
.sqlQuery("select filename from dfs.`%s`", NESTED_DIR)
.unOrdered()
.baselineColumns("filename")
.baselineValues(NESTED_FILE)
.go();
}
@Test
public void testImplicitColumnWithTableColumns() throws Exception {
testBuilder()
.sqlQuery("select columns, filename from dfs.`%s`", NESTED_DIR)
.unOrdered()
.baselineColumns("columns", "filename")
.baselineValues(nestedColumnValues, NESTED_FILE)
.go();
}
@Test
public void testCountStarWithImplicitColumnsInWhereClause() throws Exception {
testBuilder()
.sqlQuery("select count(*) as cnt from dfs.`%s` where filename = '%s'", NESTED_DIR, NESTED_FILE)
.unOrdered()
.baselineColumns("cnt")
.baselineValues(1L)
.go();
}
@Test
public void testImplicitAndPartitionColumnsInSelectClause() throws Exception {
testBuilder()
.sqlQuery("select dir0, filename from dfs.`%s` order by filename", FILES)
.ordered()
.baselineColumns("dir0", "filename")
.baselineValues(null, MAIN_FILE)
.baselineValues(NESTED, NESTED_FILE)
.go();
}
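
  // Implicit columns are also available for Parquet files read through the classpath (`cp`) plugin.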
@Test
public void testImplicitColumnsForParquet() throws Exception {
testBuilder()
.sqlQuery("select filename, suffix from cp.`tpch/region.parquet` limit 1")
.unOrdered()
.baselineColumns("filename", "suffix")
.baselineValues("region.parquet", "parquet")
.go();
}
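
  // The Parquet files under parquetWithSchemaChange carry differing schemas; aggregating the dir0
  // partition column should still work.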
@Test // DRILL-4733
public void testMultilevelParquetWithSchemaChange() throws Exception {
try {
test("alter session set `planner.enable_decimal_data_type` = true");
testBuilder()
.sqlQuery("select max(dir0) as max_dir from dfs.`%s`", PARQUET_CHANGE_TBL)
.unOrdered()
.baselineColumns("max_dir")
.baselineValues("voter50")
.go();
} finally {
test("alter session set `planner.enable_decimal_data_type` = false");
}
}
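
  // The star-column tests below verify that `select *` exposes the table columns plus the dirN
  // partition columns, while the file metadata columns stay hidden unless requested explicitly.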
@Test
public void testStarColumnJson() throws Exception {
SchemaBuilder schemaBuilder = new SchemaBuilder()
.addNullable("id", TypeProtos.MinorType.BIGINT)
.addNullable("name", TypeProtos.MinorType.VARCHAR)
.addNullable("dir0", TypeProtos.MinorType.VARCHAR);
final BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();
testBuilder()
.sqlQuery("select * from dfs.`%s` ", JSON_TBL)
.schemaBaseLine(expectedSchema)
.build()
.run();
}
@Test
public void testStarColumnParquet() throws Exception {
SchemaBuilder schemaBuilder = new SchemaBuilder()
.addNullable("dir0", TypeProtos.MinorType.VARCHAR)
.addNullable("dir1", TypeProtos.MinorType.VARCHAR)
.add("o_orderkey", TypeProtos.MinorType.INT)
.add("o_custkey", TypeProtos.MinorType.INT)
.add("o_orderstatus", TypeProtos.MinorType.VARCHAR)
.add("o_totalprice", TypeProtos.MinorType.FLOAT8)
.add("o_orderdate", TypeProtos.MinorType.DATE)
.add("o_orderpriority", TypeProtos.MinorType.VARCHAR)
.add("o_clerk", TypeProtos.MinorType.VARCHAR)
.add("o_shippriority", TypeProtos.MinorType.INT)
.add("o_comment", TypeProtos.MinorType.VARCHAR);
final BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();
testBuilder()
.sqlQuery("select * from dfs.`%s` ", PARQUET_TBL)
.schemaBaseLine(expectedSchema)
.build()
.run();
}
@Test
public void testStarColumnCsv() throws Exception {
SchemaBuilder schemaBuilder = new SchemaBuilder()
.addArray("columns", TypeProtos.MinorType.VARCHAR)
.addNullable("dir0", TypeProtos.MinorType.VARCHAR)
.addNullable("dir1", TypeProtos.MinorType.VARCHAR);
final BatchSchema expectedSchema = new BatchSchemaBuilder()
.withSchemaBuilder(schemaBuilder)
.build();
testBuilder()
.sqlQuery("select * from dfs.`%s` ", CSV_TBL)
.schemaBaseLine(expectedSchema)
.build()
.run();
}
}