blob: d461628518f5e8fb93d87a0baae12ca4cc9d6791 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.physical.impl.scan;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.util.List;
import org.apache.drill.categories.RowSetTest;
import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
import org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager;
import org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions;
import org.apache.drill.exec.physical.impl.scan.file.PartitionColumn;
import org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedColumn;
import org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn;
import org.apache.drill.exec.physical.rowSet.RowSetTestUtils;
import org.apache.drill.exec.physical.impl.scan.project.ColumnProjection;
import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection;
import com.google.common.collect.Lists;
import org.apache.drill.test.SubOperatorTest;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category(RowSetTest.class)
public class TestImplicitColumnParser extends SubOperatorTest {
private ImplicitColumnOptions standardOptions(Path filePath) {
return standardOptions(Lists.newArrayList(filePath));
}
private ImplicitColumnOptions standardOptions(List<Path> files) {
ImplicitColumnOptions options = new ImplicitColumnOptions();
options.useLegacyWildcardExpansion(false); // Don't expand partition columns for wildcard
options.setSelectionRoot(new Path("hdfs:///w"));
options.setFiles(files);
return options;
}
@Test
public void testBasics() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
standardOptions(filePath));
// Simulate SELECT a, b, c ...
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList("a", "b", "c"),
Lists.newArrayList(implictColManager.projectionParser()));
// Verify
assertFalse(scanProj.projectAll());
assertFalse(implictColManager.hasImplicitCols());
}
/**
* Test including file metadata (AKA "implicit columns") in the project
* list.
*/
@Test
public void testFileMetadataColumnSelection() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
standardOptions(filePath));
// Simulate SELECT a, fqn, filEPath, filename, suffix ...
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList("a",
ScanTestUtils.FULLY_QUALIFIED_NAME_COL,
"filEPath", // Sic, to test case sensitivity
ScanTestUtils.FILE_NAME_COL,
ScanTestUtils.SUFFIX_COL),
Lists.newArrayList(implictColManager.projectionParser()));
assertFalse(scanProj.projectAll());
assertEquals(5, scanProj.requestedCols().size());
assertEquals(5, scanProj.columns().size());
assertEquals("a", scanProj.columns().get(0).name());
assertEquals(ScanTestUtils.FULLY_QUALIFIED_NAME_COL, scanProj.columns().get(1).name());
assertEquals("filEPath", scanProj.columns().get(2).name());
assertEquals(ScanTestUtils.FILE_NAME_COL, scanProj.columns().get(3).name());
assertEquals(ScanTestUtils.SUFFIX_COL, scanProj.columns().get(4).name());
// Verify column type
assertTrue(scanProj.columns().get(0) instanceof UnresolvedColumn);
assertTrue(scanProj.columns().get(1) instanceof FileMetadataColumn);
assertTrue(scanProj.columns().get(2) instanceof FileMetadataColumn);
assertTrue(scanProj.columns().get(3) instanceof FileMetadataColumn);
assertTrue(scanProj.columns().get(4) instanceof FileMetadataColumn);
assertTrue(implictColManager.hasImplicitCols());
}
/**
* Verify that partition columns, in any case, work.
*/
@Test
public void testPartitionColumnSelection() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
standardOptions(filePath));
String dir0 = ScanTestUtils.partitionColName(0);
// Sic: case insensitivity, but name in project list
// is preferred over "natural" name.
String dir1 = "DIR1";
String dir2 = ScanTestUtils.partitionColName(2);
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList(dir2, dir1, dir0, "a"),
Lists.newArrayList(implictColManager.projectionParser()));
assertEquals(4, scanProj.columns().size());
assertEquals(dir2, scanProj.columns().get(0).name());
assertEquals(dir1, scanProj.columns().get(1).name());
assertEquals(dir0, scanProj.columns().get(2).name());
assertEquals("a", scanProj.columns().get(3).name());
// Verify column type
assertTrue(scanProj.columns().get(0) instanceof PartitionColumn);
}
/**
* Test wildcard expansion.
*/
@Test
public void testRevisedWildcard() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
standardOptions(filePath));
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectAll(),
Lists.newArrayList(implictColManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
assertEquals(1, cols.size());
assertTrue(scanProj.columns().get(0) instanceof UnresolvedWildcardColumn);
}
/**
* Combine wildcard and file metadata columns. The wildcard expands
* table columns but not metadata columns.
*/
@Test
public void testLegacyWildcardAndFileMetadata() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnOptions options = standardOptions(filePath);
options.useLegacyWildcardExpansion(true);
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
options);
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList(
SchemaPath.DYNAMIC_STAR,
ScanTestUtils.FILE_NAME_COL,
ScanTestUtils.SUFFIX_COL),
Lists.newArrayList(implictColManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
assertEquals(5, cols.size());
assertTrue(scanProj.columns().get(0) instanceof UnresolvedWildcardColumn);
assertTrue(scanProj.columns().get(1) instanceof FileMetadataColumn);
assertTrue(scanProj.columns().get(2) instanceof FileMetadataColumn);
assertTrue(scanProj.columns().get(3) instanceof PartitionColumn);
assertTrue(scanProj.columns().get(4) instanceof PartitionColumn);
}
/**
* As above, but include implicit columns before and after the
* wildcard.
*/
@Test
public void testLegacyWildcardAndFileMetadataMixed() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnOptions options = standardOptions(filePath);
options.useLegacyWildcardExpansion(true);
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
options);
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList(
ScanTestUtils.FILE_NAME_COL,
SchemaPath.DYNAMIC_STAR,
ScanTestUtils.SUFFIX_COL),
Lists.newArrayList(implictColManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
assertEquals(5, cols.size());
assertTrue(scanProj.columns().get(0) instanceof FileMetadataColumn);
assertTrue(scanProj.columns().get(1) instanceof UnresolvedWildcardColumn);
assertTrue(scanProj.columns().get(2) instanceof FileMetadataColumn);
assertTrue(scanProj.columns().get(3) instanceof PartitionColumn);
assertTrue(scanProj.columns().get(4) instanceof PartitionColumn);
}
/**
* Include both a wildcard and a partition column. The wildcard, in
* legacy mode, will create partition columns for any partitions not
* mentioned in the project list.
* <p>
* Tests proposed functionality: included only requested partition
* columns.
*/
@Test
public void testRevisedWildcardAndPartition() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
standardOptions(filePath));
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR,
ScanTestUtils.partitionColName(8)),
Lists.newArrayList(implictColManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
assertEquals(2, cols.size());
assertTrue(scanProj.columns().get(0) instanceof UnresolvedWildcardColumn);
assertTrue(scanProj.columns().get(1) instanceof PartitionColumn);
}
@Test
public void testPreferredPartitionExpansion() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnOptions options = standardOptions(filePath);
options.useLegacyWildcardExpansion(true);
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
options);
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR,
ScanTestUtils.partitionColName(8)),
Lists.newArrayList(implictColManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
assertEquals(4, cols.size());
assertTrue(scanProj.columns().get(0) instanceof UnresolvedWildcardColumn);
assertTrue(scanProj.columns().get(1) instanceof PartitionColumn);
assertEquals(8, ((PartitionColumn) cols.get(1)).partition());
assertTrue(scanProj.columns().get(2) instanceof PartitionColumn);
assertEquals(0, ((PartitionColumn) cols.get(2)).partition());
assertTrue(scanProj.columns().get(3) instanceof PartitionColumn);
assertEquals(1, ((PartitionColumn) cols.get(3)).partition());
}
@Test
public void testPreferedWildcardExpansionWithOverlap() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnOptions options = standardOptions(filePath);
options.useLegacyWildcardExpansion(true);
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
options);
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR,
ScanTestUtils.partitionColName(1)),
Lists.newArrayList(implictColManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
assertEquals(3, cols.size());
assertTrue(scanProj.columns().get(0) instanceof UnresolvedWildcardColumn);
assertTrue(scanProj.columns().get(1) instanceof PartitionColumn);
assertEquals(1, ((PartitionColumn) cols.get(1)).partition());
assertTrue(scanProj.columns().get(2) instanceof PartitionColumn);
assertEquals(0, ((PartitionColumn) cols.get(2)).partition());
}
/**
* Verify that names that look like metadata columns, but appear
* to be maps or arrays, are not interpreted as metadata. That is,
* the projected table map or array "shadows" the metadata column.
*/
@Test
public void testShadowed() {
Path filePath = new Path("hdfs:///w/x/y/z.csv");
ImplicitColumnManager implictColManager = new ImplicitColumnManager(
fixture.getOptionManager(),
standardOptions(filePath));
ScanLevelProjection scanProj = ScanLevelProjection.build(
RowSetTestUtils.projectList(
ScanTestUtils.FILE_NAME_COL + ".a",
ScanTestUtils.FILE_PATH_COL + "[0]",
ScanTestUtils.partitionColName(0) + ".b",
ScanTestUtils.partitionColName(1) + "[0]",
ScanTestUtils.SUFFIX_COL),
Lists.newArrayList(implictColManager.projectionParser()));
List<ColumnProjection> cols = scanProj.columns();
assertEquals(5, cols.size());
for (int i = 0; i < 4; i++) {
assertTrue(scanProj.columns().get(i) instanceof UnresolvedColumn);
}
assertTrue(scanProj.columns().get(4) instanceof FileMetadataColumn);
}
}