exec/java-exec/src/test/java/org/apache/drill/exec/physical/impl/scan/TestColumnsArrayParser.java - drill - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.drill.exec.physical.impl.scan;

 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;

 import org.apache.drill.categories.RowSetTest;
 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayParser;
 import org.apache.drill.exec.physical.impl.scan.columns.ColumnsScanFramework;
 import org.apache.drill.exec.physical.impl.scan.columns.UnresolvedColumnsArrayColumn;
 import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
 import org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager;
 import org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions;
 import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection;
 import org.apache.drill.exec.physical.rowSet.RowSetTestUtils;
 import com.google.common.collect.Lists;
 import org.apache.drill.test.SubOperatorTest;
 import org.apache.hadoop.fs.Path;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;

 /**
  * Tests how {@link ColumnsArrayParser} handles the special {@code columns}
  * array column when a {@link ScanLevelProjection} is built from a
  * projection list.
  * <p>
  * NOTE(review): most tests create the parser with {@code true}; judging by
  * the "Required" test names this presumably means the {@code columns}
  * array is required. Confirm against the parser's constructor.
  */
 @Category(RowSetTest.class)
 public class TestColumnsArrayParser extends SubOperatorTest {

   /**
    * Test the special "columns" column that asks to return all columns
    * as an array. No need for early schema. This case is special: it actually
    * creates the one and only table column to match the desired output column.
    */
   @Test
   public void testColumnsArray() {
     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectList(ColumnsScanFramework.COLUMNS_COL),
         ScanTestUtils.parsers(new ColumnsArrayParser(true)));

     assertFalse(scanProj.projectAll());
     assertEquals(1, scanProj.requestedCols().size());

     assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
   }

   /**
    * Projects just {@code columns} with the parser created with {@code true}.
    * <p>
    * NOTE(review): this is currently identical to {@link #testColumnsArray()};
    * presumably the two once used different parser settings. Confirm whether
    * both are still needed.
    */
   @Test
   public void testRequiredColumnsArray() {
     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectList(ColumnsScanFramework.COLUMNS_COL),
         ScanTestUtils.parsers(new ColumnsArrayParser(true)));

     assertFalse(scanProj.projectAll());
     assertEquals(1, scanProj.requestedCols().size());

     assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
   }

   /**
    * A project-all (wildcard) request is expected to produce the single
    * {@code columns} array column.
    */
   @Test
   public void testRequiredWildcard() {
     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectAll(),
         ScanTestUtils.parsers(new ColumnsArrayParser(true)));

     assertTrue(scanProj.projectAll());
     assertEquals(1, scanProj.requestedCols().size());

     assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
   }

   /**
    * The {@code columns} name is matched without regard to case; the
    * projected column keeps the name exactly as requested.
    */
   @Test
   public void testColumnsArrayCaseInsensitive() {

     // Sic: case variation of standard name

     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectList("Columns"),
         ScanTestUtils.parsers(new ColumnsArrayParser(true)));

     assertFalse(scanProj.projectAll());
     assertEquals(1, scanProj.requestedCols().size());

     assertOnlyColumnsArray(scanProj, "Columns");
   }

   /**
    * Projecting individual elements ({@code columns[3]}, {@code columns[1]})
    * yields a single {@code columns} column whose selected-index flags mark
    * exactly the requested elements.
    */
   @Test
   public void testColumnsElements() {
     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectList(
             ColumnsScanFramework.COLUMNS_COL + "[3]",
             ColumnsScanFramework.COLUMNS_COL + "[1]"),
         ScanTestUtils.parsers(new ColumnsArrayParser(true)));

     assertFalse(scanProj.projectAll());
     assertEquals(2, scanProj.requestedCols().size());

     assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);

     // Verify the per-element selection: only indexes 1 and 3 were requested.
     // (The previous version checked indexes 0 and 1 twice and never looked
     // at 2 and 3.)

     UnresolvedColumnsArrayColumn colsCol =
         (UnresolvedColumnsArrayColumn) scanProj.columns().get(0);
     boolean[] indexes = colsCol.selectedIndexes();
     assertNotNull(indexes);
     assertEquals(4, indexes.length);
     assertFalse(indexes[0]);
     assertTrue(indexes[1]);
     assertFalse(indexes[2]);
     assertTrue(indexes[3]);
   }

   /**
    * The `columns` column is special; can't include both `columns` and
    * a named column in the same project.
    * <p>
    * TODO: This should only be true for text readers, make this an option.
    */
   @Test
   public void testErrorColumnsArrayAndColumn() {
     try {
       ScanLevelProjection.build(
           RowSetTestUtils.projectList(ColumnsScanFramework.COLUMNS_COL, "a"),
           ScanTestUtils.parsers(new ColumnsArrayParser(true)));
       fail("Expected UserException: `columns` followed by a named column");
     } catch (UserException expected) {
       // Expected
     }
   }

   /**
    * Project a named column followed by `columns` (reversed order of the
    * previous test); this combination must be rejected as well.
    */
   @Test
   public void testErrorColumnAndColumnsArray() {
     try {
       ScanLevelProjection.build(
           RowSetTestUtils.projectList("a", ColumnsScanFramework.COLUMNS_COL),
           ScanTestUtils.parsers(new ColumnsArrayParser(true)));
       fail("Expected UserException: named column followed by `columns`");
     } catch (UserException expected) {
       // Expected
     }
   }

   /**
    * Requesting `columns` twice: second is ignored.
    */
   @Test
   public void testTwoColumnsArray() {
     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectList(
             ColumnsScanFramework.COLUMNS_COL,
             ColumnsScanFramework.COLUMNS_COL),
         ScanTestUtils.parsers(new ColumnsArrayParser(false)));

     assertFalse(scanProj.projectAll());
     assertEquals(2, scanProj.requestedCols().size());
     assertEquals(1, scanProj.columns().size());
     assertEquals(ColumnsScanFramework.COLUMNS_COL, scanProj.columns().get(0).name());
   }

   /**
    * Projecting only a named column ("a") with the parser created with
    * {@code true} is expected to fail with a {@link UserException}.
    */
   @Test
   public void testErrorRequiredAndExtra() {
     try {
       ScanLevelProjection.build(
           RowSetTestUtils.projectList("a"),
           ScanTestUtils.parsers(new ColumnsArrayParser(true)));
       fail("Expected UserException: named column without `columns`");
     } catch (UserException expected) {
       // Expected
     }
   }

   /**
    * An element index of 70000 is expected to be rejected with a
    * {@link UserException}.
    */
   @Test
   public void testColumnsIndexTooLarge() {
     try {
       ScanLevelProjection.build(
           RowSetTestUtils.projectCols(SchemaPath.parseFromString("columns[70000]")),
           ScanTestUtils.parsers(new ColumnsArrayParser(true)));
       fail("Expected UserException: columns[70000]");
     } catch (UserException expected) {
       // Expected
     }
   }

   /**
    * Builds implicit-column options for a scan of a single file under the
    * selection root {@code hdfs:///w}, with legacy wildcard expansion off.
    *
    * @param filePath the one file to register with the options
    * @return the configured options
    */
   private ImplicitColumnOptions standardOptions(Path filePath) {
     ImplicitColumnOptions options = new ImplicitColumnOptions();
     options.useLegacyWildcardExpansion(false); // Don't expand partition columns for wildcard
     options.setSelectionRoot(new Path("hdfs:///w"));
     options.setFiles(Lists.newArrayList(filePath));
     return options;
   }

   /**
    * The `columns` column is special: can't be used with other column names.
    * Make sure that the rule <i>does not</i> apply to implicit columns.
    */
   @Test
   public void testMetadataColumnsWithColumnsArray() {
     Path filePath = new Path("hdfs:///w/x/y/z.csv");
     ImplicitColumnManager metadataManager = new ImplicitColumnManager(
         fixture.getOptionManager(),
         standardOptions(filePath));

     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL,
             ColumnsScanFramework.COLUMNS_COL,
             ScanTestUtils.SUFFIX_COL),
         ScanTestUtils.parsers(new ColumnsArrayParser(true),
             metadataManager.projectionParser()));

     assertFalse(scanProj.projectAll());

     assertEquals(3, scanProj.columns().size());

     assertEquals(ScanTestUtils.FILE_NAME_COL, scanProj.columns().get(0).name());
     assertEquals(ColumnsScanFramework.COLUMNS_COL, scanProj.columns().get(1).name());
     assertEquals(ScanTestUtils.SUFFIX_COL, scanProj.columns().get(2).name());

     // Verify column type

     assertTrue(scanProj.columns().get(0) instanceof FileMetadataColumn);
     assertTrue(scanProj.columns().get(1) instanceof UnresolvedColumnsArrayColumn);
     assertTrue(scanProj.columns().get(2) instanceof FileMetadataColumn);
   }

   /**
    * If a query is of the form:
    * <pre><code>
    * select * from dfs.`multilevel/csv` where columns[1] &lt; 1000
    * </code></pre>
    * Then the projection list passed to the scan operator
    * includes both the wildcard and the `columns` array.
    * We can ignore one of them.
    */
   @Test
   public void testWildcardAndColumns() {
     ScanLevelProjection scanProj = ScanLevelProjection.build(
         RowSetTestUtils.projectList(
             SchemaPath.DYNAMIC_STAR,
             ColumnsScanFramework.COLUMNS_COL),
         ScanTestUtils.parsers(new ColumnsArrayParser(true)));

     assertTrue(scanProj.projectAll());
     assertEquals(2, scanProj.requestedCols().size());

     assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
   }

   /**
    * Referencing {@code columns} as a map ({@code columns.x}) is expected to
    * fail with a {@link UserException}.
    */
   @Test
   public void testColumnsAsMap() {
     try {
       ScanLevelProjection.build(
           RowSetTestUtils.projectList("columns.x"),
           ScanTestUtils.parsers(new ColumnsArrayParser(true)));
       fail("Expected UserException: columns.x");
     } catch (UserException expected) {
       // Expected
     }
   }

   /**
    * Asserts that the projection resolved to exactly one output column, that
    * it carries the given name, and that it is an
    * {@link UnresolvedColumnsArrayColumn}.
    *
    * @param scanProj the projection to inspect
    * @param expectedName the name the single column must have
    */
   private static void assertOnlyColumnsArray(ScanLevelProjection scanProj,
       String expectedName) {
     assertEquals(1, scanProj.columns().size());
     assertEquals(expectedName, scanProj.columns().get(0).name());

     // Verify column type

     assertTrue(scanProj.columns().get(0) instanceof UnresolvedColumnsArrayColumn);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.drill.exec.physical.impl.scan;

	import static org.junit.Assert.assertEquals;
	import static org.junit.Assert.assertFalse;
	import static org.junit.Assert.assertNotNull;
	import static org.junit.Assert.assertTrue;
	import static org.junit.Assert.fail;

	import org.apache.drill.categories.RowSetTest;
	import org.apache.drill.common.exceptions.UserException;
	import org.apache.drill.common.expression.SchemaPath;
	import org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayParser;
	import org.apache.drill.exec.physical.impl.scan.columns.ColumnsScanFramework;
	import org.apache.drill.exec.physical.impl.scan.columns.UnresolvedColumnsArrayColumn;
	import org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn;
	import org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager;
	import org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions;
	import org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection;
	import org.apache.drill.exec.physical.rowSet.RowSetTestUtils;
	import com.google.common.collect.Lists;
	import org.apache.drill.test.SubOperatorTest;
	import org.apache.hadoop.fs.Path;
	import org.junit.Test;
	import org.junit.experimental.categories.Category;

	/**
	 * Tests how {@link ColumnsArrayParser} handles the special {@code columns}
	 * array column when a {@link ScanLevelProjection} is built from a
	 * projection list.
	 * <p>
	 * NOTE(review): most tests create the parser with {@code true}; judging by
	 * the "Required" test names this presumably means the {@code columns}
	 * array is required. Confirm against the parser's constructor.
	 */
	@Category(RowSetTest.class)
	public class TestColumnsArrayParser extends SubOperatorTest {

	  /**
	   * Test the special "columns" column that asks to return all columns
	   * as an array. No need for early schema. This case is special: it actually
	   * creates the one and only table column to match the desired output column.
	   */
	  @Test
	  public void testColumnsArray() {
	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectList(ColumnsScanFramework.COLUMNS_COL),
	        ScanTestUtils.parsers(new ColumnsArrayParser(true)));

	    assertFalse(scanProj.projectAll());
	    assertEquals(1, scanProj.requestedCols().size());

	    assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
	  }

	  /**
	   * Projects just {@code columns} with the parser created with {@code true}.
	   * <p>
	   * NOTE(review): this is currently identical to {@link #testColumnsArray()};
	   * presumably the two once used different parser settings. Confirm whether
	   * both are still needed.
	   */
	  @Test
	  public void testRequiredColumnsArray() {
	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectList(ColumnsScanFramework.COLUMNS_COL),
	        ScanTestUtils.parsers(new ColumnsArrayParser(true)));

	    assertFalse(scanProj.projectAll());
	    assertEquals(1, scanProj.requestedCols().size());

	    assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
	  }

	  /**
	   * A project-all (wildcard) request is expected to produce the single
	   * {@code columns} array column.
	   */
	  @Test
	  public void testRequiredWildcard() {
	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectAll(),
	        ScanTestUtils.parsers(new ColumnsArrayParser(true)));

	    assertTrue(scanProj.projectAll());
	    assertEquals(1, scanProj.requestedCols().size());

	    assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
	  }

	  /**
	   * The {@code columns} name is matched without regard to case; the
	   * projected column keeps the name exactly as requested.
	   */
	  @Test
	  public void testColumnsArrayCaseInsensitive() {

	    // Sic: case variation of standard name

	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectList("Columns"),
	        ScanTestUtils.parsers(new ColumnsArrayParser(true)));

	    assertFalse(scanProj.projectAll());
	    assertEquals(1, scanProj.requestedCols().size());

	    assertOnlyColumnsArray(scanProj, "Columns");
	  }

	  /**
	   * Projecting individual elements ({@code columns[3]}, {@code columns[1]})
	   * yields a single {@code columns} column whose selected-index flags mark
	   * exactly the requested elements.
	   */
	  @Test
	  public void testColumnsElements() {
	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectList(
	            ColumnsScanFramework.COLUMNS_COL + "[3]",
	            ColumnsScanFramework.COLUMNS_COL + "[1]"),
	        ScanTestUtils.parsers(new ColumnsArrayParser(true)));

	    assertFalse(scanProj.projectAll());
	    assertEquals(2, scanProj.requestedCols().size());

	    assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);

	    // Verify the per-element selection: only indexes 1 and 3 were requested.
	    // (The previous version checked indexes 0 and 1 twice and never looked
	    // at 2 and 3.)

	    UnresolvedColumnsArrayColumn colsCol =
	        (UnresolvedColumnsArrayColumn) scanProj.columns().get(0);
	    boolean[] indexes = colsCol.selectedIndexes();
	    assertNotNull(indexes);
	    assertEquals(4, indexes.length);
	    assertFalse(indexes[0]);
	    assertTrue(indexes[1]);
	    assertFalse(indexes[2]);
	    assertTrue(indexes[3]);
	  }

	  /**
	   * The `columns` column is special; can't include both `columns` and
	   * a named column in the same project.
	   * <p>
	   * TODO: This should only be true for text readers, make this an option.
	   */
	  @Test
	  public void testErrorColumnsArrayAndColumn() {
	    try {
	      ScanLevelProjection.build(
	          RowSetTestUtils.projectList(ColumnsScanFramework.COLUMNS_COL, "a"),
	          ScanTestUtils.parsers(new ColumnsArrayParser(true)));
	      fail("Expected UserException: `columns` followed by a named column");
	    } catch (UserException expected) {
	      // Expected
	    }
	  }

	  /**
	   * Project a named column followed by `columns` (reversed order of the
	   * previous test); this combination must be rejected as well.
	   */
	  @Test
	  public void testErrorColumnAndColumnsArray() {
	    try {
	      ScanLevelProjection.build(
	          RowSetTestUtils.projectList("a", ColumnsScanFramework.COLUMNS_COL),
	          ScanTestUtils.parsers(new ColumnsArrayParser(true)));
	      fail("Expected UserException: named column followed by `columns`");
	    } catch (UserException expected) {
	      // Expected
	    }
	  }

	  /**
	   * Requesting `columns` twice: second is ignored.
	   */
	  @Test
	  public void testTwoColumnsArray() {
	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectList(
	            ColumnsScanFramework.COLUMNS_COL,
	            ColumnsScanFramework.COLUMNS_COL),
	        ScanTestUtils.parsers(new ColumnsArrayParser(false)));

	    assertFalse(scanProj.projectAll());
	    assertEquals(2, scanProj.requestedCols().size());
	    assertEquals(1, scanProj.columns().size());
	    assertEquals(ColumnsScanFramework.COLUMNS_COL, scanProj.columns().get(0).name());
	  }

	  /**
	   * Projecting only a named column ("a") with the parser created with
	   * {@code true} is expected to fail with a {@link UserException}.
	   */
	  @Test
	  public void testErrorRequiredAndExtra() {
	    try {
	      ScanLevelProjection.build(
	          RowSetTestUtils.projectList("a"),
	          ScanTestUtils.parsers(new ColumnsArrayParser(true)));
	      fail("Expected UserException: named column without `columns`");
	    } catch (UserException expected) {
	      // Expected
	    }
	  }

	  /**
	   * An element index of 70000 is expected to be rejected with a
	   * {@link UserException}.
	   */
	  @Test
	  public void testColumnsIndexTooLarge() {
	    try {
	      ScanLevelProjection.build(
	          RowSetTestUtils.projectCols(SchemaPath.parseFromString("columns[70000]")),
	          ScanTestUtils.parsers(new ColumnsArrayParser(true)));
	      fail("Expected UserException: columns[70000]");
	    } catch (UserException expected) {
	      // Expected
	    }
	  }

	  /**
	   * Builds implicit-column options for a scan of a single file under the
	   * selection root {@code hdfs:///w}, with legacy wildcard expansion off.
	   *
	   * @param filePath the one file to register with the options
	   * @return the configured options
	   */
	  private ImplicitColumnOptions standardOptions(Path filePath) {
	    ImplicitColumnOptions options = new ImplicitColumnOptions();
	    options.useLegacyWildcardExpansion(false); // Don't expand partition columns for wildcard
	    options.setSelectionRoot(new Path("hdfs:///w"));
	    options.setFiles(Lists.newArrayList(filePath));
	    return options;
	  }

	  /**
	   * The `columns` column is special: can't be used with other column names.
	   * Make sure that the rule <i>does not</i> apply to implicit columns.
	   */
	  @Test
	  public void testMetadataColumnsWithColumnsArray() {
	    Path filePath = new Path("hdfs:///w/x/y/z.csv");
	    ImplicitColumnManager metadataManager = new ImplicitColumnManager(
	        fixture.getOptionManager(),
	        standardOptions(filePath));

	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL,
	            ColumnsScanFramework.COLUMNS_COL,
	            ScanTestUtils.SUFFIX_COL),
	        ScanTestUtils.parsers(new ColumnsArrayParser(true),
	            metadataManager.projectionParser()));

	    assertFalse(scanProj.projectAll());

	    assertEquals(3, scanProj.columns().size());

	    assertEquals(ScanTestUtils.FILE_NAME_COL, scanProj.columns().get(0).name());
	    assertEquals(ColumnsScanFramework.COLUMNS_COL, scanProj.columns().get(1).name());
	    assertEquals(ScanTestUtils.SUFFIX_COL, scanProj.columns().get(2).name());

	    // Verify column type

	    assertTrue(scanProj.columns().get(0) instanceof FileMetadataColumn);
	    assertTrue(scanProj.columns().get(1) instanceof UnresolvedColumnsArrayColumn);
	    assertTrue(scanProj.columns().get(2) instanceof FileMetadataColumn);
	  }

	  /**
	   * If a query is of the form:
	   * <pre><code>
	   * select * from dfs.`multilevel/csv` where columns[1] &lt; 1000
	   * </code></pre>
	   * Then the projection list passed to the scan operator
	   * includes both the wildcard and the `columns` array.
	   * We can ignore one of them.
	   */
	  @Test
	  public void testWildcardAndColumns() {
	    ScanLevelProjection scanProj = ScanLevelProjection.build(
	        RowSetTestUtils.projectList(
	            SchemaPath.DYNAMIC_STAR,
	            ColumnsScanFramework.COLUMNS_COL),
	        ScanTestUtils.parsers(new ColumnsArrayParser(true)));

	    assertTrue(scanProj.projectAll());
	    assertEquals(2, scanProj.requestedCols().size());

	    assertOnlyColumnsArray(scanProj, ColumnsScanFramework.COLUMNS_COL);
	  }

	  /**
	   * Referencing {@code columns} as a map ({@code columns.x}) is expected to
	   * fail with a {@link UserException}.
	   */
	  @Test
	  public void testColumnsAsMap() {
	    try {
	      ScanLevelProjection.build(
	          RowSetTestUtils.projectList("columns.x"),
	          ScanTestUtils.parsers(new ColumnsArrayParser(true)));
	      fail("Expected UserException: columns.x");
	    } catch (UserException expected) {
	      // Expected
	    }
	  }

	  /**
	   * Asserts that the projection resolved to exactly one output column, that
	   * it carries the given name, and that it is an
	   * {@link UnresolvedColumnsArrayColumn}.
	   *
	   * @param scanProj the projection to inspect
	   * @param expectedName the name the single column must have
	   */
	  private static void assertOnlyColumnsArray(ScanLevelProjection scanProj,
	      String expectedName) {
	    assertEquals(1, scanProj.columns().size());
	    assertEquals(expectedName, scanProj.columns().get(0).name());

	    // Verify column type

	    assertTrue(scanProj.columns().get(0) instanceof UnresolvedColumnsArrayColumn);
	  }
	}