exec/java-exec/src/main/java/org/apache/drill/exec/physical/resultSet/project/RequestedTupleImpl.java - drill - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.drill.exec.physical.resultSet.project;

 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;

 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.expression.PathSegment;
 import org.apache.drill.common.expression.PathSegment.ArraySegment;
 import org.apache.drill.common.expression.PathSegment.NameSegment;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.record.metadata.TupleNameSpace;

 /**
  * Represents an explicit projection at some tuple level.
  * <p>
  * A column is projected if it is explicitly listed in the selection list.
  * <p>
  * If a column is a map, then the projection for the map's columns is based on
  * two rules:
  * <ol>
  * <li>If the projection list includes at least one explicit mention of a map
  * member, then include only those columns explicitly listed.</li>
  * <li>If the projection at the parent level lists only the map column itself
  * (which the projection can't know is a map), then assume this implies all
  * columns, as if the entry were "map.*".</li>
  * </ol>
  * <p>
  * Examples:<br>
  * <code>m</code><br>
  * If <code>m</code> turns out to be a map, project all members of
  * <code>m</code>.<br>
  * <code>m.a</code><br>
  * Column <code>m</code> must be a map. Project only column <code>a</code>.<br>
  * <code>m, m.a</code><br>
  * Tricky case. We interpret this as projecting only the "a" element of map m.
  * <p>
  * The projection set is built from a list of columns, represented as
  * {@link SchemaPath} objects, provided by the physical plan. The structure of
  * <tt>SchemaPath</tt> is a bit awkward:
  * <p>
  * <ul>
  * <li><tt>SchemaPath</tt> is a wrapper for a column which directly holds the
  * <tt>NameSegment</tt> for the top-level column.</li>
  * <li><tt>NameSegment</tt> holds a name. This can be a top name such as
  * `a`, or parts of a compound name such as `a`.`b`. Each <tt>NameSegment</tt>
  * has a "child" that points to the optional following parts of the name.</li>
  * <li><tt>PathSegment</tt> is the base class for the parts of a name.</li>
  * <li><tt>ArraySegment</tt> is the other kind of name part and represents
  * an array index such as the "[1]" in `columns`[1].</li>
  * </ul>
  * The parser here consumes only names, this mechanism does not consider
  * array indexes. As a result, there may be multiple projected columns that
  * map to the same projection here: `columns`[1] and `columns`[2] both map to
  * the name `columns`, for example.
  */

 public class RequestedTupleImpl implements RequestedTuple {

   private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(RequestedTupleImpl.class);

   // Projection list used by parse() when the caller passes a null list:
   // a single wildcard column.
   private static final Collection<SchemaPath> PROJECT_ALL = Collections.singletonList(SchemaPath.STAR_COLUMN);

   // Column that owns this tuple; null when no parent column was supplied.
   private final RequestedColumnImpl parent;

   // Requested columns at this tuple level, keyed by column name.
   private final TupleNameSpace<RequestedColumn> projection = new TupleNameSpace<>();

   /**
    * Creates an empty tuple projection with no parent column.
    */
   public RequestedTupleImpl() {
     this((RequestedColumnImpl) null);
   }

   /**
    * Creates an empty tuple projection owned by the given column.
    *
    * @param parent the column whose members this tuple describes; may be null
    */
   public RequestedTupleImpl(RequestedColumnImpl parent) {
     this.parent = parent;
   }

   /**
    * Creates a parent-less tuple projection pre-populated with the given
    * columns, each registered under its own name.
    *
    * @param cols the already-parsed columns to register
    */
   public RequestedTupleImpl(List<RequestedColumn> cols) {
     this((RequestedColumnImpl) null);
     for (RequestedColumn requested : cols) {
       projection.add(requested.name(), requested);
     }
   }

   /**
    * Looks up a requested column using the lower-cased form of the name.
    *
    * @param colName name of the column to find
    * @return whatever the name space holds for that name
    */
   @Override
   public RequestedColumn get(String colName) {
     String key = colName.toLowerCase();
     return projection.get(key);
   }

   /**
    * Same lookup as {@link #get(String)}, with the result cast to the
    * implementation class used by the parser.
    */
   private RequestedColumnImpl getImpl(String colName) {
     RequestedColumn found = get(colName);
     return (RequestedColumnImpl) found;
   }

   /**
    * Reports how the named column is projected.
    *
    * @param colName name of the column to check
    * @return the column's own projection type, or
    * {@link ProjectionType#UNPROJECTED} if the column was not requested
    */
   @Override
   public ProjectionType projectionType(String colName) {
     RequestedColumn requested = get(colName);
     if (requested == null) {
       return ProjectionType.UNPROJECTED;
     }
     return requested.type();
   }

   /**
    * Returns the projection to apply to the members of the named map column.
    *
    * @param colName name of the (presumed) map column
    * @return the column's explicit member projection if it has one; otherwise
    * the result of asking the column to project all members; or
    * {@link ImpliedTupleRequest#NO_MEMBERS} if the column was not requested
    */
   @Override
   public RequestedTuple mapProjection(String colName) {
     RequestedColumnImpl column = getImpl(colName);
     if (column == null) {
       return ImpliedTupleRequest.NO_MEMBERS;
     }
     RequestedTuple explicitMembers = column.mapProjection();
     if (explicitMembers != null) {
       return explicitMembers;
     }

     // No explicit information for the map. Members inherit the
     // same projection as the map itself.

     return column.projectAllMembers(true);
   }

   /**
    * Create a requested tuple projection from a rewritten top-level
    * projection list. The columns within the list have already been parsed to
    * pick out arrays, maps and scalars. The list must not include the
    * wildcard: a wildcard list must be passed in as a null list.
    * <ul>
    * <li>Null list: project all.</li>
    * <li>Empty list: project nothing.</li>
    * <li>Otherwise: project only the columns in the list.</li>
    * </ul>
    *
    * @param projList top-level, parsed columns
    * @return the tuple projection for the top-level row
    */

   public static RequestedTuple build(List<RequestedColumn> projList) {
     if (projList == null) {
       return new ImpliedTupleRequest(true);
     } else if (!projList.isEmpty()) {
       return new RequestedTupleImpl(projList);
     } else {
       return ImpliedTupleRequest.NO_MEMBERS;
     }
   }

   /**
    * Parse a projection list made up of column names and/or wildcards.
    * An empty list means nothing is projected. A null list is handled as
    * a list holding only the wildcard column (that is, a null list here
    * is equivalent to a wildcard in the SELECT statement.)
    * <p>
    * The projection list may include both a wildcard and column names (as in
    * the case of implicit columns.) This results in a final list that both
    * says that everything is projected, and provides the list of columns.
    * <p>
    * Parsing is used at two different times. First, to parse the list from
    * the physical operator. This has the case above: an explicit wildcard
    * and/or additional columns. Then, this class is used again to prepare the
    * physical projection used when reading. In this case, wildcards should
    * be removed, implicit columns pulled out, and just the list of read-level
    * columns should remain.
    *
    * @param projList
    *          the list of projected columns, or null to stand for a lone
    *          wildcard
    * @return a projection set that implements the specified projection
    */

   public static RequestedTuple parse(Collection<SchemaPath> projList) {
     Collection<SchemaPath> paths = (projList == null) ? PROJECT_ALL : projList;
     if (paths.isEmpty()) {
       return ImpliedTupleRequest.NO_MEMBERS;
     }
     RequestedTupleImpl root = new RequestedTupleImpl();
     for (SchemaPath path : paths) {
       root.parseSegment(path.getRootSegment());
     }
     return root;
   }

   /**
    * Routes one path segment to the matching parser: a leaf when nothing
    * follows it, an array when the next segment is an array index, and an
    * internal (map) node otherwise.
    *
    * @param pathSeg the segment to parse; cast to a name segment in every case
    */
   @Override
   public void parseSegment(PathSegment pathSeg) {
     if (pathSeg.isLastPath()) {
       parseLeaf((NameSegment) pathSeg);
       return;
     }
     if (pathSeg.getChild().isArray()) {
       parseArray((NameSegment) pathSeg);
       return;
     }
     parseInternal((NameSegment) pathSeg);
   }

   /**
    * Handles a name with nothing after it. A new name is simply registered;
    * a repeated name is either rejected or widens the existing entry.
    *
    * @throws UserException if the existing entry is simple or a wildcard
    */
   private void parseLeaf(NameSegment nameSeg) {
     String colName = nameSeg.getPath();
     RequestedColumnImpl existing = getImpl(colName);
     if (existing == null) {
       projection.add(colName, new RequestedColumnImpl(this, colName));
     } else if (existing.isSimple() || existing.isWildcard()) {
       throw UserException
         .validationError()
         .message("Duplicate column in project list: %s",
             existing.fullName())
         .build(logger);
     } else if (existing.isArray()) {

       // Saw both a[x] (earlier) and a (now): project all elements.

       existing.projectAllElements();
     } else {

       // Anything left must be a known map.

       assert existing.isTuple();

       // Both a.b (existing) and a (this column) are allowed. Since a is
       // known to be a map and the whole map is now requested, switch the
       // column to project the entire map.

       existing.projectAllMembers(true);
     }
   }

   /**
    * Handles a name that is followed by another name: resolves the member
    * tuple for this name, then hands the child segment to that tuple.
    */
   private void parseInternal(NameSegment nameSeg) {
     String colName = nameSeg.getPath();
     RequestedColumnImpl existing = getImpl(colName);
     RequestedTuple members;
     if (existing != null) {
       if (existing.isTuple()) {

         // Known map: extend it.

         members = existing.asTuple();
       } else {

         // The name was projected by itself earlier (`a`), and now shows up
         // with a member (`a`.`b`), so it is a map: project the entire map.

         members = existing.projectAllMembers(true);
       }
     } else {

       // First sighting. Being internal, the new member must be a map.

       RequestedColumnImpl created = new RequestedColumnImpl(this, colName);
       projection.add(colName, created);
       members = created.asTuple();
     }
     members.parseSegment(nameSeg.getChild());
   }

   /**
    * Handles a name that is followed by an array index: records the index on
    * the column, then continues as a map if the index is itself followed by
    * more path.
    */
   private void parseArray(NameSegment nameSeg) {
     String colName = nameSeg.getPath();
     ArraySegment indexSeg = (ArraySegment) nameSeg.getChild();
     int elementIndex = indexSeg.getIndex();
     RequestedColumnImpl column = getImpl(colName);
     if (column != null && column.isSimple()) {

       // Saw both a (earlier) and a[x] (now): project all elements.

       column.projectAllElements();
       return;
     }
     if (column == null) {
       column = new RequestedColumnImpl(this, colName);
       projection.add(colName, column);
     }

     // Duplicate indexes are tolerated. Example: z[0], z[0]['orange']
     if (!column.hasIndex(elementIndex)) {
       column.addIndex(elementIndex);
     }

     // Drill's SQL parser does not support map arrays: a[0].c
     // But, the SchemaPath does support them, so no harm in
     // parsing them here.

     if (!indexSeg.isLastPath()) {
       parseInternal(nameSeg);
     }
   }

   /**
    * @return the entries held by this tuple's name space
    */
   @Override
   public List<RequestedColumn> projections() {
     return projection.entries();
   }

   /**
    * Delegates name construction to the parent column; does nothing when
    * this tuple has no parent.
    *
    * @param buf buffer handed to the parent
    */
   @Override
   public void buildName(StringBuilder buf) {
     if (parent == null) {
       return;
     }
     parent.buildName(buf);
   }

   /**
    * Tuple projection type. This is a rough approximation. A scan-level projection
    * may include both a wildcard and implicit columns. This form is best used
    * in testing where such ambiguities do not apply.
    *
    * @return NONE when no columns are registered, ALL when any registered
    * column is a wildcard, SOME otherwise
    */
   @Override
   public TupleProjectionType type() {
     if (projection.isEmpty()) {
       return TupleProjectionType.NONE;
     }
     TupleProjectionType result = TupleProjectionType.SOME;
     for (RequestedColumn requested : projection) {
       if (requested.isWildcard()) {
         result = TupleProjectionType.ALL;
         break;
       }
     }
     return result;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.drill.exec.physical.resultSet.project;

	import java.util.Collection;
	import java.util.Collections;
	import java.util.List;

	import org.apache.drill.common.exceptions.UserException;
	import org.apache.drill.common.expression.PathSegment;
	import org.apache.drill.common.expression.PathSegment.ArraySegment;
	import org.apache.drill.common.expression.PathSegment.NameSegment;
	import org.apache.drill.common.expression.SchemaPath;
	import org.apache.drill.exec.record.metadata.TupleNameSpace;

	/**
	* Represents an explicit projection at some tuple level.
	* <p>
	* A column is projected if it is explicitly listed in the selection list.
	* <p>
	* If a column is a map, then the projection for the map's columns is based on
	* two rules:
	* <ol>
	* <li>If the projection list includes at least one explicit mention of a map
	* member, then include only those columns explicitly listed.</li>
	* <li>If the projection at the parent level lists only the map column itself
	* (which the projection can't know is a map), then assume this implies all
	* columns, as if the entry were "map.*".</li>
	* </ol>
	* <p>
	* Examples:<br>
	* <code>m</code><br>
	* If <code>m</code> turns out to be a map, project all members of
	* <code>m</code>.<br>
	* <code>m.a</code><br>
	* Column <code>m</code> must be a map. Project only column <code>a</code>.<br>
	* <code>m, m.a</code><br>
	* Tricky case. We interpret this as projecting only the "a" element of map m.
	* <p>
	* The projection set is built from a list of columns, represented as
	* {@link SchemaPath} objects, provided by the physical plan. The structure of
	* <tt>SchemaPath</tt> is a bit awkward:
	* <p>
	* <ul>
	* <li><tt>SchemaPath</tt> is a wrapper for a column which directly holds the
	* <tt>NameSegment</tt> for the top-level column.</li>
	* <li><tt>NameSegment</tt> holds a name. This can be a top name such as
	* `a`, or parts of a compound name such as `a`.`b`. Each <tt>NameSegment</tt>
	* has a "child" that points to the optional following parts of the name.</li>
	* <li><tt>PathSegment</tt> is the base class for the parts of a name.</li>
	* <li><tt>ArraySegment</tt> is the other kind of name part and represents
	* an array index such as the "[1]" in `columns`[1].</li>
	* </ul>
	* The parser here consumes only names, this mechanism does not consider
	* array indexes. As a result, there may be multiple projected columns that
	* map to the same projection here: `columns`[1] and `columns`[2] both map to
	* the name `columns`, for example.
	*/

	public class RequestedTupleImpl implements RequestedTuple {

	private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(RequestedTupleImpl.class);
	private static final Collection<SchemaPath> PROJECT_ALL = Collections.singletonList(SchemaPath.STAR_COLUMN);

	private final RequestedColumnImpl parent;
	private final TupleNameSpace<RequestedColumn> projection = new TupleNameSpace<>();

	public RequestedTupleImpl() {
	parent = null;
	}

	public RequestedTupleImpl(RequestedColumnImpl parent) {
	this.parent = parent;
	}

	public RequestedTupleImpl(List<RequestedColumn> cols) {
	parent = null;
	for (RequestedColumn col : cols) {
	projection.add(col.name(), col);
	}
	}

	@Override
	public RequestedColumn get(String colName) {
	return projection.get(colName.toLowerCase());
	}

	private RequestedColumnImpl getImpl(String colName) {
	return (RequestedColumnImpl) get(colName);
	}

	@Override
	public ProjectionType projectionType(String colName) {
	RequestedColumn col = get(colName);
	return col == null ? ProjectionType.UNPROJECTED : col.type();
	}

	@Override
	public RequestedTuple mapProjection(String colName) {
	RequestedColumnImpl col = getImpl(colName);
	RequestedTuple mapProj = (col == null) ? null : col.mapProjection();
	if (mapProj != null) {
	return mapProj;
	}

	// No explicit information for the map. Members inherit the
	// same projection as the map itself.

	if (col != null) {
	return col.projectAllMembers(true);
	}
	return ImpliedTupleRequest.NO_MEMBERS;
	}

	/**
	* Create a requested tuple projection from a rewritten top-level
	* projection list. The columns within the list have already been parsed to
	* pick out arrays, maps and scalars. The list must not include the
	* wildcard: a wildcard list must be passed in as a null list. An
	* empty list means project nothing. Null list means project all, else
	* project only the columns in the list.
	*
	* @param projList top-level, parsed columns
	* @return the tuple projection for the top-leel row
	*/

	public static RequestedTuple build(List<RequestedColumn> projList) {
	if (projList == null) {
	return new ImpliedTupleRequest(true);
	}
	if (projList.isEmpty()) {
	return ImpliedTupleRequest.NO_MEMBERS;
	}
	return new RequestedTupleImpl(projList);
	}

	/**
	* Parse a projection list. The list should consist of a list of column names;
	* or wildcards. An empty list means
	* nothing is projected. A null list means everything is projected (that is, a
	* null list here is equivalent to a wildcard in the SELECT statement.)
	* <p>
	* The projection list may include both a wildcard and column names (as in
	* the case of implicit columns.) This results in a final list that both
	* says that everything is projected, and provides the list of columns.
	* <p>
	* Parsing is used at two different times. First, to parse the list from
	* the physical operator. This has the case above: an explicit wildcard
	* and/or additional columns. Then, this class is used again to prepare the
	* physical projection used when reading. In this case, wildcards should
	* be removed, implicit columns pulled out, and just the list of read-level
	* columns should remain.
	*
	* @param projList
	* the list of projected columns, or null if no projection is to be
	* done
	* @return a projection set that implements the specified projection
	*/

	public static RequestedTuple parse(Collection<SchemaPath> projList) {
	if (projList == null) {
	projList = PROJECT_ALL;
	}
	else if (projList.isEmpty()) {
	return ImpliedTupleRequest.NO_MEMBERS;
	}
	RequestedTupleImpl projSet = new RequestedTupleImpl();
	for (SchemaPath col : projList) {
	projSet.parseSegment(col.getRootSegment());
	}
	return projSet;
	}

	@Override
	public void parseSegment(PathSegment pathSeg) {
	if (pathSeg.isLastPath()) {
	parseLeaf((NameSegment) pathSeg);
	} else if (pathSeg.getChild().isArray()) {
	parseArray((NameSegment) pathSeg);
	} else {
	parseInternal((NameSegment) pathSeg);
	}
	}

	private void parseLeaf(NameSegment nameSeg) {
	String name = nameSeg.getPath();
	RequestedColumnImpl member = getImpl(name);
	if (member == null) {
	projection.add(name, new RequestedColumnImpl(this, name));
	return;
	}
	if (member.isSimple() \|\| member.isWildcard()) {
	throw UserException
	.validationError()
	.message("Duplicate column in project list: %s",
	member.fullName())
	.build(logger);
	}
	if (member.isArray()) {

	// Saw both a and a[x]. Occurs in project list.
	// Project all elements.

	member.projectAllElements();
	return;
	}

	// Else the column is a known map.

	assert member.isTuple();

	// Allow both a.b (existing) and a (this column)
	// Since we we know a is a map, and we've projected the
	// whole map, modify the projection of the column to
	// project the entire map.

	member.projectAllMembers(true);
	}

	private void parseInternal(NameSegment nameSeg) {
	String name = nameSeg.getPath();
	RequestedColumnImpl member = getImpl(name);
	RequestedTuple map;
	if (member == null) {
	// New member. Since this is internal, this new member
	// must be a map.

	member = new RequestedColumnImpl(this, name);
	projection.add(name, member);
	map = member.asTuple();
	} else if (member.isTuple()) {

	// Known map. Add to it.

	map = member.asTuple();
	} else {

	// Member was previously projected by itself. We now
	// know it is a map. So, project entire map. (Earlier
	// we saw `a`. Now we see `a`.`b`.)

	map = member.projectAllMembers(true);
	}
	map.parseSegment(nameSeg.getChild());
	}

	private void parseArray(NameSegment nameSeg) {
	String name = nameSeg.getPath();
	ArraySegment arraySeg = ((ArraySegment) nameSeg.getChild());
	int index = arraySeg.getIndex();
	RequestedColumnImpl member = getImpl(name);
	if (member == null) {
	member = new RequestedColumnImpl(this, name);
	projection.add(name, member);
	} else if (member.isSimple()) {

	// Saw both a and a[x]. Occurs in project list.
	// Project all elements.

	member.projectAllElements();
	return;
	}

	// Allow duplicate indexes. Example: z[0], z[0]['orange']
	if (!member.hasIndex(index)) {
	member.addIndex(index);
	}

	// Drills SQL parser does not support map arrays: a[0].c
	// But, the SchemaPath does support them, so no harm in
	// parsing them here.

	if (! arraySeg.isLastPath()) {
	parseInternal(nameSeg);
	}
	}

	@Override
	public List<RequestedColumn> projections() {
	return projection.entries();
	}

	@Override
	public void buildName(StringBuilder buf) {
	if (parent != null) {
	parent.buildName(buf);
	}
	}

	/**
	* Tuple projection type. This is a rough approximation. A scan-level projection
	* may include both a wildcard and implicit columns. This form is best used
	* in testing where such ambiguities do not apply.
	*/
	@Override
	public TupleProjectionType type() {
	if (projection.isEmpty()) {
	return TupleProjectionType.NONE;
	}
	for (RequestedColumn col : projection) {
	if (col.isWildcard()) {
	return TupleProjectionType.ALL;
	}
	}
	return TupleProjectionType.SOME;
	}
	}