| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.parquet.hive.internal; |
| |
| import java.io.IOException; |
| import java.io.Serializable; |
| import java.util.ArrayList; |
| import java.util.LinkedHashMap; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| |
| import org.apache.hadoop.fs.Path; |
| import org.apache.hadoop.hive.conf.HiveConf; |
| import org.apache.hadoop.hive.ql.exec.Operator; |
| import org.apache.hadoop.hive.ql.exec.TableScanOperator; |
| import org.apache.hadoop.hive.ql.exec.Utilities; |
| import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; |
| import org.apache.hadoop.hive.ql.plan.MapredWork; |
| import org.apache.hadoop.hive.ql.plan.PartitionDesc; |
| import org.apache.hadoop.hive.ql.plan.TableScanDesc; |
| import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; |
| import org.apache.hadoop.mapred.JobConf; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| |
| /** |
| * Hive 0.10 implementation of {@link org.apache.parquet.hive.HiveBinding HiveBinding}. |
| * This class is a renamed version of the |
| * <a href="http://bit.ly/1a4tcrb">ManageJobConfig</a> class. |
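| * |
| * <p>A minimal usage sketch, assuming {@code split} is the caller's input |
| * split and {@code jobConf} its job configuration: |
| * <pre> |
| * final Hive010Binding binding = new Hive010Binding(); |
| * final JobConf readerConf = binding.pushProjectionsAndFilters(jobConf, split.getPath()); |
| * </pre> |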
| */ |
| public class Hive010Binding extends AbstractHiveBinding { |
| private static final Logger LOG = LoggerFactory.getLogger(Hive010Binding.class); |
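| /** |
| * Partition descriptors from the query plan, keyed by the scheme-less |
| * path of each input directory (see {@link #init(JobConf)}). |
| */ |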
| private final Map<String, PartitionDesc> pathToPartitionInfo = |
| new LinkedHashMap<String, PartitionDesc>(); |
| /** |
| * MapredWork is the Hive object that describes input files, |
| * column projections, and filters. |
| */ |
| private MapredWork mrwork; |
| |
| /** |
| * Initializes the mrwork field from the serialized query plan, if one is |
| * present in the job configuration, and rebuilds the path-to-partition |
| * map used when updating the JobConf. |
| * |
| * @param job the job configuration that carries the serialized plan |
| */ |
| private void init(final JobConf job) { |
| final String plan = HiveConf.getVar(job, HiveConf.ConfVars.PLAN); |
| if (mrwork == null && plan != null && plan.length() > 0) { |
| mrwork = Utilities.getMapRedWork(job); |
| pathToPartitionInfo.clear(); |
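| // Key each entry by its scheme-less path so lookups succeed whether |
| // or not the split path carries a URI scheme. |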
| for (final Map.Entry<String, PartitionDesc> entry : mrwork.getPathToPartitionInfo().entrySet()) { |
| pathToPartitionInfo.put(new Path(entry.getKey()).toUri().getPath(), entry.getValue()); |
| } |
| } |
| } |
| |
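| /** |
| * Pushes the column projections and filter expression of every table scan |
| * whose input path matches the given split path into the JobConf. |
| * |
| * @param jobConf the configuration to update |
| * @param splitPath the split path as given |
| * @param splitPathWithNoSchema the split path with its URI scheme stripped |
| */ |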
| private void pushProjectionsAndFilters(final JobConf jobConf, |
| final String splitPath, final String splitPathWithNoSchema) { |
| |
| if (mrwork == null) { |
| LOG.debug("Not pushing projections and filters because MapredWork is null"); |
| return; |
| } else if (mrwork.getPathToAliases() == null) { |
| LOG.debug("Not pushing projections and filters because pathToAliases is null"); |
| return; |
| } |
| |
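| // Collect every alias whose input path matches this split's path. |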
| final ArrayList<String> aliases = new ArrayList<String>(); |
| for (final Entry<String, ArrayList<String>> entry : mrwork.getPathToAliases().entrySet()) { |
| final String key = new Path(entry.getKey()).toUri().getPath(); |
| if (splitPath.equals(key) || splitPathWithNoSchema.equals(key)) { |
| aliases.addAll(entry.getValue()); |
| } |
| } |
| |
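| // For each matching alias, push that scan's projections and filters. |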
| for (final String alias : aliases) { |
| final Operator<? extends Serializable> op = mrwork.getAliasToWork().get(alias); |
| if (op instanceof TableScanOperator) { |
| final TableScanOperator tableScan = (TableScanOperator) op; |
| |
| // push down projections |
| final ArrayList<Integer> list = tableScan.getNeededColumnIDs(); |
| |
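| // A null column list means no projection was recorded, so fall back |
| // to reading every column. |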
| if (list != null) { |
| ColumnProjectionUtils.appendReadColumnIDs(jobConf, list); |
| } else { |
| ColumnProjectionUtils.setFullyReadColumns(jobConf); |
| } |
| |
| pushFilters(jobConf, tableScan); |
| } |
| } |
| } |
| |
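| /** |
| * Records the scan's column name list and, when a filter expression is |
| * present, serializes it into the JobConf for predicate push-down. |
| */ |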
| private void pushFilters(final JobConf jobConf, final TableScanOperator tableScan) { |
| |
| final TableScanDesc scanDesc = tableScan.getConf(); |
| if (scanDesc == null) { |
| LOG.debug("Not pushing filters because TableScanDesc is null"); |
| return; |
| } |
| |
| // construct column name list for reference by filter push down |
| Utilities.setColumnNameList(jobConf, tableScan); |
| |
| // push down filters |
| final ExprNodeDesc filterExpr = scanDesc.getFilterExpr(); |
| if (filterExpr == null) { |
| LOG.debug("Not pushing filters because FilterExpr is null"); |
| return; |
| } |
| |
| final String filterText = filterExpr.getExprString(); |
| final String filterExprSerialized = Utilities.serializeExpression(filterExpr); |
| jobConf.set(TableScanDesc.FILTER_TEXT_CONF_STR, filterText); |
| jobConf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExprSerialized); |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) |
| throws IOException { |
| init(jobConf); |
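| // Work on a clone so the caller's JobConf is never mutated. |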
| final JobConf cloneJobConf = new JobConf(jobConf); |
| // Look up by scheme-less path, matching how init() keys the map. |
| final PartitionDesc part = pathToPartitionInfo.get(path.toUri().getPath()); |
| |
| if ((part != null) && (part.getTableDesc() != null)) { |
| Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf); |
| } |
| |
| // toUri().getPath() yields the scheme-less form that splitPathWithNoSchema expects. |
| pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().getPath()); |
| return cloneJobConf; |
| } |
| |
| } |