blob: 629f01e8e5ab15189fbecc62e5aa3d563dda8b97 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.split;
import java.io.IOException;
import java.util.List;
import javax.annotation.Nullable;
import com.google.common.base.Function;
import org.apache.tez.common.Preconditions;
import com.google.common.collect.Lists;
import org.apache.tez.mapreduce.grouper.GroupedSplitContainer;
import org.apache.tez.mapreduce.grouper.MapReduceSplitContainer;
import org.apache.tez.mapreduce.grouper.SplitContainer;
import org.apache.tez.mapreduce.grouper.SplitSizeEstimatorWrapperMapReduce;
import org.apache.tez.mapreduce.grouper.TezSplitGrouper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
/**
 * Helper that provides a grouping of input splits based
 * on multiple parameters. It creates a {@link TezGroupedSplit}
 * to wrap each group of real InputSplits.
 *
 * <p>The grouping itself is delegated to {@link TezSplitGrouper}; this class
 * only adapts {@code org.apache.hadoop.mapreduce} splits to and from the
 * container types that the parent grouper works with.
 */
@Public
@Evolving
public class TezMapReduceSplitsGrouper extends TezSplitGrouper {
  private static final Logger LOG = LoggerFactory.getLogger(TezMapReduceSplitsGrouper.class);

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_COUNT}
   */
  @Deprecated
  public static final String TEZ_GROUPING_SPLIT_COUNT = TezSplitGrouper.TEZ_GROUPING_SPLIT_COUNT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_BY_LENGTH}
   */
  @Deprecated
  public static final String TEZ_GROUPING_SPLIT_BY_LENGTH = TezSplitGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT}
   */
  @Deprecated
  public static final boolean TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT = TezSplitGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH_DEFAULT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_BY_COUNT}
   */
  @Deprecated
  public static final String TEZ_GROUPING_SPLIT_BY_COUNT = TezSplitGrouper.TEZ_GROUPING_SPLIT_BY_COUNT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT}
   */
  @Deprecated
  public static final boolean TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT = TezSplitGrouper.TEZ_GROUPING_SPLIT_BY_COUNT_DEFAULT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_WAVES}
   */
  @Deprecated
  public static final String TEZ_GROUPING_SPLIT_WAVES = TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_WAVES_DEFAULT}
   */
  @Deprecated
  public static final float TEZ_GROUPING_SPLIT_WAVES_DEFAULT = TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES_DEFAULT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_MAX_SIZE}
   */
  @Deprecated
  public static final String TEZ_GROUPING_SPLIT_MAX_SIZE = TezSplitGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT}
   */
  @Deprecated
  public static final long TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT = TezSplitGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE_DEFAULT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_MIN_SIZE}
   */
  @Deprecated
  public static final String TEZ_GROUPING_SPLIT_MIN_SIZE = TezSplitGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT}
   */
  @Deprecated
  public static final long TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT = TezSplitGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE_DEFAULT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION}
   */
  @Deprecated
  public static final String TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION =
      TezSplitGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT}
   */
  @Deprecated
  public static final float TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT = TezSplitGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION_DEFAULT;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_REPEATABLE}
   */
  @Deprecated
  public static final String TEZ_GROUPING_REPEATABLE = TezSplitGrouper.TEZ_GROUPING_REPEATABLE;

  /**
   * @deprecated Use {@link TezSplitGrouper#TEZ_GROUPING_REPEATABLE_DEFAULT}
   */
  @Deprecated
  public static final boolean TEZ_GROUPING_REPEATABLE_DEFAULT = TezSplitGrouper.TEZ_GROUPING_REPEATABLE_DEFAULT;

  /**
   * Groups the given splits without a custom size estimator or location
   * provider. Equivalent to calling the five-argument overload with a
   * {@code null} estimator.
   *
   * @param conf configuration handed to the parent grouper
   * @param originalSplits splits to group; must not be {@code null}
   * @param desiredNumSplits value forwarded unchanged to the parent grouper
   * @param wrappedInputFormatName value forwarded unchanged to the parent grouper
   * @return one {@link TezGroupedSplit} per group produced by the parent grouper
   * @throws IOException as declared by the delegated grouping call
   * @throws InterruptedException as declared by the delegated grouping call
   */
  public List<InputSplit> getGroupedSplits(Configuration conf,
      List<InputSplit> originalSplits, int desiredNumSplits,
      String wrappedInputFormatName) throws IOException, InterruptedException {
    return getGroupedSplits(conf, originalSplits, desiredNumSplits,
        wrappedInputFormatName, null);
  }

  /**
   * Groups the given splits without a custom location provider. Equivalent to
   * calling the six-argument overload with a {@code null} location provider.
   *
   * @param conf configuration handed to the parent grouper
   * @param originalSplits splits to group; must not be {@code null}
   * @param desiredNumSplits value forwarded unchanged to the parent grouper
   * @param wrappedInputFormatName value forwarded unchanged to the parent grouper
   * @param estimator optional size estimator; may be {@code null}
   * @return one {@link TezGroupedSplit} per group produced by the parent grouper
   * @throws IOException as declared by the delegated grouping call
   * @throws InterruptedException as declared by the delegated grouping call
   */
  public List<InputSplit> getGroupedSplits(Configuration conf,
      List<InputSplit> originalSplits, int desiredNumSplits,
      String wrappedInputFormatName,
      SplitSizeEstimator estimator) throws IOException,
      InterruptedException {
    return getGroupedSplits(conf, originalSplits, desiredNumSplits, wrappedInputFormatName, estimator, null);
  }

  /**
   * Groups the given splits by delegating to {@link TezSplitGrouper} and
   * converting every resulting group into a {@link TezGroupedSplit}.
   *
   * <p>Each original split is wrapped in a {@link MapReduceSplitContainer}
   * before delegation. A non-null {@code estimator} is wrapped in a
   * {@link SplitSizeEstimatorWrapperMapReduce} and a non-null
   * {@code locationProvider} in a {@code SplitLocationProviderMapReduce};
   * when either is {@code null}, {@code null} is passed on to the parent
   * grouper.
   *
   * <p>The returned list, and the list of underlying splits inside each
   * {@link TezGroupedSplit}, are materialized once rather than exposed as lazy
   * transforming views, so repeated access yields the same objects.
   *
   * @param conf configuration handed to the parent grouper
   * @param originalSplits splits to group; must not be {@code null}
   * @param desiredNumSplits value forwarded unchanged to the parent grouper
   * @param wrappedInputFormatName value forwarded unchanged to the parent grouper
   * @param estimator optional size estimator; may be {@code null}
   * @param locationProvider optional location provider; may be {@code null}
   * @return one {@link TezGroupedSplit} per group produced by the parent grouper
   * @throws IOException as declared by the delegated grouping call
   * @throws InterruptedException as declared by the delegated grouping call
   */
  public List<InputSplit> getGroupedSplits(Configuration conf,
      List<InputSplit> originalSplits, int desiredNumSplits,
      String wrappedInputFormatName,
      SplitSizeEstimator estimator,
      SplitLocationProvider locationProvider) throws IOException,
      InterruptedException {
    // Rejects a null list up front; presumably raises IllegalArgumentException
    // (the Tez Preconditions helper is not visible here).
    Preconditions.checkArgument(originalSplits != null, "Splits must be specified");

    // Wrap every raw split exactly once. A lazy Lists.transform view would
    // allocate a fresh container each time the parent grouper reads an element.
    List<SplitContainer> originalSplitContainers =
        Lists.newArrayListWithCapacity(originalSplits.size());
    for (InputSplit split : originalSplits) {
      originalSplitContainers.add(new MapReduceSplitContainer(split));
    }

    List<GroupedSplitContainer> groupedContainers = super.getGroupedSplits(
        conf, originalSplitContainers, desiredNumSplits, wrappedInputFormatName,
        estimator == null ? null : new SplitSizeEstimatorWrapperMapReduce(estimator),
        locationProvider == null ? null : new SplitLocationProviderMapReduce(locationProvider));

    // Build each TezGroupedSplit once so callers see stable element identity.
    List<InputSplit> groupedSplits =
        Lists.newArrayListWithCapacity(groupedContainers.size());
    for (GroupedSplitContainer groupedContainer : groupedContainers) {
      groupedSplits.add(toTezGroupedSplit(groupedContainer));
    }
    return groupedSplits;
  }

  /**
   * Converts one group produced by the parent grouper into a
   * {@link TezGroupedSplit} that holds the raw MapReduce splits of that group.
   */
  private static InputSplit toTezGroupedSplit(GroupedSplitContainer groupedContainer) {
    List<SplitContainer> members = groupedContainer.getWrappedSplitContainers();
    List<InputSplit> underlyingSplits = Lists.newArrayListWithCapacity(members.size());
    for (SplitContainer member : members) {
      // Every container handed to the parent grouper by this class is a
      // MapReduceSplitContainer, so the cast mirrors that wrapping step.
      underlyingSplits.add(((MapReduceSplitContainer) member).getRawSplit());
    }
    return new TezGroupedSplit(underlyingSplits, groupedContainer.getWrappedInputFormatName(),
        groupedContainer.getLocations(), groupedContainer.getRack(), groupedContainer.getLength());
  }

  /**
   * Builder that can be used to configure grouping in Tez
   *
   * @deprecated See {@link org.apache.tez.mapreduce.grouper.TezSplitGrouper.TezMRSplitsGrouperConfigBuilder#newConfigBuilder(Configuration)}
   *
   * @param conf
   *          {@link Configuration} This will be modified in place. If
   *          configuration values may be changed at runtime via a config file
   *          then pass in a {@link Configuration} that is initialized from a
   *          config file. The parameters that are not overridden in code will
   *          be derived from the Configuration object. If {@code null}, a new
   *          {@code Configuration(false)} is created and used instead.
   * @return {@link org.apache.hadoop.mapreduce.split.TezMapReduceSplitsGrouper.TezMRSplitsGrouperConfigBuilder}
   */
  @Deprecated
  public static TezMRSplitsGrouperConfigBuilder createConfigBuilder(Configuration conf) {
    return new TezMRSplitsGrouperConfigBuilder(conf);
  }

  /**
   * Fluent helper that writes grouping settings into a {@link Configuration}.
   * Every setter stores its value under the corresponding
   * {@link TezSplitGrouper} key and returns this builder.
   *
   * @deprecated See {@link org.apache.tez.mapreduce.grouper.TezSplitGrouper.TezMRSplitsGrouperConfigBuilder}
   */
  @Deprecated
  public static final class TezMRSplitsGrouperConfigBuilder {
    private final Configuration conf;

    /**
     * This configuration will be modified in place. A {@code null} argument is
     * replaced by a new {@code Configuration(false)}.
     */
    private TezMRSplitsGrouperConfigBuilder(@Nullable Configuration conf) {
      this.conf = (conf == null) ? new Configuration(false) : conf;
    }

    /**
     * Stores {@code count} under {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_COUNT}.
     *
     * @return this builder
     */
    public TezMRSplitsGrouperConfigBuilder setGroupSplitCount(int count) {
      this.conf.setInt(TezSplitGrouper.TEZ_GROUPING_SPLIT_COUNT, count);
      return this;
    }

    /**
     * Stores {@code enabled} under {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_BY_COUNT}.
     *
     * @return this builder
     */
    public TezMRSplitsGrouperConfigBuilder setGroupSplitByCount(boolean enabled) {
      this.conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_SPLIT_BY_COUNT, enabled);
      return this;
    }

    /**
     * Stores {@code enabled} under {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_BY_LENGTH}.
     *
     * @return this builder
     */
    public TezMRSplitsGrouperConfigBuilder setGroupSplitByLength(boolean enabled) {
      this.conf.setBoolean(TezSplitGrouper.TEZ_GROUPING_SPLIT_BY_LENGTH, enabled);
      return this;
    }

    /**
     * Stores {@code multiplier} under {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_WAVES}.
     *
     * @return this builder
     */
    public TezMRSplitsGrouperConfigBuilder setGroupSplitWaves(float multiplier) {
      this.conf.setFloat(TezSplitGrouper.TEZ_GROUPING_SPLIT_WAVES, multiplier);
      return this;
    }

    /**
     * Stores {@code rackSplitSizeReduction} under
     * {@link TezSplitGrouper#TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION}.
     *
     * @return this builder
     */
    public TezMRSplitsGrouperConfigBuilder setGroupingRackSplitSizeReduction(float rackSplitSizeReduction) {
      this.conf.setFloat(TezSplitGrouper.TEZ_GROUPING_RACK_SPLIT_SIZE_REDUCTION, rackSplitSizeReduction);
      return this;
    }

    /**
     * Sets the upper and lower bounds for the splits. The values are stored as
     * given; no ordering or range check is performed here.
     *
     * @param lowerBound stored under {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_MIN_SIZE}
     * @param upperBound stored under {@link TezSplitGrouper#TEZ_GROUPING_SPLIT_MAX_SIZE}
     * @return this builder
     */
    public TezMRSplitsGrouperConfigBuilder setGroupingSplitSize(long lowerBound, long upperBound) {
      this.conf.setLong(TezSplitGrouper.TEZ_GROUPING_SPLIT_MIN_SIZE, lowerBound);
      this.conf.setLong(TezSplitGrouper.TEZ_GROUPING_SPLIT_MAX_SIZE, upperBound);
      return this;
    }

    /**
     * Returns the configuration this builder has been writing to. It is the
     * same instance that was supplied at creation (or the one created in its
     * place when {@code null} was supplied), not a copy.
     */
    public Configuration build() {
      return this.conf;
    }
  }
}