/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.test.operators;

import org.apache.flink.api.common.distributions.DataDistribution;
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.DataSetUtils;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
import org.apache.flink.test.operators.util.CollectionDataSets;
import org.apache.flink.test.util.MiniClusterWithClientResource;
import org.apache.flink.testutils.junit.category.AlsoRunWithSchedulerNG;
import org.apache.flink.util.Collector;
import org.apache.flink.util.TestLogger;

import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import java.io.IOException;

import static org.junit.Assert.fail;

/**
* Integration tests for custom {@link DataDistribution}.
*/
@SuppressWarnings("serial")
@Category(AlsoRunWithSchedulerNG.class)
public class CustomDistributionITCase extends TestLogger {

@ClassRule
public static final MiniClusterWithClientResource MINI_CLUSTER_RESOURCE = new MiniClusterWithClientResource(
new MiniClusterResourceConfiguration.Builder()
.setNumberTaskManagers(1)
.setNumberSlotsPerTaskManager(8)
			.build());

	// ------------------------------------------------------------------------

	/**
	 * Tests that records are correctly partitioned on one key field according to the custom
	 * {@link DataDistribution}.
	 */
@Test
public void testPartitionWithDistribution1() throws Exception {
final TestDataDist1 dist = new TestDataDist1();
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(dist.getParallelism());
DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
DataSet<Boolean> result = DataSetUtils
.partitionByRange(input, dist, 0)
.mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Long, String>, Boolean>() {
@Override
public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Boolean> out) throws Exception {
int pIdx = getRuntimeContext().getIndexOfThisSubtask();
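					// The boundaries are inclusive upper bounds: partition i may only hold
					// records whose key falls into (boundaries[i - 1], boundaries[i]].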
for (Tuple3<Integer, Long, String> s : values) {
boolean correctlyPartitioned = true;
if (pIdx == 0) {
Integer[] upper = dist.boundaries[0];
if (s.f0.compareTo(upper[0]) > 0) {
correctlyPartitioned = false;
}
}
else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) {
Integer[] lower = dist.boundaries[pIdx - 1];
Integer[] upper = dist.boundaries[pIdx];
if (s.f0.compareTo(upper[0]) > 0 || (s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
}
else {
Integer[] lower = dist.boundaries[pIdx - 1];
if ((s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
}
if (!correctlyPartitioned) {
fail("Record was not correctly partitioned: " + s.toString());
}
}
}
}
);
result.output(new DiscardingOutputFormat<Boolean>());
env.execute();
	}

	/**
	 * Tests that records are correctly partitioned on two key fields according to the custom
	 * {@link DataDistribution}.
	 */
@Test
public void testRangeWithDistribution2() throws Exception {
final TestDataDist2 dist = new TestDataDist2();
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(dist.getParallelism());
DataSet<Tuple3<Integer, Integer, String>> input = env.fromElements(
new Tuple3<>(1, 5, "Hi"),
new Tuple3<>(1, 6, "Hi"),
new Tuple3<>(1, 7, "Hi"),
new Tuple3<>(1, 11, "Hello"),
new Tuple3<>(2, 3, "World"),
new Tuple3<>(2, 4, "World"),
new Tuple3<>(2, 5, "World"),
new Tuple3<>(2, 13, "Hello World"),
new Tuple3<>(3, 8, "Say"),
new Tuple3<>(4, 0, "Why"),
new Tuple3<>(4, 2, "Java"),
new Tuple3<>(4, 11, "Say Hello"),
new Tuple3<>(5, 1, "Hi Java!"),
new Tuple3<>(5, 2, "Hi Java?"),
new Tuple3<>(5, 3, "Hi Java again")
);
DataSet<Boolean> result = DataSetUtils
.partitionByRange(input, dist, 0, 1)
.mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Integer, String>, Boolean>() {
@Override
public void mapPartition(Iterable<Tuple3<Integer, Integer, String>> values, Collector<Boolean> out) throws Exception {
int pIdx = getRuntimeContext().getIndexOfThisSubtask();
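					// Boundaries are composite (f0, f1) keys compared lexicographically:
					// f0 decides first, f1 breaks ties on equal f0 values.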
boolean correctlyPartitioned = true;
for (Tuple3<Integer, Integer, String> s : values) {
if (pIdx == 0) {
Integer[] upper = dist.boundaries[0];
if (s.f0.compareTo(upper[0]) > 0 ||
(s.f0.compareTo(upper[0]) == 0 && s.f1.compareTo(upper[1]) > 0)) {
correctlyPartitioned = false;
}
}
else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) {
Integer[] lower = dist.boundaries[pIdx - 1];
Integer[] upper = dist.boundaries[pIdx];
if (s.f0.compareTo(upper[0]) > 0 ||
(s.f0.compareTo(upper[0]) == 0 && s.f1.compareTo(upper[1]) > 0) ||
(s.f0.compareTo(lower[0]) < 0) ||
(s.f0.compareTo(lower[0]) == 0 && s.f1.compareTo(lower[1]) <= 0)) {
correctlyPartitioned = false;
}
}
else {
Integer[] lower = dist.boundaries[pIdx - 1];
if ((s.f0.compareTo(lower[0]) < 0) ||
(s.f0.compareTo(lower[0]) == 0 && s.f1.compareTo(lower[1]) <= 0)) {
correctlyPartitioned = false;
}
}
if (!correctlyPartitioned) {
fail("Record was not correctly partitioned: " + s.toString());
}
}
}
}
);
result.output(new DiscardingOutputFormat<Boolean>());
env.execute();
	}

	/**
	 * Tests range partitioning where the number of partition keys is smaller than the number of
	 * fields in the {@link DataDistribution}.
	 */
@Test
public void testPartitionKeyLessDistribution() throws Exception {
final TestDataDist2 dist = new TestDataDist2();
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(dist.getParallelism());
DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
DataSet<Boolean> result = DataSetUtils
.partitionByRange(input, dist, 0)
.mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Long, String>, Boolean>() {
@Override
public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Boolean> out) throws Exception {
int pIdx = getRuntimeContext().getIndexOfThisSubtask();
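					// Only one partition key was given, so only the first field of the
					// two-field boundaries is relevant for the range check.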
for (Tuple3<Integer, Long, String> s : values) {
boolean correctlyPartitioned = true;
if (pIdx == 0) {
Integer[] upper = dist.boundaries[0];
if (s.f0.compareTo(upper[0]) > 0) {
correctlyPartitioned = false;
}
}
else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) {
Integer[] lower = dist.boundaries[pIdx - 1];
Integer[] upper = dist.boundaries[pIdx];
if (s.f0.compareTo(upper[0]) > 0 || (s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
}
else {
Integer[] lower = dist.boundaries[pIdx - 1];
if ((s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
}
if (!correctlyPartitioned) {
fail("Record was not correctly partitioned: " + s.toString());
}
}
}
}
);
result.output(new DiscardingOutputFormat<Boolean>());
env.execute();
	}

	/**
	 * Tests that range partitioning fails with an {@link IllegalArgumentException} when the number
	 * of partition keys exceeds the number of fields in the {@link DataDistribution}.
	 */
@Test(expected = IllegalArgumentException.class)
public void testPartitionMoreThanDistribution() throws Exception {
final TestDataDist2 dist = new TestDataDist2();
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
DataSetUtils.partitionByRange(input, dist, 0, 1, 2);
	}

	/**
	 * Custom {@link DataDistribution} for testing range partitioning with one key field.
	 */
public static class TestDataDist1 implements DataDistribution {
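		// Inclusive upper bounds of the key ranges; parallelism equals the number of boundaries.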
public Integer[][] boundaries = new Integer[][]{
new Integer[]{4},
new Integer[]{9},
new Integer[]{13},
new Integer[]{18}
};
public TestDataDist1() {}
public int getParallelism() {
return boundaries.length;
}
@Override
public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
return boundaries[bucketNum];
}
@Override
public int getNumberOfFields() {
return 1;
}
@Override
public TypeInformation[] getKeyTypes() {
return new TypeInformation[]{BasicTypeInfo.INT_TYPE_INFO};
}
@Override
public void write(DataOutputView out) throws IOException {}
@Override
public void read(DataInputView in) throws IOException {}
	}

	/**
	 * Custom {@link DataDistribution} for testing range partitioning with two key fields.
	 */
public static class TestDataDist2 implements DataDistribution {
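		// Composite (f0, f1) upper bounds, compared lexicographically in the checks above.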
public Integer[][] boundaries = new Integer[][]{
new Integer[]{1, 6},
new Integer[]{2, 4},
new Integer[]{3, 9},
new Integer[]{4, 1},
new Integer[]{5, 2}
};
public TestDataDist2() {}
public int getParallelism() {
return boundaries.length;
}
@Override
public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
return boundaries[bucketNum];
}
@Override
public int getNumberOfFields() {
return 2;
}
@Override
public TypeInformation[] getKeyTypes() {
return new TypeInformation[]{BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO};
}
@Override
public void write(DataOutputView out) throws IOException {}
@Override
public void read(DataInputView in) throws IOException {}
}
}