/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.flink.test.operators;

import org.apache.flink.api.common.distributions.DataDistribution;
import org.apache.flink.api.common.functions.RichMapPartitionFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.io.DiscardingOutputFormat;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.utils.DataSetUtils;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
import org.apache.flink.test.operators.util.CollectionDataSets;
import org.apache.flink.test.util.MiniClusterWithClientResource;
import org.apache.flink.util.Collector;
import org.apache.flink.util.TestLogger;

import org.junit.ClassRule;
import org.junit.Test;

import java.io.IOException;

import static org.junit.Assert.fail;

/** Integration tests for custom {@link DataDistribution}. */
@SuppressWarnings("serial")
public class CustomDistributionITCase extends TestLogger {

@ClassRule
public static final MiniClusterWithClientResource MINI_CLUSTER_RESOURCE =
new MiniClusterWithClientResource(
new MiniClusterResourceConfiguration.Builder()
.setNumberTaskManagers(1)
.setNumberSlotsPerTaskManager(8)
.build());

    // ------------------------------------------------------------------------

    /**
     * Tests that records are partitioned correctly on one key field according to the custom data
     * distribution.
     */
@Test
public void testPartitionWithDistribution1() throws Exception {
final TestDataDist1 dist = new TestDataDist1();
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
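        // Run one subtask per range partition so that the subtask index identifies the partition.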
env.setParallelism(dist.getParallelism());
DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
DataSet<Boolean> result =
DataSetUtils.partitionByRange(input, dist, 0)
.mapPartition(
new RichMapPartitionFunction<
Tuple3<Integer, Long, String>, Boolean>() {
@Override
public void mapPartition(
Iterable<Tuple3<Integer, Long, String>> values,
Collector<Boolean> out)
throws Exception {
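                                        // Partition p may only contain keys in the range
                                        // (boundary[p - 1], boundary[p]]; the first partition
                                        // has no lower bound, the last one no upper bound.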
int pIdx = getRuntimeContext().getIndexOfThisSubtask();
for (Tuple3<Integer, Long, String> s : values) {
boolean correctlyPartitioned = true;
if (pIdx == 0) {
Integer[] upper = dist.boundaries[0];
if (s.f0.compareTo(upper[0]) > 0) {
correctlyPartitioned = false;
}
} else if (pIdx > 0
&& pIdx < dist.getParallelism() - 1) {
Integer[] lower = dist.boundaries[pIdx - 1];
Integer[] upper = dist.boundaries[pIdx];
if (s.f0.compareTo(upper[0]) > 0
|| (s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
} else {
Integer[] lower = dist.boundaries[pIdx - 1];
if ((s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
}
if (!correctlyPartitioned) {
fail(
"Record was not correctly partitioned: "
+ s.toString());
}
}
}
});
result.output(new DiscardingOutputFormat<Boolean>());
env.execute();
}

    /**
     * Tests that records are partitioned correctly on two key fields according to the custom data
     * distribution.
     */
@Test
public void testRangeWithDistribution2() throws Exception {
final TestDataDist2 dist = new TestDataDist2();
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(dist.getParallelism());
DataSet<Tuple3<Integer, Integer, String>> input =
env.fromElements(
new Tuple3<>(1, 5, "Hi"),
new Tuple3<>(1, 6, "Hi"),
new Tuple3<>(1, 7, "Hi"),
new Tuple3<>(1, 11, "Hello"),
new Tuple3<>(2, 3, "World"),
new Tuple3<>(2, 4, "World"),
new Tuple3<>(2, 5, "World"),
new Tuple3<>(2, 13, "Hello World"),
new Tuple3<>(3, 8, "Say"),
new Tuple3<>(4, 0, "Why"),
new Tuple3<>(4, 2, "Java"),
new Tuple3<>(4, 11, "Say Hello"),
new Tuple3<>(5, 1, "Hi Java!"),
new Tuple3<>(5, 2, "Hi Java?"),
new Tuple3<>(5, 3, "Hi Java again"));
DataSet<Boolean> result =
DataSetUtils.partitionByRange(input, dist, 0, 1)
.mapPartition(
new RichMapPartitionFunction<
Tuple3<Integer, Integer, String>, Boolean>() {
@Override
public void mapPartition(
Iterable<Tuple3<Integer, Integer, String>> values,
Collector<Boolean> out)
throws Exception {
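                                        // Two-field boundaries are compared lexicographically:
                                        // first on f0, and only on f1 if f0 is equal.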
int pIdx = getRuntimeContext().getIndexOfThisSubtask();
                                        for (Tuple3<Integer, Integer, String> s : values) {
                                            boolean correctlyPartitioned = true;
                                            if (pIdx == 0) {
Integer[] upper = dist.boundaries[0];
if (s.f0.compareTo(upper[0]) > 0
|| (s.f0.compareTo(upper[0]) == 0
&& s.f1.compareTo(upper[1]) > 0)) {
correctlyPartitioned = false;
}
} else if (pIdx > 0
&& pIdx < dist.getParallelism() - 1) {
Integer[] lower = dist.boundaries[pIdx - 1];
Integer[] upper = dist.boundaries[pIdx];
if (s.f0.compareTo(upper[0]) > 0
|| (s.f0.compareTo(upper[0]) == 0
&& s.f1.compareTo(upper[1]) > 0)
|| (s.f0.compareTo(lower[0]) < 0)
|| (s.f0.compareTo(lower[0]) == 0
&& s.f1.compareTo(lower[1]) <= 0)) {
correctlyPartitioned = false;
}
} else {
Integer[] lower = dist.boundaries[pIdx - 1];
if ((s.f0.compareTo(lower[0]) < 0)
|| (s.f0.compareTo(lower[0]) == 0
&& s.f1.compareTo(lower[1]) <= 0)) {
correctlyPartitioned = false;
}
}
if (!correctlyPartitioned) {
fail(
"Record was not correctly partitioned: "
+ s.toString());
}
}
}
});
result.output(new DiscardingOutputFormat<Boolean>());
env.execute();
}

    /**
     * Tests range partitioning where the number of partition keys is smaller than the number of
     * fields in the data distribution.
     */
@Test
public void testPartitionKeyLessDistribution() throws Exception {
final TestDataDist2 dist = new TestDataDist2();
final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(dist.getParallelism());
DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
DataSet<Boolean> result =
DataSetUtils.partitionByRange(input, dist, 0)
.mapPartition(
new RichMapPartitionFunction<
Tuple3<Integer, Long, String>, Boolean>() {
@Override
public void mapPartition(
Iterable<Tuple3<Integer, Long, String>> values,
Collector<Boolean> out)
throws Exception {
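                                        // Only key field 0 is used here, so the checks consider
                                        // just the first component of each two-field boundary.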
int pIdx = getRuntimeContext().getIndexOfThisSubtask();
for (Tuple3<Integer, Long, String> s : values) {
boolean correctlyPartitioned = true;
if (pIdx == 0) {
Integer[] upper = dist.boundaries[0];
if (s.f0.compareTo(upper[0]) > 0) {
correctlyPartitioned = false;
}
} else if (pIdx > 0
&& pIdx < dist.getParallelism() - 1) {
Integer[] lower = dist.boundaries[pIdx - 1];
Integer[] upper = dist.boundaries[pIdx];
if (s.f0.compareTo(upper[0]) > 0
|| (s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
} else {
Integer[] lower = dist.boundaries[pIdx - 1];
if ((s.f0.compareTo(lower[0]) <= 0)) {
correctlyPartitioned = false;
}
}
if (!correctlyPartitioned) {
fail(
"Record was not correctly partitioned: "
+ s.toString());
}
}
}
});
result.output(new DiscardingOutputFormat<Boolean>());
env.execute();
}

    /**
     * Tests that range partitioning fails when the number of partition keys is larger than the
     * number of fields in the data distribution.
     */
@Test(expected = IllegalArgumentException.class)
public void testPartitionMoreThanDistribution() throws Exception {
final TestDataDist2 dist = new TestDataDist2();
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
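        // Three partition keys against a two-field distribution must be rejected.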
DataSetUtils.partitionByRange(input, dist, 0, 1, 2);
}

    /** A data distribution with a single key field, used for the range-partitioning tests. */
public static class TestDataDist1 implements DataDistribution {
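
        // Upper bucket boundaries for the range partitioner; the number of buckets (and the
        // test parallelism) is derived from the number of boundaries.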
public Integer[][] boundaries =
new Integer[][] {
new Integer[] {4}, new Integer[] {9}, new Integer[] {13}, new Integer[] {18}
};
public TestDataDist1() {}
public int getParallelism() {
return boundaries.length;
}
@Override
public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
return boundaries[bucketNum];
}
@Override
public int getNumberOfFields() {
return 1;
}
@Override
public TypeInformation[] getKeyTypes() {
return new TypeInformation[] {BasicTypeInfo.INT_TYPE_INFO};
}
@Override
public void write(DataOutputView out) throws IOException {}
@Override
public void read(DataInputView in) throws IOException {}
}

    /** A data distribution with two key fields, used for the range-partitioning tests. */
public static class TestDataDist2 implements DataDistribution {
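
        // Composite (two-field) bucket boundaries, ordered lexicographically on (f0, f1).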
public Integer[][] boundaries =
new Integer[][] {
new Integer[] {1, 6},
new Integer[] {2, 4},
new Integer[] {3, 9},
new Integer[] {4, 1},
new Integer[] {5, 2}
};
public TestDataDist2() {}
public int getParallelism() {
return boundaries.length;
}
@Override
public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
return boundaries[bucketNum];
}
@Override
public int getNumberOfFields() {
return 2;
}
@Override
public TypeInformation[] getKeyTypes() {
return new TypeInformation[] {BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO};
}
@Override
public void write(DataOutputView out) throws IOException {}
@Override
public void read(DataInputView in) throws IOException {}
}
}