| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.flink.test.operators; |
| |
| import org.apache.flink.api.common.distributions.DataDistribution; |
| import org.apache.flink.api.common.functions.RichMapPartitionFunction; |
| import org.apache.flink.api.common.typeinfo.BasicTypeInfo; |
| import org.apache.flink.api.common.typeinfo.TypeInformation; |
| import org.apache.flink.api.java.DataSet; |
| import org.apache.flink.api.java.ExecutionEnvironment; |
| import org.apache.flink.api.java.io.DiscardingOutputFormat; |
| import org.apache.flink.api.java.tuple.Tuple3; |
| import org.apache.flink.api.java.utils.DataSetUtils; |
| import org.apache.flink.core.memory.DataInputView; |
| import org.apache.flink.core.memory.DataOutputView; |
| import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration; |
| import org.apache.flink.test.operators.util.CollectionDataSets; |
| import org.apache.flink.test.util.MiniClusterWithClientResource; |
| import org.apache.flink.testutils.junit.category.AlsoRunWithSchedulerNG; |
| import org.apache.flink.util.Collector; |
| import org.apache.flink.util.TestLogger; |
| |
| import org.junit.ClassRule; |
| import org.junit.Test; |
| import org.junit.experimental.categories.Category; |
| |
| import java.io.IOException; |
| |
| import static org.junit.Assert.fail; |
| |
| /** |
| * Integration tests for custom {@link DataDistribution}. |
| */ |
| @SuppressWarnings("serial") |
| @Category(AlsoRunWithSchedulerNG.class) |
| public class CustomDistributionITCase extends TestLogger { |
| |
	// Shared mini-cluster for all tests in this class: one TaskManager with 8 slots,
	// enough to cover the parallelism of both test distributions (4 and 5 buckets).
	@ClassRule
	public static final MiniClusterWithClientResource MINI_CLUSTER_RESOURCE = new MiniClusterWithClientResource(
		new MiniClusterResourceConfiguration.Builder()
			.setNumberTaskManagers(1)
			.setNumberSlotsPerTaskManager(8)
			.build());
| |
| // ------------------------------------------------------------------------ |
| |
| /** |
| * Test the record partitioned rightly with one field according to the customized data distribution. |
| */ |
| @Test |
| public void testPartitionWithDistribution1() throws Exception { |
| final TestDataDist1 dist = new TestDataDist1(); |
| |
| final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); |
| env.setParallelism(dist.getParallelism()); |
| |
| DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env); |
| |
| DataSet<Boolean> result = DataSetUtils |
| .partitionByRange(input, dist, 0) |
| .mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Long, String>, Boolean>() { |
| |
| @Override |
| public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Boolean> out) throws Exception { |
| int pIdx = getRuntimeContext().getIndexOfThisSubtask(); |
| |
| for (Tuple3<Integer, Long, String> s : values) { |
| boolean correctlyPartitioned = true; |
| if (pIdx == 0) { |
| Integer[] upper = dist.boundaries[0]; |
| if (s.f0.compareTo(upper[0]) > 0) { |
| correctlyPartitioned = false; |
| } |
| } |
| else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) { |
| Integer[] lower = dist.boundaries[pIdx - 1]; |
| Integer[] upper = dist.boundaries[pIdx]; |
| if (s.f0.compareTo(upper[0]) > 0 || (s.f0.compareTo(lower[0]) <= 0)) { |
| correctlyPartitioned = false; |
| } |
| } |
| else { |
| Integer[] lower = dist.boundaries[pIdx - 1]; |
| if ((s.f0.compareTo(lower[0]) <= 0)) { |
| correctlyPartitioned = false; |
| } |
| } |
| |
| if (!correctlyPartitioned) { |
| fail("Record was not correctly partitioned: " + s.toString()); |
| } |
| } |
| } |
| } |
| ); |
| |
| result.output(new DiscardingOutputFormat<Boolean>()); |
| env.execute(); |
| } |
| |
| /** |
| * Test the record partitioned rightly with two fields according to the customized data distribution. |
| */ |
| @Test |
| public void testRangeWithDistribution2() throws Exception { |
| final TestDataDist2 dist = new TestDataDist2(); |
| |
| final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); |
| env.setParallelism(dist.getParallelism()); |
| |
| DataSet<Tuple3<Integer, Integer, String>> input = env.fromElements( |
| new Tuple3<>(1, 5, "Hi"), |
| new Tuple3<>(1, 6, "Hi"), |
| new Tuple3<>(1, 7, "Hi"), |
| new Tuple3<>(1, 11, "Hello"), |
| new Tuple3<>(2, 3, "World"), |
| new Tuple3<>(2, 4, "World"), |
| new Tuple3<>(2, 5, "World"), |
| new Tuple3<>(2, 13, "Hello World"), |
| new Tuple3<>(3, 8, "Say"), |
| new Tuple3<>(4, 0, "Why"), |
| new Tuple3<>(4, 2, "Java"), |
| new Tuple3<>(4, 11, "Say Hello"), |
| new Tuple3<>(5, 1, "Hi Java!"), |
| new Tuple3<>(5, 2, "Hi Java?"), |
| new Tuple3<>(5, 3, "Hi Java again") |
| ); |
| |
| DataSet<Boolean> result = DataSetUtils |
| .partitionByRange(input, dist, 0, 1) |
| .mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Integer, String>, Boolean>() { |
| |
| @Override |
| public void mapPartition(Iterable<Tuple3<Integer, Integer, String>> values, Collector<Boolean> out) throws Exception { |
| int pIdx = getRuntimeContext().getIndexOfThisSubtask(); |
| boolean correctlyPartitioned = true; |
| |
| for (Tuple3<Integer, Integer, String> s : values) { |
| |
| if (pIdx == 0) { |
| Integer[] upper = dist.boundaries[0]; |
| if (s.f0.compareTo(upper[0]) > 0 || |
| (s.f0.compareTo(upper[0]) == 0 && s.f1.compareTo(upper[1]) > 0)) { |
| correctlyPartitioned = false; |
| } |
| } |
| else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) { |
| Integer[] lower = dist.boundaries[pIdx - 1]; |
| Integer[] upper = dist.boundaries[pIdx]; |
| |
| if (s.f0.compareTo(upper[0]) > 0 || |
| (s.f0.compareTo(upper[0]) == 0 && s.f1.compareTo(upper[1]) > 0) || |
| (s.f0.compareTo(lower[0]) < 0) || |
| (s.f0.compareTo(lower[0]) == 0 && s.f1.compareTo(lower[1]) <= 0)) { |
| correctlyPartitioned = false; |
| } |
| } |
| else { |
| Integer[] lower = dist.boundaries[pIdx - 1]; |
| if ((s.f0.compareTo(lower[0]) < 0) || |
| (s.f0.compareTo(lower[0]) == 0 && s.f1.compareTo(lower[1]) <= 0)) { |
| correctlyPartitioned = false; |
| } |
| } |
| |
| if (!correctlyPartitioned) { |
| fail("Record was not correctly partitioned: " + s.toString()); |
| } |
| } |
| } |
| } |
| ); |
| |
| result.output(new DiscardingOutputFormat<Boolean>()); |
| env.execute(); |
| } |
| |
| /* |
| * Test the number of partition keys less than the number of distribution fields |
| */ |
| @Test |
| public void testPartitionKeyLessDistribution() throws Exception { |
| final TestDataDist2 dist = new TestDataDist2(); |
| |
| final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); |
| env.setParallelism(dist.getParallelism()); |
| |
| DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env); |
| |
| DataSet<Boolean> result = DataSetUtils |
| .partitionByRange(input, dist, 0) |
| .mapPartition(new RichMapPartitionFunction<Tuple3<Integer, Long, String>, Boolean>() { |
| |
| @Override |
| public void mapPartition(Iterable<Tuple3<Integer, Long, String>> values, Collector<Boolean> out) throws Exception { |
| int pIdx = getRuntimeContext().getIndexOfThisSubtask(); |
| |
| for (Tuple3<Integer, Long, String> s : values) { |
| boolean correctlyPartitioned = true; |
| if (pIdx == 0) { |
| Integer[] upper = dist.boundaries[0]; |
| if (s.f0.compareTo(upper[0]) > 0) { |
| correctlyPartitioned = false; |
| } |
| } |
| else if (pIdx > 0 && pIdx < dist.getParallelism() - 1) { |
| Integer[] lower = dist.boundaries[pIdx - 1]; |
| Integer[] upper = dist.boundaries[pIdx]; |
| if (s.f0.compareTo(upper[0]) > 0 || (s.f0.compareTo(lower[0]) <= 0)) { |
| correctlyPartitioned = false; |
| } |
| } |
| else { |
| Integer[] lower = dist.boundaries[pIdx - 1]; |
| if ((s.f0.compareTo(lower[0]) <= 0)) { |
| correctlyPartitioned = false; |
| } |
| } |
| |
| if (!correctlyPartitioned) { |
| fail("Record was not correctly partitioned: " + s.toString()); |
| } |
| } |
| } |
| } |
| ); |
| |
| result.output(new DiscardingOutputFormat<Boolean>()); |
| env.execute(); |
| } |
| |
| /* |
| * Test the number of partition keys larger than the number of distribution fields |
| */ |
| @Test(expected = IllegalArgumentException.class) |
| public void testPartitionMoreThanDistribution() throws Exception { |
| final TestDataDist2 dist = new TestDataDist2(); |
| |
| ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); |
| |
| DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env); |
| DataSetUtils.partitionByRange(input, dist, 0, 1, 2); |
| } |
| |
| /** |
| * The class is used to do the tests of range partition with one key. |
| */ |
| public static class TestDataDist1 implements DataDistribution { |
| |
| public Integer[][] boundaries = new Integer[][]{ |
| new Integer[]{4}, |
| new Integer[]{9}, |
| new Integer[]{13}, |
| new Integer[]{18} |
| }; |
| |
| public TestDataDist1() {} |
| |
| public int getParallelism() { |
| return boundaries.length; |
| } |
| |
| @Override |
| public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) { |
| return boundaries[bucketNum]; |
| } |
| |
| @Override |
| public int getNumberOfFields() { |
| return 1; |
| } |
| |
| @Override |
| public TypeInformation[] getKeyTypes() { |
| return new TypeInformation[]{BasicTypeInfo.INT_TYPE_INFO}; |
| } |
| |
| @Override |
| public void write(DataOutputView out) throws IOException {} |
| |
| @Override |
| public void read(DataInputView in) throws IOException {} |
| } |
| |
| /** |
| * The class is used to do the tests of range partition with two keys. |
| */ |
| public static class TestDataDist2 implements DataDistribution { |
| |
| public Integer[][] boundaries = new Integer[][]{ |
| new Integer[]{1, 6}, |
| new Integer[]{2, 4}, |
| new Integer[]{3, 9}, |
| new Integer[]{4, 1}, |
| new Integer[]{5, 2} |
| }; |
| |
| public TestDataDist2() {} |
| |
| public int getParallelism() { |
| return boundaries.length; |
| } |
| |
| @Override |
| public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) { |
| return boundaries[bucketNum]; |
| } |
| |
| @Override |
| public int getNumberOfFields() { |
| return 2; |
| } |
| |
| @Override |
| public TypeInformation[] getKeyTypes() { |
| return new TypeInformation[]{BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO}; |
| } |
| |
| @Override |
| public void write(DataOutputView out) throws IOException {} |
| |
| @Override |
| public void read(DataInputView in) throws IOException {} |
| } |
| } |