flink-tests/src/test/java/org/apache/flink/test/operators/CustomDistributionITCase.java - flink - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.flink.test.operators;

 import org.apache.flink.api.common.distributions.DataDistribution;
 import org.apache.flink.api.common.functions.RichMapPartitionFunction;
 import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
 import org.apache.flink.api.common.typeinfo.TypeInformation;
 import org.apache.flink.api.java.DataSet;
 import org.apache.flink.api.java.ExecutionEnvironment;
 import org.apache.flink.api.java.io.DiscardingOutputFormat;
 import org.apache.flink.api.java.tuple.Tuple3;
 import org.apache.flink.api.java.utils.DataSetUtils;
 import org.apache.flink.core.memory.DataInputView;
 import org.apache.flink.core.memory.DataOutputView;
 import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
 import org.apache.flink.test.operators.util.CollectionDataSets;
 import org.apache.flink.test.util.MiniClusterWithClientResource;
 import org.apache.flink.util.Collector;
 import org.apache.flink.util.TestLogger;

 import org.junit.ClassRule;
 import org.junit.Test;

 import java.io.IOException;

 import static org.junit.Assert.fail;

 /**
  * Integration tests for custom {@link DataDistribution}.
  *
  * <p>Each positive test range-partitions a data set with a hand-written distribution and then
  * checks, inside every parallel subtask, that all records it received lie within the key range
  * bounded by that distribution's bucket boundaries.
  */
 @SuppressWarnings("serial")
 public class CustomDistributionITCase extends TestLogger {

     /** Mini cluster used by the tests of this class: one task manager offering eight slots. */
     @ClassRule
     public static final MiniClusterWithClientResource MINI_CLUSTER_RESOURCE =
             new MiniClusterWithClientResource(
                     new MiniClusterResourceConfiguration.Builder()
                             .setNumberTaskManagers(1)
                             .setNumberSlotsPerTaskManager(8)
                             .build());

     // ------------------------------------------------------------------------

     /**
      * Test the record partitioned rightly with one field according to the customized data
      * distribution.
      */
     @Test
     public void testPartitionWithDistribution1() throws Exception {
         final TestDataDist1 dist = new TestDataDist1();

         final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
         env.setParallelism(dist.getParallelism());

         DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

         DataSet<Boolean> result =
                 DataSetUtils.partitionByRange(input, dist, 0)
                         .mapPartition(
                                 new RichMapPartitionFunction<
                                         Tuple3<Integer, Long, String>, Boolean>() {

                                     @Override
                                     public void mapPartition(
                                             Iterable<Tuple3<Integer, Long, String>> values,
                                             Collector<Boolean> out)
                                             throws Exception {
                                         int pIdx = getRuntimeContext().getIndexOfThisSubtask();

                                         for (Tuple3<Integer, Long, String> s : values) {
                                             assertCorrectlyPartitioned(
                                                     pIdx, dist.boundaries, s, s.f0);
                                         }
                                     }
                                 });

         result.output(new DiscardingOutputFormat<Boolean>());
         env.execute();
     }

     /**
      * Test the record partitioned rightly with two fields according to the customized data
      * distribution.
      */
     @Test
     public void testRangeWithDistribution2() throws Exception {
         final TestDataDist2 dist = new TestDataDist2();

         final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
         env.setParallelism(dist.getParallelism());

         DataSet<Tuple3<Integer, Integer, String>> input =
                 env.fromElements(
                         new Tuple3<>(1, 5, "Hi"),
                         new Tuple3<>(1, 6, "Hi"),
                         new Tuple3<>(1, 7, "Hi"),
                         new Tuple3<>(1, 11, "Hello"),
                         new Tuple3<>(2, 3, "World"),
                         new Tuple3<>(2, 4, "World"),
                         new Tuple3<>(2, 5, "World"),
                         new Tuple3<>(2, 13, "Hello World"),
                         new Tuple3<>(3, 8, "Say"),
                         new Tuple3<>(4, 0, "Why"),
                         new Tuple3<>(4, 2, "Java"),
                         new Tuple3<>(4, 11, "Say Hello"),
                         new Tuple3<>(5, 1, "Hi Java!"),
                         new Tuple3<>(5, 2, "Hi Java?"),
                         new Tuple3<>(5, 3, "Hi Java again"));

         DataSet<Boolean> result =
                 DataSetUtils.partitionByRange(input, dist, 0, 1)
                         .mapPartition(
                                 new RichMapPartitionFunction<
                                         Tuple3<Integer, Integer, String>, Boolean>() {

                                     @Override
                                     public void mapPartition(
                                             Iterable<Tuple3<Integer, Integer, String>> values,
                                             Collector<Boolean> out)
                                             throws Exception {
                                         int pIdx = getRuntimeContext().getIndexOfThisSubtask();

                                         for (Tuple3<Integer, Integer, String> s : values) {
                                             assertCorrectlyPartitioned(
                                                     pIdx, dist.boundaries, s, s.f0, s.f1);
                                         }
                                     }
                                 });

         result.output(new DiscardingOutputFormat<Boolean>());
         env.execute();
     }

     /**
      * Test the number of partition keys less than the number of distribution fields: one key is
      * used with the two-field distribution, so only the first boundary field is compared.
      */
     @Test
     public void testPartitionKeyLessDistribution() throws Exception {
         final TestDataDist2 dist = new TestDataDist2();

         final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
         env.setParallelism(dist.getParallelism());

         DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

         DataSet<Boolean> result =
                 DataSetUtils.partitionByRange(input, dist, 0)
                         .mapPartition(
                                 new RichMapPartitionFunction<
                                         Tuple3<Integer, Long, String>, Boolean>() {

                                     @Override
                                     public void mapPartition(
                                             Iterable<Tuple3<Integer, Long, String>> values,
                                             Collector<Boolean> out)
                                             throws Exception {
                                         int pIdx = getRuntimeContext().getIndexOfThisSubtask();

                                         for (Tuple3<Integer, Long, String> s : values) {
                                             assertCorrectlyPartitioned(
                                                     pIdx, dist.boundaries, s, s.f0);
                                         }
                                     }
                                 });

         result.output(new DiscardingOutputFormat<Boolean>());
         env.execute();
     }

     /**
      * Test the number of partition keys larger than the number of distribution fields: three
      * keys are used with the two-field distribution and an {@link IllegalArgumentException} is
      * expected.
      */
     @Test(expected = IllegalArgumentException.class)
     public void testPartitionMoreThanDistribution() throws Exception {
         final TestDataDist2 dist = new TestDataDist2();

         ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

         DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
         DataSetUtils.partitionByRange(input, dist, 0, 1, 2);
     }

     // ------------------------------------------------------------------------

     /**
      * Fails the test if a record observed by a subtask lies outside that subtask's key range.
      *
      * <p>Subtask {@code i} may only see keys that are greater than boundary {@code i - 1} and
      * not greater than boundary {@code i}. The first subtask has no lower bound and the last
      * subtask has no upper bound. The tests run with a parallelism equal to the number of
      * boundaries, so {@code boundaries.length - 1} is the index of the last subtask.
      *
      * @param subtaskIndex index of the parallel subtask that received the record
      * @param boundaries bucket boundaries of the distribution, one row per subtask
      * @param element the record; only used to build the failure message
      * @param keys partition key values of the record, in key order
      */
     private static void assertCorrectlyPartitioned(
             int subtaskIndex, Integer[][] boundaries, Object element, Integer... keys) {
         final boolean first = subtaskIndex == 0;
         final boolean last = subtaskIndex >= boundaries.length - 1;
         // With a single subtask the upper bound is still checked, as the first-subtask rule
         // takes precedence over the last-subtask rule.
         final boolean hasUpperBound = first || !last;

         if ((hasUpperBound && compareToBoundary(keys, boundaries[subtaskIndex]) > 0)
                 || (!first && compareToBoundary(keys, boundaries[subtaskIndex - 1]) <= 0)) {
             fail("Record was not correctly partitioned: " + element);
         }
     }

     /**
      * Compares the keys field by field with the leading fields of a boundary.
      *
      * <p>Only as many boundary fields as there are keys are looked at, and a later key is only
      * inspected when all earlier keys equal their boundary field.
      *
      * @return a negative value, zero or a positive value if the keys are less than, equal to or
      *     greater than the boundary
      */
     private static int compareToBoundary(Integer[] keys, Integer[] boundary) {
         for (int i = 0; i < keys.length; i++) {
             final int cmp = keys[i].compareTo(boundary[i]);
             if (cmp != 0) {
                 return cmp;
             }
         }
         return 0;
     }

     /**
      * Distribution over one integer key with the fixed bucket boundaries 4, 9, 13 and 18. Used
      * by the tests of range partitioning with one key.
      */
     public static class TestDataDist1 implements DataDistribution {

         public Integer[][] boundaries =
                 new Integer[][] {
                     new Integer[] {4}, new Integer[] {9}, new Integer[] {13}, new Integer[] {18}
                 };

         public TestDataDist1() {}

         /** Returns the number of boundaries; the tests use it as the job parallelism. */
         public int getParallelism() {
             return boundaries.length;
         }

         /** Returns the fixed boundary of the given bucket; {@code totalNumBuckets} is ignored. */
         @Override
         public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
             return boundaries[bucketNum];
         }

         @Override
         public int getNumberOfFields() {
             return 1;
         }

         @Override
         public TypeInformation[] getKeyTypes() {
             return new TypeInformation[] {BasicTypeInfo.INT_TYPE_INFO};
         }

         /** Writes nothing: the boundaries are set by the field initializer. */
         @Override
         public void write(DataOutputView out) throws IOException {}

         /** Reads nothing: the boundaries are set by the field initializer. */
         @Override
         public void read(DataInputView in) throws IOException {}
     }

     /**
      * Distribution over two integer keys with five fixed two-field bucket boundaries. Used by
      * the tests of range partitioning with two keys.
      */
     public static class TestDataDist2 implements DataDistribution {

         public Integer[][] boundaries =
                 new Integer[][] {
                     new Integer[] {1, 6},
                     new Integer[] {2, 4},
                     new Integer[] {3, 9},
                     new Integer[] {4, 1},
                     new Integer[] {5, 2}
                 };

         public TestDataDist2() {}

         /** Returns the number of boundaries; the tests use it as the job parallelism. */
         public int getParallelism() {
             return boundaries.length;
         }

         /** Returns the fixed boundary of the given bucket; {@code totalNumBuckets} is ignored. */
         @Override
         public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
             return boundaries[bucketNum];
         }

         @Override
         public int getNumberOfFields() {
             return 2;
         }

         @Override
         public TypeInformation[] getKeyTypes() {
             return new TypeInformation[] {BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO};
         }

         /** Writes nothing: the boundaries are set by the field initializer. */
         @Override
         public void write(DataOutputView out) throws IOException {}

         /** Reads nothing: the boundaries are set by the field initializer. */
         @Override
         public void read(DataInputView in) throws IOException {}
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.flink.test.operators;

	import org.apache.flink.api.common.distributions.DataDistribution;
	import org.apache.flink.api.common.functions.RichMapPartitionFunction;
	import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
	import org.apache.flink.api.common.typeinfo.TypeInformation;
	import org.apache.flink.api.java.DataSet;
	import org.apache.flink.api.java.ExecutionEnvironment;
	import org.apache.flink.api.java.io.DiscardingOutputFormat;
	import org.apache.flink.api.java.tuple.Tuple3;
	import org.apache.flink.api.java.utils.DataSetUtils;
	import org.apache.flink.core.memory.DataInputView;
	import org.apache.flink.core.memory.DataOutputView;
	import org.apache.flink.runtime.testutils.MiniClusterResourceConfiguration;
	import org.apache.flink.test.operators.util.CollectionDataSets;
	import org.apache.flink.test.util.MiniClusterWithClientResource;
	import org.apache.flink.util.Collector;
	import org.apache.flink.util.TestLogger;

	import org.junit.ClassRule;
	import org.junit.Test;

	import java.io.IOException;

	import static org.junit.Assert.fail;

	/** Integration tests for custom {@link DataDistribution}. */
	@SuppressWarnings("serial")
	public class CustomDistributionITCase extends TestLogger {

	@ClassRule
	public static final MiniClusterWithClientResource MINI_CLUSTER_RESOURCE =
	new MiniClusterWithClientResource(
	new MiniClusterResourceConfiguration.Builder()
	.setNumberTaskManagers(1)
	.setNumberSlotsPerTaskManager(8)
	.build());

	// ------------------------------------------------------------------------

	/**
	* Test the record partitioned rightly with one field according to the customized data
	* distribution.
	*/
	@Test
	public void testPartitionWithDistribution1() throws Exception {
	final TestDataDist1 dist = new TestDataDist1();

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(dist.getParallelism());

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	DataSet<Boolean> result =
	DataSetUtils.partitionByRange(input, dist, 0)
	.mapPartition(
	new RichMapPartitionFunction<
	Tuple3<Integer, Long, String>, Boolean>() {

	@Override
	public void mapPartition(
	Iterable<Tuple3<Integer, Long, String>> values,
	Collector<Boolean> out)
	throws Exception {
	int pIdx = getRuntimeContext().getIndexOfThisSubtask();

	for (Tuple3<Integer, Long, String> s : values) {
	boolean correctlyPartitioned = true;
	if (pIdx == 0) {
	Integer[] upper = dist.boundaries[0];
	if (s.f0.compareTo(upper[0]) > 0) {
	correctlyPartitioned = false;
	}
	} else if (pIdx > 0
	&& pIdx < dist.getParallelism() - 1) {
	Integer[] lower = dist.boundaries[pIdx - 1];
	Integer[] upper = dist.boundaries[pIdx];
	if (s.f0.compareTo(upper[0]) > 0
	\|\| (s.f0.compareTo(lower[0]) <= 0)) {
	correctlyPartitioned = false;
	}
	} else {
	Integer[] lower = dist.boundaries[pIdx - 1];
	if ((s.f0.compareTo(lower[0]) <= 0)) {
	correctlyPartitioned = false;
	}
	}

	if (!correctlyPartitioned) {
	fail(
	"Record was not correctly partitioned: "
	+ s.toString());
	}
	}
	}
	});

	result.output(new DiscardingOutputFormat<Boolean>());
	env.execute();
	}

	/**
	* Test the record partitioned rightly with two fields according to the customized data
	* distribution.
	*/
	@Test
	public void testRangeWithDistribution2() throws Exception {
	final TestDataDist2 dist = new TestDataDist2();

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(dist.getParallelism());

	DataSet<Tuple3<Integer, Integer, String>> input =
	env.fromElements(
	new Tuple3<>(1, 5, "Hi"),
	new Tuple3<>(1, 6, "Hi"),
	new Tuple3<>(1, 7, "Hi"),
	new Tuple3<>(1, 11, "Hello"),
	new Tuple3<>(2, 3, "World"),
	new Tuple3<>(2, 4, "World"),
	new Tuple3<>(2, 5, "World"),
	new Tuple3<>(2, 13, "Hello World"),
	new Tuple3<>(3, 8, "Say"),
	new Tuple3<>(4, 0, "Why"),
	new Tuple3<>(4, 2, "Java"),
	new Tuple3<>(4, 11, "Say Hello"),
	new Tuple3<>(5, 1, "Hi Java!"),
	new Tuple3<>(5, 2, "Hi Java?"),
	new Tuple3<>(5, 3, "Hi Java again"));

	DataSet<Boolean> result =
	DataSetUtils.partitionByRange(input, dist, 0, 1)
	.mapPartition(
	new RichMapPartitionFunction<
	Tuple3<Integer, Integer, String>, Boolean>() {

	@Override
	public void mapPartition(
	Iterable<Tuple3<Integer, Integer, String>> values,
	Collector<Boolean> out)
	throws Exception {
	int pIdx = getRuntimeContext().getIndexOfThisSubtask();
	boolean correctlyPartitioned = true;

	for (Tuple3<Integer, Integer, String> s : values) {

	if (pIdx == 0) {
	Integer[] upper = dist.boundaries[0];
	if (s.f0.compareTo(upper[0]) > 0
	\|\| (s.f0.compareTo(upper[0]) == 0
	&& s.f1.compareTo(upper[1]) > 0)) {
	correctlyPartitioned = false;
	}
	} else if (pIdx > 0
	&& pIdx < dist.getParallelism() - 1) {
	Integer[] lower = dist.boundaries[pIdx - 1];
	Integer[] upper = dist.boundaries[pIdx];

	if (s.f0.compareTo(upper[0]) > 0
	\|\| (s.f0.compareTo(upper[0]) == 0
	&& s.f1.compareTo(upper[1]) > 0)
	\|\| (s.f0.compareTo(lower[0]) < 0)
	\|\| (s.f0.compareTo(lower[0]) == 0
	&& s.f1.compareTo(lower[1]) <= 0)) {
	correctlyPartitioned = false;
	}
	} else {
	Integer[] lower = dist.boundaries[pIdx - 1];
	if ((s.f0.compareTo(lower[0]) < 0)
	\|\| (s.f0.compareTo(lower[0]) == 0
	&& s.f1.compareTo(lower[1]) <= 0)) {
	correctlyPartitioned = false;
	}
	}

	if (!correctlyPartitioned) {
	fail(
	"Record was not correctly partitioned: "
	+ s.toString());
	}
	}
	}
	});

	result.output(new DiscardingOutputFormat<Boolean>());
	env.execute();
	}

	/*
	* Test the number of partition keys less than the number of distribution fields
	*/
	@Test
	public void testPartitionKeyLessDistribution() throws Exception {
	final TestDataDist2 dist = new TestDataDist2();

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(dist.getParallelism());

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

	DataSet<Boolean> result =
	DataSetUtils.partitionByRange(input, dist, 0)
	.mapPartition(
	new RichMapPartitionFunction<
	Tuple3<Integer, Long, String>, Boolean>() {

	@Override
	public void mapPartition(
	Iterable<Tuple3<Integer, Long, String>> values,
	Collector<Boolean> out)
	throws Exception {
	int pIdx = getRuntimeContext().getIndexOfThisSubtask();

	for (Tuple3<Integer, Long, String> s : values) {
	boolean correctlyPartitioned = true;
	if (pIdx == 0) {
	Integer[] upper = dist.boundaries[0];
	if (s.f0.compareTo(upper[0]) > 0) {
	correctlyPartitioned = false;
	}
	} else if (pIdx > 0
	&& pIdx < dist.getParallelism() - 1) {
	Integer[] lower = dist.boundaries[pIdx - 1];
	Integer[] upper = dist.boundaries[pIdx];
	if (s.f0.compareTo(upper[0]) > 0
	\|\| (s.f0.compareTo(lower[0]) <= 0)) {
	correctlyPartitioned = false;
	}
	} else {
	Integer[] lower = dist.boundaries[pIdx - 1];
	if ((s.f0.compareTo(lower[0]) <= 0)) {
	correctlyPartitioned = false;
	}
	}

	if (!correctlyPartitioned) {
	fail(
	"Record was not correctly partitioned: "
	+ s.toString());
	}
	}
	}
	});

	result.output(new DiscardingOutputFormat<Boolean>());
	env.execute();
	}

	/*
	* Test the number of partition keys larger than the number of distribution fields
	*/
	@Test(expected = IllegalArgumentException.class)
	public void testPartitionMoreThanDistribution() throws Exception {
	final TestDataDist2 dist = new TestDataDist2();

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);
	DataSetUtils.partitionByRange(input, dist, 0, 1, 2);
	}

	/** The class is used to do the tests of range partition with one key. */
	public static class TestDataDist1 implements DataDistribution {

	public Integer[][] boundaries =
	new Integer[][] {
	new Integer[] {4}, new Integer[] {9}, new Integer[] {13}, new Integer[] {18}
	};

	public TestDataDist1() {}

	public int getParallelism() {
	return boundaries.length;
	}

	@Override
	public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
	return boundaries[bucketNum];
	}

	@Override
	public int getNumberOfFields() {
	return 1;
	}

	@Override
	public TypeInformation[] getKeyTypes() {
	return new TypeInformation[] {BasicTypeInfo.INT_TYPE_INFO};
	}

	@Override
	public void write(DataOutputView out) throws IOException {}

	@Override
	public void read(DataInputView in) throws IOException {}
	}

	/** The class is used to do the tests of range partition with two keys. */
	public static class TestDataDist2 implements DataDistribution {

	public Integer[][] boundaries =
	new Integer[][] {
	new Integer[] {1, 6},
	new Integer[] {2, 4},
	new Integer[] {3, 9},
	new Integer[] {4, 1},
	new Integer[] {5, 2}
	};

	public TestDataDist2() {}

	public int getParallelism() {
	return boundaries.length;
	}

	@Override
	public Object[] getBucketBoundary(int bucketNum, int totalNumBuckets) {
	return boundaries[bucketNum];
	}

	@Override
	public int getNumberOfFields() {
	return 2;
	}

	@Override
	public TypeInformation[] getKeyTypes() {
	return new TypeInformation[] {BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO};
	}

	@Override
	public void write(DataOutputView out) throws IOException {}

	@Override
	public void read(DataInputView in) throws IOException {}
	}
	}