| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.beam.sdk.extensions.euphoria.core.client.operator; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertFalse; |
| import static org.junit.Assert.assertNotNull; |
| import static org.junit.Assert.assertSame; |
| import static org.junit.Assert.assertTrue; |
| |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.stream.Collectors; |
| import org.apache.beam.sdk.extensions.euphoria.core.client.type.TypePropagationAssert; |
| import org.apache.beam.sdk.extensions.euphoria.core.client.util.Sums; |
| import org.apache.beam.sdk.transforms.windowing.BoundedWindow; |
| import org.apache.beam.sdk.transforms.windowing.DefaultTrigger; |
| import org.apache.beam.sdk.transforms.windowing.FixedWindows; |
| import org.apache.beam.sdk.transforms.windowing.Window; |
| import org.apache.beam.sdk.transforms.windowing.WindowDesc; |
| import org.apache.beam.sdk.values.KV; |
| import org.apache.beam.sdk.values.PCollection; |
| import org.apache.beam.sdk.values.TypeDescriptor; |
| import org.apache.beam.sdk.values.TypeDescriptors; |
| import org.apache.beam.sdk.values.WindowingStrategy.AccumulationMode; |
| import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Iterables; |
| import org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.Lists; |
| import org.joda.time.Duration; |
| import org.junit.Test; |
| |
| /** Test operator ReduceByKey. */ |
| public class ReduceByKeyTest { |
| |
| @Test |
| public void testBuild() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final FixedWindows windowing = FixedWindows.of(org.joda.time.Duration.standardHours(1)); |
| final DefaultTrigger trigger = DefaultTrigger.of(); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.named("ReduceByKey1") |
| .of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy(Sums.ofLongs()) |
| .windowBy(windowing) |
| .triggeredBy(trigger) |
| .discardingFiredPanes() |
| .withAllowedLateness(Duration.standardSeconds(1000)) |
| .output(); |
| |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertTrue(reduce.getName().isPresent()); |
| assertEquals("ReduceByKey1", reduce.getName().get()); |
| assertNotNull(reduce.getKeyExtractor()); |
| assertNotNull(reduce.getValueExtractor()); |
| assertTrue(reduce.isCombineFnStyle()); |
| assertNotNull(reduce.getAccumulatorFactory()); |
| assertNotNull(reduce.getAccumulate()); |
| assertNotNull(reduce.getAccumulatorType()); |
| assertNotNull(reduce.getMergeAccumulators()); |
| assertNotNull(reduce.getOutputFn()); |
| |
| assertTrue(reduce.getWindow().isPresent()); |
| @SuppressWarnings("unchecked") |
| final WindowDesc<?> windowDesc = WindowDesc.of((Window) reduce.getWindow().get()); |
| assertEquals(windowing, windowDesc.getWindowFn()); |
| assertEquals(trigger, windowDesc.getTrigger()); |
| assertEquals(AccumulationMode.DISCARDING_FIRED_PANES, windowDesc.getAccumulationMode()); |
| assertEquals(Duration.standardSeconds(1000), windowDesc.getAllowedLateness()); |
| } |
| |
| @Test |
| public void testBuild_OutputValues() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<Long> reduced = |
| ReduceByKey.named("ReduceByKeyValues") |
| .of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy(Sums.ofLongs()) |
| .outputValues(); |
| |
| final OutputValues outputValues = (OutputValues) TestUtils.getProducer(reduced); |
| assertTrue(outputValues.getName().isPresent()); |
| assertEquals("ReduceByKeyValues", outputValues.getName().get()); |
| } |
| |
| @Test |
| public void testBuild_ImplicitName() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset).keyBy(s -> s).valueBy(s -> 1L).combineBy(Sums.ofLongs()).output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertFalse(reduce.getName().isPresent()); |
| } |
| |
| @Test |
| public void testBuild_ReduceBy() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .reduceBy(s -> s.mapToLong(e -> e).sum()) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertNotNull(reduce.getReducer()); |
| assertFalse(reduce.isCombineFnStyle()); |
| } |
| |
| @Test |
| public void testBuild_CombineByStream() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy(s -> s.mapToLong(e -> e).sum()) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertNotNull(reduce.getReducer()); |
| assertFalse(reduce.isCombineFnStyle()); |
| } |
| |
| @Test |
| public void testBuild_CombineByFull() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Integer>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy( |
| () -> new ArrayList<>(), |
| (acc, e) -> { |
| acc.add(e); |
| return acc; |
| }, |
| (l, r) -> Lists.newArrayList(Iterables.concat(l, r)), |
| List::size, |
| TypeDescriptors.lists(TypeDescriptors.longs()), |
| TypeDescriptors.integers()) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertTrue(reduce.isCombineFnStyle()); |
| assertNotNull(reduce.getAccumulatorFactory()); |
| assertNotNull(reduce.getAccumulatorType()); |
| assertNotNull(reduce.getAccumulate()); |
| assertNotNull(reduce.getMergeAccumulators()); |
| assertNotNull(reduce.getOutputFn()); |
| assertTrue(reduce.getOutputType().isPresent()); |
| } |
| |
| @Test |
| public void testBuild_CombineBy() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy(0L, (a, b) -> a + b) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertTrue(reduce.isCombineFnStyle()); |
| assertNotNull(reduce.getAccumulatorFactory()); |
| assertNotNull(reduce.getAccumulate()); |
| assertNotNull(reduce.getMergeAccumulators()); |
| assertNotNull(reduce.getOutputFn()); |
| assertTrue(reduce.getOutputType().isPresent()); |
| } |
| |
| @Test |
| public void testBuild_Windowing() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy(Sums.ofLongs()) |
| .windowBy(FixedWindows.of(Duration.standardHours(1))) |
| .triggeredBy(DefaultTrigger.of()) |
| .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES) |
| .output(); |
| |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| |
| assertTrue(reduce.getWindow().isPresent()); |
| @SuppressWarnings("unchecked") |
| final Window<? extends BoundedWindow> window = (Window) reduce.getWindow().get(); |
| assertEquals(FixedWindows.of(org.joda.time.Duration.standardHours(1)), window.getWindowFn()); |
| assertEquals(DefaultTrigger.of(), WindowDesc.of(window).getTrigger()); |
| assertSame( |
| AccumulationMode.DISCARDING_FIRED_PANES, WindowDesc.of(window).getAccumulationMode()); |
| assertFalse(reduce.getValueComparator().isPresent()); |
| } |
| |
| @Test |
| public void testBuild_sortedValues() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, List<Long>>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .reduceBy(s -> s.collect(Collectors.toList())) |
| .withSortedValues(Long::compare) |
| .windowBy(FixedWindows.of(Duration.standardHours(1))) |
| .triggeredBy(DefaultTrigger.of()) |
| .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertTrue(reduce.getValueComparator().isPresent()); |
| } |
| |
| @Test |
| public void testBuild_sortedValuesWithNoWindowing() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, List<Long>>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .reduceBy(s -> s.collect(Collectors.toList())) |
| .withSortedValues(Long::compare) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertTrue(reduce.getValueComparator().isPresent()); |
| } |
| |
| @Test |
| public void testWindow_applyIf() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy(Sums.ofLongs()) |
| .applyIf( |
| true, |
| b -> |
| b.windowBy(FixedWindows.of(Duration.standardHours(1))) |
| .triggeredBy(DefaultTrigger.of()) |
| .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertTrue(reduce.getWindow().isPresent()); |
| @SuppressWarnings("unchecked") |
| final Window<? extends BoundedWindow> window = (Window) reduce.getWindow().get(); |
| assertEquals(FixedWindows.of(org.joda.time.Duration.standardHours(1)), window.getWindowFn()); |
| assertEquals(DefaultTrigger.of(), WindowDesc.of(window).getTrigger()); |
| assertSame( |
| AccumulationMode.DISCARDING_FIRED_PANES, WindowDesc.of(window).getAccumulationMode()); |
| } |
| |
| @Test |
| public void testWindow_applyIfNot() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s) |
| .valueBy(s -> 1L) |
| .combineBy(Sums.ofLongs()) |
| .applyIf( |
| false, |
| b -> |
| b.windowBy(FixedWindows.of(Duration.standardHours(1))) |
| .triggeredBy(DefaultTrigger.of()) |
| .accumulationMode(AccumulationMode.DISCARDING_FIRED_PANES)) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| assertFalse(reduce.getWindow().isPresent()); |
| } |
| |
| @Test |
| @SuppressWarnings("unchecked") |
| public void testTypeHints_typePropagation() { |
| final PCollection<String> dataset = TestUtils.createMockDataset(TypeDescriptors.strings()); |
| final TypeDescriptor<String> keyType = TypeDescriptors.strings(); |
| final TypeDescriptor<Long> valueType = TypeDescriptors.longs(); |
| final TypeDescriptor<Long> outputType = TypeDescriptors.longs(); |
| final PCollection<KV<String, Long>> reduced = |
| ReduceByKey.of(dataset) |
| .keyBy(s -> s, keyType) |
| .valueBy(s -> 1L, valueType) |
| .combineBy(Sums.ofLongs()) |
| .output(); |
| final ReduceByKey reduce = (ReduceByKey) TestUtils.getProducer(reduced); |
| TypePropagationAssert.assertOperatorTypeAwareness(reduce, keyType, valueType, outputType); |
| } |
| } |