blob: 34ac29abe406057ef67f5fa7735454bcb2a70252 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.samza.test.table;
import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.samza.application.SamzaApplication;
import org.apache.samza.application.StreamApplication;
import org.apache.samza.application.TaskApplication;
import org.apache.samza.application.descriptors.ApplicationDescriptor;
import org.apache.samza.application.descriptors.StreamApplicationDescriptor;
import org.apache.samza.application.descriptors.TaskApplicationDescriptor;
import org.apache.samza.context.Context;
import org.apache.samza.operators.KV;
import org.apache.samza.serializers.IntegerSerde;
import org.apache.samza.serializers.KVSerde;
import org.apache.samza.serializers.NoOpSerde;
import org.apache.samza.storage.kv.Entry;
import org.apache.samza.storage.kv.descriptors.RocksDbTableDescriptor;
import org.apache.samza.storage.kv.inmemory.descriptors.InMemoryTableDescriptor;
import org.apache.samza.system.IncomingMessageEnvelope;
import org.apache.samza.system.OutgoingMessageEnvelope;
import org.apache.samza.system.SystemStream;
import org.apache.samza.system.descriptors.DelegatingSystemDescriptor;
import org.apache.samza.table.ReadWriteTable;
import org.apache.samza.table.Table;
import org.apache.samza.table.descriptors.TableDescriptor;
import org.apache.samza.task.InitableTask;
import org.apache.samza.task.MessageCollector;
import org.apache.samza.task.StreamTask;
import org.apache.samza.task.StreamTaskFactory;
import org.apache.samza.task.TaskCoordinator;
import org.apache.samza.test.framework.StreamAssert;
import org.apache.samza.test.framework.TestRunner;
import org.apache.samza.test.framework.system.descriptors.InMemoryInputDescriptor;
import org.apache.samza.test.framework.system.descriptors.InMemoryOutputDescriptor;
import org.apache.samza.test.framework.system.descriptors.InMemorySystemDescriptor;
import org.junit.Test;
import static org.apache.samza.test.table.TestTableData.EnrichedPageView;
import static org.apache.samza.test.table.TestTableData.PageView;
import static org.apache.samza.test.table.TestTableData.Profile;
import static org.apache.samza.test.table.TestTableData.ProfileJsonSerde;
public class TestLocalTableWithSideInputsEndToEnd {
private static final String SYSTEM_NAME = "test";
private static final String PAGEVIEW_STREAM = "pageview";
private static final String PROFILE_STREAM = "profile";
private static final String PROFILE_TABLE = "profile-table";
private static final String ENRICHED_PAGEVIEW_STREAM = "enrichedpageview";
private static final SystemStream OUTPUT_SYSTEM_STREAM = new SystemStream(SYSTEM_NAME, ENRICHED_PAGEVIEW_STREAM);
@Test
public void testLowLevelJoinWithSideInputsTable() throws InterruptedException {
int partitionCount = 4;
IntegerSerde integerSerde = new IntegerSerde();
// for low-level, need to pre-partition the input in the same way that the profiles are partitioned
Map<Integer, List<PageView>> pageViewsPartitionedByMemberId =
TestTableData.generatePartitionedPageViews(20, partitionCount)
.values()
.stream()
.flatMap(List::stream)
.collect(Collectors.groupingBy(
pageView -> Math.abs(Arrays.hashCode(integerSerde.toBytes(pageView.getMemberId()))) % partitionCount));
runTest(
new LowLevelPageViewProfileJoin(),
pageViewsPartitionedByMemberId,
TestTableData.generatePartitionedProfiles(10, partitionCount));
}
@Test
public void testJoinWithSideInputsTable() throws InterruptedException {
runTest(
new PageViewProfileJoin(),
TestTableData.generatePartitionedPageViews(20, 4),
TestTableData.generatePartitionedProfiles(10, 4));
}
@Test
public void testJoinWithDurableSideInputTable() throws InterruptedException {
runTest(
new DurablePageViewProfileJoin(),
TestTableData.generatePartitionedPageViews(20, 4),
TestTableData.generatePartitionedProfiles(10, 4));
}
private <T extends ApplicationDescriptor<?>> void runTest(SamzaApplication<T> app,
Map<Integer, List<PageView>> pageViews,
Map<Integer, List<Profile>> profiles) throws InterruptedException {
InMemorySystemDescriptor isd = new InMemorySystemDescriptor(SYSTEM_NAME);
InMemoryInputDescriptor<PageView> pageViewStreamDesc = isd
.getInputDescriptor(PAGEVIEW_STREAM, new NoOpSerde<>());
InMemoryInputDescriptor<Profile> profileStreamDesc = isd
.getInputDescriptor(PROFILE_STREAM, new NoOpSerde<>());
InMemoryOutputDescriptor<EnrichedPageView> outputStreamDesc = isd
.getOutputDescriptor(ENRICHED_PAGEVIEW_STREAM, new NoOpSerde<>());
TestRunner.of(app)
.addInputStream(pageViewStreamDesc, pageViews)
.addInputStream(profileStreamDesc, profiles)
.addOutputStream(outputStreamDesc, 1)
.run(Duration.ofSeconds(10));
List<EnrichedPageView> expectedEnrichedPageViews = buildExpectedEnrichedPageViews(pageViews, profiles);
StreamAssert.containsInAnyOrder(expectedEnrichedPageViews, outputStreamDesc, Duration.ofSeconds(1));
}
private static List<EnrichedPageView> buildExpectedEnrichedPageViews(Map<Integer, List<PageView>> pageViews,
Map<Integer, List<Profile>> profiles) {
ImmutableMap.Builder<Integer, Profile> profilesByMemberIdBuilder = new ImmutableMap.Builder<>();
profiles.values()
.stream()
.flatMap(List::stream)
.forEach(profile -> profilesByMemberIdBuilder.put(profile.getMemberId(), profile));
Map<Integer, Profile> profilesByMemberId = profilesByMemberIdBuilder.build();
ImmutableList.Builder<EnrichedPageView> enrichedPageViewsBuilder = new ImmutableList.Builder<>();
pageViews.values()
.stream()
.flatMap(List::stream)
.forEach(pageView -> Optional.ofNullable(profilesByMemberId.get(pageView.getMemberId()))
.ifPresent(profile -> enrichedPageViewsBuilder.add(
new EnrichedPageView(pageView.getPageKey(), profile.getMemberId(), profile.getCompany()))));
return enrichedPageViewsBuilder.build();
}
static class LowLevelPageViewProfileJoin implements TaskApplication {
@Override
public void describe(TaskApplicationDescriptor appDescriptor) {
DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor(SYSTEM_NAME);
appDescriptor.withInputStream(sd.getInputDescriptor(PAGEVIEW_STREAM, new NoOpSerde<>()));
appDescriptor.withInputStream(sd.getInputDescriptor(PROFILE_STREAM, new NoOpSerde<>()));
TableDescriptor<Integer, Profile, ?> tableDescriptor = new InMemoryTableDescriptor<>(PROFILE_TABLE,
KVSerde.of(new IntegerSerde(), new ProfileJsonSerde())).withSideInputs(ImmutableList.of(PROFILE_STREAM))
.withSideInputsProcessor((msg, store) -> {
Profile profile = (Profile) msg.getMessage();
int key = profile.getMemberId();
return ImmutableList.of(new Entry<>(key, profile));
});
appDescriptor.withTable(tableDescriptor);
appDescriptor.withOutputStream(sd.getOutputDescriptor(ENRICHED_PAGEVIEW_STREAM, new NoOpSerde<>()));
appDescriptor.withTaskFactory((StreamTaskFactory) PageViewProfileJoinStreamTask::new);
}
}
static class PageViewProfileJoinStreamTask implements InitableTask, StreamTask {
private ReadWriteTable<Integer, Profile> profileTable;
@Override
public void init(Context context) {
this.profileTable = context.getTaskContext().getTable(PROFILE_TABLE);
}
@Override
public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) {
PageView pageView = (PageView) envelope.getMessage();
Profile profile = this.profileTable.get(pageView.getMemberId());
if (profile != null) {
EnrichedPageView enrichedPageView =
new EnrichedPageView(pageView.getPageKey(), profile.getMemberId(), profile.getCompany());
collector.send(new OutgoingMessageEnvelope(OUTPUT_SYSTEM_STREAM, enrichedPageView));
}
}
}
static class PageViewProfileJoin implements StreamApplication {
@Override
public void describe(StreamApplicationDescriptor appDescriptor) {
Table<KV<Integer, TestTableData.Profile>> table = appDescriptor.getTable(getTableDescriptor());
DelegatingSystemDescriptor sd = new DelegatingSystemDescriptor(SYSTEM_NAME);
appDescriptor.getInputStream(sd.getInputDescriptor(PAGEVIEW_STREAM, new NoOpSerde<TestTableData.PageView>()))
.partitionBy(TestTableData.PageView::getMemberId, v -> v,
KVSerde.of(new IntegerSerde(), new TestTableData.PageViewJsonSerde()), "partition-page-view")
.join(table, new PageViewToProfileJoinFunction())
.sendTo(appDescriptor.getOutputStream(sd.getOutputDescriptor(ENRICHED_PAGEVIEW_STREAM, new NoOpSerde<>())));
}
protected TableDescriptor<Integer, Profile, ?> getTableDescriptor() {
return new InMemoryTableDescriptor<>(PROFILE_TABLE, KVSerde.of(new IntegerSerde(), new ProfileJsonSerde()))
.withSideInputs(ImmutableList.of(PROFILE_STREAM))
.withSideInputsProcessor((msg, store) -> {
Profile profile = (Profile) msg.getMessage();
int key = profile.getMemberId();
return ImmutableList.of(new Entry<>(key, profile));
});
}
}
static class DurablePageViewProfileJoin extends PageViewProfileJoin {
@Override
protected TableDescriptor<Integer, Profile, ?> getTableDescriptor() {
return new RocksDbTableDescriptor<>(PROFILE_TABLE, KVSerde.of(new IntegerSerde(), new ProfileJsonSerde()))
.withSideInputs(ImmutableList.of(PROFILE_STREAM))
.withSideInputsProcessor((msg, store) -> {
TestTableData.Profile profile = (TestTableData.Profile) msg.getMessage();
int key = profile.getMemberId();
return ImmutableList.of(new Entry<>(key, profile));
});
}
}
}