blob: 25de4be4997ab1f319ce4294f9650d2eef5339a9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io.gcp.pubsub;
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.OutgoingMessage;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubClient.TopicPath;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubTestClient.PubsubTestClientFactory;
import org.apache.beam.sdk.io.gcp.pubsub.PubsubUnboundedSink.RecordIdMethod;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.testing.CoderProperties;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v20_0.com.google.common.hash.Hashing;
import org.joda.time.Duration;
import org.joda.time.Instant;
import org.junit.Rule;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
/** Test PubsubUnboundedSink. */
@RunWith(JUnit4.class)
public class PubsubUnboundedSinkTest implements Serializable {
private static final TopicPath TOPIC = PubsubClient.topicPathFromName("testProject", "testTopic");
private static final String DATA = "testData";
private static final ImmutableMap<String, String> ATTRIBUTES =
ImmutableMap.<String, String>builder().put("a", "b").put("c", "d").build();
private static final long TIMESTAMP = 1234L;
private static final String TIMESTAMP_ATTRIBUTE = "timestamp";
private static final String ID_ATTRIBUTE = "id";
private static final int NUM_SHARDS = 10;
private static class Stamp extends DoFn<String, PubsubMessage> {
private final Map<String, String> attributes;
private Stamp() {
this(ImmutableMap.of());
}
private Stamp(Map<String, String> attributes) {
this.attributes = attributes;
}
@ProcessElement
public void processElement(ProcessContext c) {
c.outputWithTimestamp(
new PubsubMessage(c.element().getBytes(StandardCharsets.UTF_8), attributes),
new Instant(TIMESTAMP));
}
}
private String getRecordId(String data) {
return Hashing.murmur3_128().hashBytes(data.getBytes(StandardCharsets.UTF_8)).toString();
}
@Rule public transient TestPipeline p = TestPipeline.create();
@Test
public void saneCoder() throws Exception {
OutgoingMessage message =
new OutgoingMessage(
DATA.getBytes(StandardCharsets.UTF_8), ImmutableMap.of(), TIMESTAMP, getRecordId(DATA));
CoderProperties.coderDecodeEncodeEqual(PubsubUnboundedSink.CODER, message);
CoderProperties.coderSerializable(PubsubUnboundedSink.CODER);
}
@Test
public void sendOneMessage() throws IOException {
List<OutgoingMessage> outgoing =
ImmutableList.of(
new OutgoingMessage(
DATA.getBytes(StandardCharsets.UTF_8), ATTRIBUTES, TIMESTAMP, getRecordId(DATA)));
int batchSize = 1;
int batchBytes = 1;
try (PubsubTestClientFactory factory =
PubsubTestClient.createFactoryForPublish(TOPIC, outgoing, ImmutableList.of())) {
PubsubUnboundedSink sink =
new PubsubUnboundedSink(
factory,
StaticValueProvider.of(TOPIC),
TIMESTAMP_ATTRIBUTE,
ID_ATTRIBUTE,
NUM_SHARDS,
batchSize,
batchBytes,
Duration.standardSeconds(2),
RecordIdMethod.DETERMINISTIC);
p.apply(Create.of(ImmutableList.of(DATA))).apply(ParDo.of(new Stamp(ATTRIBUTES))).apply(sink);
p.run();
}
// The PubsubTestClientFactory will assert fail on close if the actual published
// message does not match the expected publish message.
}
@Test
public void sendOneMessageWithoutAttributes() throws IOException {
List<OutgoingMessage> outgoing =
ImmutableList.of(
new OutgoingMessage(
DATA.getBytes(StandardCharsets.UTF_8),
null /* attributes */,
TIMESTAMP,
getRecordId(DATA)));
try (PubsubTestClientFactory factory =
PubsubTestClient.createFactoryForPublish(TOPIC, outgoing, ImmutableList.of())) {
PubsubUnboundedSink sink =
new PubsubUnboundedSink(
factory,
StaticValueProvider.of(TOPIC),
TIMESTAMP_ATTRIBUTE,
ID_ATTRIBUTE,
NUM_SHARDS,
1 /* batchSize */,
1 /* batchBytes */,
Duration.standardSeconds(2),
RecordIdMethod.DETERMINISTIC);
p.apply(Create.of(ImmutableList.of(DATA)))
.apply(ParDo.of(new Stamp(null /* attributes */)))
.apply(sink);
p.run();
}
// The PubsubTestClientFactory will assert fail on close if the actual published
// message does not match the expected publish message.
}
@Test
public void sendMoreThanOneBatchByNumMessages() throws IOException {
List<OutgoingMessage> outgoing = new ArrayList<>();
List<String> data = new ArrayList<>();
int batchSize = 2;
int batchBytes = 1000;
for (int i = 0; i < batchSize * 10; i++) {
String str = String.valueOf(i);
outgoing.add(
new OutgoingMessage(
str.getBytes(StandardCharsets.UTF_8),
ImmutableMap.of(),
TIMESTAMP,
getRecordId(str)));
data.add(str);
}
try (PubsubTestClientFactory factory =
PubsubTestClient.createFactoryForPublish(TOPIC, outgoing, ImmutableList.of())) {
PubsubUnboundedSink sink =
new PubsubUnboundedSink(
factory,
StaticValueProvider.of(TOPIC),
TIMESTAMP_ATTRIBUTE,
ID_ATTRIBUTE,
NUM_SHARDS,
batchSize,
batchBytes,
Duration.standardSeconds(2),
RecordIdMethod.DETERMINISTIC);
p.apply(Create.of(data)).apply(ParDo.of(new Stamp())).apply(sink);
p.run();
}
// The PubsubTestClientFactory will assert fail on close if the actual published
// message does not match the expected publish message.
}
@Test
public void sendMoreThanOneBatchByByteSize() throws IOException {
List<OutgoingMessage> outgoing = new ArrayList<>();
List<String> data = new ArrayList<>();
int batchSize = 100;
int batchBytes = 10;
int n = 0;
while (n < batchBytes * 10) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < batchBytes; i++) {
sb.append(String.valueOf(n));
}
String str = sb.toString();
outgoing.add(
new OutgoingMessage(
str.getBytes(StandardCharsets.UTF_8),
ImmutableMap.of(),
TIMESTAMP,
getRecordId(str)));
data.add(str);
n += str.length();
}
try (PubsubTestClientFactory factory =
PubsubTestClient.createFactoryForPublish(TOPIC, outgoing, ImmutableList.of())) {
PubsubUnboundedSink sink =
new PubsubUnboundedSink(
factory,
StaticValueProvider.of(TOPIC),
TIMESTAMP_ATTRIBUTE,
ID_ATTRIBUTE,
NUM_SHARDS,
batchSize,
batchBytes,
Duration.standardSeconds(2),
RecordIdMethod.DETERMINISTIC);
p.apply(Create.of(data)).apply(ParDo.of(new Stamp())).apply(sink);
p.run();
}
// The PubsubTestClientFactory will assert fail on close if the actual published
// message does not match the expected publish message.
}
// TODO: We would like to test that failed Pubsub publish calls cause the already assigned
// (and random) record ids to be reused. However that can't be done without the test runnner
// supporting retrying bundles.
}