blob: 66eed6e86d4cb67c7d5e602011a97b186c08f55d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.io.gcp.pubsub;
import static org.apache.beam.sdk.io.gcp.pubsub.PubsubMessageToRow.TIMESTAMP_FIELD;
import org.apache.beam.sdk.schemas.transforms.DropFields;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.WithTimestamps;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Adds a timestamp attribute if desired and filters it out of the underlying row if no timestamp
* attribute exists.
*/
@SuppressWarnings({
"nullness" // TODO(https://issues.apache.org/jira/browse/BEAM-10402)
})
class AddTimestampAttribute extends PTransform<PCollection<Row>, PCollection<Row>> {
private static final Logger LOG = LoggerFactory.getLogger(AddTimestampAttribute.class);
private final boolean useTimestampAttribute;
AddTimestampAttribute(boolean useTimestampAttribute) {
this.useTimestampAttribute = useTimestampAttribute;
}
@Override
public PCollection<Row> expand(PCollection<Row> input) {
// If a timestamp attribute is used, make sure the TIMESTAMP_FIELD is propagated to the
// element's event time. PubSubIO will populate the attribute from there.
PCollection<Row> withTimestamp =
useTimestampAttribute
? input.apply(WithTimestamps.of((row) -> row.getDateTime(TIMESTAMP_FIELD).toInstant()))
: input;
PCollection<Row> rows;
if (withTimestamp.getSchema().hasField(TIMESTAMP_FIELD)) {
if (!useTimestampAttribute) {
// Warn the user if they're writing data to TIMESTAMP_FIELD, but event timestamp is mapped
// to publish time. The data will be dropped.
LOG.warn(
String.format(
"Dropping output field '%s' before writing to PubSub because this is a read-only "
+ "column. To preserve this information you must configure a timestamp attribute.",
TIMESTAMP_FIELD));
}
rows = withTimestamp.apply(DropFields.fields(TIMESTAMP_FIELD));
} else {
rows = withTimestamp;
}
return rows;
}
}