blob: 8cfc939755ef7afa523e5466d41f34e4b83b478a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.plans.logical
import java.util.concurrent.TimeUnit
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark.updateEventTimeColumn
import org.apache.spark.sql.catalyst.trees.TreePattern.{EVENT_TIME_WATERMARK, TreePattern, UPDATE_EVENT_TIME_WATERMARK_COLUMN}
import org.apache.spark.sql.catalyst.util.IntervalUtils
import org.apache.spark.sql.types.MetadataBuilder
import org.apache.spark.unsafe.types.CalendarInterval
object EventTimeWatermark {
  /** The [[org.apache.spark.sql.types.Metadata]] key used to hold the eventTime watermark delay. */
  val delayKey = "spark.watermarkDelayMs"

  /**
   * Converts a watermark delay into milliseconds.
   *
   * @param delay the watermark delay as a [[CalendarInterval]]
   * @return the duration of `delay` in milliseconds, as computed by
   *         `IntervalUtils.getDuration`
   */
  def getDelayMs(delay: CalendarInterval): Long =
    IntervalUtils.getDuration(delay, TimeUnit.MILLISECONDS)

  /**
   * Tags `newEventTime` with the watermark delay and untags every other attribute.
   *
   * Each attribute in `attributes` is handled as follows:
   *  - if it is semantically equal to `newEventTime`, its metadata gains (or overwrites) the
   *    [[delayKey]] entry with `delayMs`;
   *  - otherwise, if its metadata already carries [[delayKey]], that entry is removed;
   *  - otherwise it is returned as is.
   *
   * @param attributes the attributes to rewrite
   * @param delayMs the watermark delay, in milliseconds, to store under [[delayKey]]
   * @param newEventTime the attribute that should hold the event time watermark delay
   * @return the rewritten attributes, in the same order as `attributes`
   */
  def updateEventTimeColumn(
      attributes: Seq[Attribute],
      delayMs: Long,
      newEventTime: Attribute): Seq[Attribute] = {
    attributes.map {
      case attr if attr.semanticEquals(newEventTime) =>
        // This is the new event time column: record the delay on top of its current metadata.
        val taggedMetadata = new MetadataBuilder()
          .withMetadata(attr.metadata)
          .putLong(delayKey, delayMs)
          .build()
        attr.withMetadata(taggedMetadata)

      case attr if attr.metadata.contains(delayKey) =>
        // A different column was previously tagged as eventTime for watermark: drop the tag.
        val untaggedMetadata = new MetadataBuilder()
          .withMetadata(attr.metadata)
          .remove(delayKey)
          .build()
        attr.withMetadata(untaggedMetadata)

      case attr =>
        attr
    }
  }
}
/**
 * Used to mark a user specified column as holding the event time for a row.
 *
 * @param eventTime the attribute that holds the event time
 * @param delay the watermark delay to record on the event time column
 * @param child the plan whose output is being tagged
 */
case class EventTimeWatermark(
    eventTime: Attribute,
    delay: CalendarInterval,
    child: LogicalPlan) extends UnaryNode {

  final override val nodePatterns: Seq[TreePattern] = Seq(EVENT_TIME_WATERMARK)

  // The child's output with the metadata of the eventTime column updated to carry the delay.
  // Updating the metadata is not allowed by default - WatermarkPropagator will throw an
  // exception. The logic is kept here because the compatibility flag is still maintained. (See
  // SQLConf.STATEFUL_OPERATOR_ALLOW_MULTIPLE for details.)
  // TODO: Disallow updating the metadata once we remove the compatibility flag.
  override val output: Seq[Attribute] = {
    val watermarkDelayMs = EventTimeWatermark.getDelayMs(delay)
    updateEventTimeColumn(child.output, watermarkDelayMs, eventTime)
  }

  /** Returns a copy of this node with `newChild` as its child. */
  override protected def withNewChildInternal(newChild: LogicalPlan): EventTimeWatermark =
    copy(child = newChild)
}
/**
 * Updates the event time column to `eventTime` in the child output.
 *
 * Any watermark calculations performed after this node will use the
 * updated eventTimeColumn.
 *
 * @param eventTime the attribute that becomes the event time column
 * @param delay the watermark delay to record on `eventTime`; when `None`, the child output is
 *              passed through unchanged
 * @param child the plan whose output is being updated
 */
case class UpdateEventTimeWatermarkColumn(
    eventTime: Attribute,
    delay: Option[CalendarInterval],
    child: LogicalPlan) extends UnaryNode {

  final override val nodePatterns: Seq[TreePattern] = Seq(UPDATE_EVENT_TIME_WATERMARK_COLUMN)

  // Pattern match on the optional delay instead of pairing `isDefined` with `get`, so the
  // "no delay" case cannot be reached with an unchecked `Option.get`.
  override def output: Seq[Attribute] = delay match {
    case Some(interval) =>
      val delayMs = EventTimeWatermark.getDelayMs(interval)
      updateEventTimeColumn(child.output, delayMs, eventTime)
    case None =>
      // Without a delay there is nothing to record, so the child's attributes are reused as is.
      child.output
  }

  /** Returns a copy of this node with `newChild` as its child. */
  override protected def withNewChildInternal(
      newChild: LogicalPlan): UpdateEventTimeWatermarkColumn =
    copy(child = newChild)
}