blob: c36b942bc34ffc1a8e5cc2b152fcc966ef969699 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.sdk.transforms.splittabledofn;
import org.apache.beam.sdk.transforms.DoFn;
/**
* Manages access to the restriction and keeps track of its claimed part for a <a
* href="https://s.apache.org/splittable-do-fn">splittable</a> {@link DoFn}.
*/
public abstract class RestrictionTracker<RestrictionT, PositionT> {
/**
* Attempts to claim the block of work in the current restriction identified by the given
* position.
*
* <p>If this succeeds, the DoFn MUST execute the entire block of work. If this fails:
*
* <ul>
* <li>{@link DoFn.ProcessElement} MUST return {@link DoFn.ProcessContinuation#stop} without
* performing any additional work or emitting output (note that emitting output or
* performing work from {@link DoFn.ProcessElement} is also not allowed before the first
* call to this method).
* <li>{@link RestrictionTracker#checkDone} MUST succeed.
* </ul>
*/
public abstract boolean tryClaim(PositionT position);
/**
* Returns a restriction accurately describing the full range of work the current {@link
* DoFn.ProcessElement} call will do, including already completed work.
*/
public abstract RestrictionT currentRestriction();
/**
* Splits current restriction based on {@code fractionOfRemainder}.
*
* <p>If splitting the current restriction is possible, the current restriction is split into a
* primary and residual restriction pair. This invocation updates the {@link
* #currentRestriction()} to be the primary restriction effectively having the current {@link
* DoFn.ProcessElement} execution responsible for performing the work that the primary restriction
* represents. The residual restriction will be executed in a separate {@link DoFn.ProcessElement}
* invocation (likely in a different process). The work performed by executing the primary and
* residual restrictions as separate {@link DoFn.ProcessElement} invocations MUST be equivalent to
* the work performed as if this split never occurred.
*
* <p>The {@code fractionOfRemainder} should be used in a best effort manner to choose a primary
* and residual restriction based upon the fraction of the remaining work that the current {@link
* DoFn.ProcessElement} invocation is responsible for. For example, if a {@link
* DoFn.ProcessElement} was reading a file with a restriction representing the offset range {@code
* [100, 200)} and has processed up to offset 130 with a {@code fractionOfRemainder} of {@code
* 0.7}, the primary and residual restrictions returned would be {@code [100, 179), [179, 200)}
* (note: {@code currentOffset + fractionOfRemainder * remainingWork = 130 + 0.7 * 70 = 179}).
*
* <p>{@code fractionOfRemainder = 0} means a checkpoint is required.
*
* <p>The API is recommended to be implemented for batch pipeline given that it is very important
* for pipeline scaling and end to end pipeline execution.
*
* <p>The API is required to be implemented for a streaming pipeline.
*
* @param fractionOfRemainder A hint as to the fraction of work the primary restriction should
* represent based upon the current known remaining amount of work.
* @return a {@link SplitResult} if a split was possible, otherwise returns {@code null}.
*/
public abstract SplitResult<RestrictionT> trySplit(double fractionOfRemainder);
/**
* Called by the runner after {@link DoFn.ProcessElement} returns.
*
* <p>Must throw an exception with an informative error message, if there is still any unclaimed
* work remaining in the restriction.
*/
public abstract void checkDone() throws IllegalStateException;
}