website/src/_data/capability-matrix.yml - beam - Git at Google

 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Welcome to Jekyll!

 columns:
   - class: model
     name: Beam Model
   - class: dataflow
     name: Google Cloud Dataflow
   - class: flink
     name: Apache Flink
   - class: spark
     name: Apache Spark
   - class: apex
     name: Apache Apex
   - class: gearpump
     name: Apache Gearpump
   - class: mapreduce
     name: Apache Hadoop MapReduce
   - class: jstorm
     name: JStorm
   - class: ibmstreams
     name: IBM Streams
   - class: samza
     name: Apache Samza
   - class: nemo
     name: Apache Nemo
   - class: jet
     name: Hazelcast Jet

 categories:
   - description: What is being computed?
     anchor: what
     color-b: 'ca1'
     color-y: 'ec3'
     color-p: 'fe5'
     color-n: 'ddd'
     rows:
       - name: ParDo
         values:
           - class: model
             l1: 'Yes'
             l2: element-wise processing
             l3: Element-wise transformation parameterized by a chunk of user code. Elements are processed in bundles, with initialization and termination hooks. Bundle size is chosen by the runner and cannot be controlled by user code. ParDo processes a main input PCollection one element at a time, but provides side input access to additional PCollections.
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: Batch mode uses large bundle sizes. Streaming uses smaller bundle sizes.
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ParDo itself, as per-element transformation with UDFs, is fully supported by Flink for both batch and streaming.
           - class: spark
             l1: 'Yes'
             l2: fully supported
             l3: ParDo applies per-element transformations as Spark FlatMapFunction.
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: Supported through Apex operator that wraps the function and processes data as single element bundles.
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: Gearpump wraps the per-element transformation function into processor execution.
           - class: mapreduce
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: Supported with per-element transformation.
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''
       - name: GroupByKey
         values:
           - class: model
             l1: 'Yes'
             l2: key grouping
             l3: Grouping of key-value pairs per key, window, and pane. (See also other tabs.)
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: "Uses Flink's keyBy for key grouping. When grouping by window in streaming (creating the panes) the Flink runner uses the Beam code. This guarantees support for all windowing and triggering mechanisms."
           - class: spark
             l1: 'Partially'
             l2: fully supported in batch mode
             l3: "Using Spark's <tt>groupByKey</tt>. GroupByKey with multiple trigger firings in streaming mode is a work in progress."
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: "Apex runner uses the Beam code for grouping by window and thereby has support for all windowing and triggering mechanisms. Runner does not implement partitioning yet (BEAM-838)"
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: "Use Gearpump's groupBy and window for key grouping and translate Beam's windowing and triggering to Gearpump's internal implementation."
           - class: mapreduce
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: "Uses Samza's partitionBy for key grouping and Beam's logic for window aggregation and triggering."
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''
       - name: Flatten
         values:
           - class: model
             l1: 'Yes'
             l2: collection concatenation
             l3: Concatenates multiple homogenously typed collections together.
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''
       - name: Combine
         values:
           - class: model
             l1: 'Yes'
             l2: associative &amp; commutative aggregation
             l3: 'Application of an associative, commutative operation over all values ("globally") or over all values associated with each key ("per key"). Can be implemented using ParDo, but often more efficient implementations exist.'
           - class: dataflow
             l1: 'Yes'
             l2: 'efficient execution'
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: 'fully supported'
             l3: Uses a combiner for pre-aggregation for batch and streaming.
           - class: spark
             l1: 'Yes'
             l2: fully supported
             l3: "Using Spark's <tt>combineByKey</tt> and <tt>aggregate</tt> functions."
           - class: apex
             l1: 'Yes'
             l2: 'fully supported'
             l3: "Default Beam translation. Currently no efficient pre-aggregation (BEAM-935)."
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: Use combiner for efficient pre-aggregation.
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: 'Batch mode uses pre-aggregation'
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: 'Batch mode uses pre-aggregation'
       - name: Composite Transforms
         values:
           - class: model
             l1: 'Yes'
             l2: user-defined transformation subgraphs
             l3: Allows easy extensibility for library writers.  In the near future, we expect there to be more information provided at this level -- customized metadata hooks for monitoring, additional runtime/environment hooks, etc.
           - class: dataflow
             l1: 'Partially'
             l2: supported via inlining
             l3: Currently composite transformations are inlined during execution. The structure is later recreated from the names, but other transform level information (if added to the model) will be lost.
           - class: flink
             l1: 'Partially'
             l2: supported via inlining
             l3: ''
           - class: spark
             l1: 'Partially'
             l2: supported via inlining
             l3: ''
           - class: apex
             l1: 'Partially'
             l2: supported via inlining
             l3: ''
           - class: gearpump
             l1: 'Partially'
             l2: supported via inlining
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Partially'
             l2: supported via inlining
             l3: ''
           - class: samza
             l1: 'Partially'
             l2: supported via inlining
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Partially'
             l2: supported via inlining
             l3: ''
       - name: Side Inputs
         values:
           - class: model
             l1: 'Yes'
             l2: additional elements available during DoFn execution
             l3: Side inputs are additional <tt>PCollections</tt> whose contents are computed during pipeline execution and then made accessible to DoFn code. The exact shape of the side input depends both on the <tt>PCollectionView</tt> used to describe the access pattern (interable, map, singleton) and the window of the element from the main input that is currently being processed.
           - class: dataflow
             l1: 'Yes'
             l2: some size restrictions in streaming
             l3: Batch mode supports a distributed implementation, but streaming mode may force some size restrictions. Neither mode is able to push lookups directly up into key-based sources.
           - class: flink
             l1: 'Yes'
             l2: some size restrictions in streaming
             l3: Batch mode supports a distributed implementation, but streaming mode may force some size restrictions. Neither mode is able to push lookups directly up into key-based sources.
           - class: spark
             l1: 'Yes'
             l2: fully supported
             l3: "Using Spark's broadcast variables. In streaming mode, side inputs may update but only between micro-batches."
           - class: apex
             l1: 'Yes'
             l2: size restrictions
             l3: No distributed implementation and therefore size restrictions.
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: Implemented by merging side input as a normal stream in Gearpump
           - class: mapreduce
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: some size restrictions
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: Uses Samza's broadcast operator to distribute the side inputs.
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Partially'
             l2: with restrictions
             l3: Supported only when the side input source is bounded and windowing uses global window
       - name: Source API
         values:
           - class: model
             l1: 'Yes'
             l2: user-defined sources
             l3: Allows users to provide additional input sources. Supports both bounded and unbounded data. Includes hooks necessary to provide efficient parallelization (size estimation, progress information, dynamic splitting, etc).
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: Support includes autotuning features (https://cloud.google.com/dataflow/service/dataflow-service-desc#autotuning-features).
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3:
           - class: spark
             l1: 'Yes'
             l2: fully supported
             l3:
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3:
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: mapreduce
             l1: 'Partially'
             l2: bounded source only
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''
       - name: Splittable DoFn (SDF)
         values:
           - class: model
             l1: 'Partially'
             l2: DoFn where processing of each element can be split for parallelism, or suspended and resumed
             l3: Allows users to develop DoFn's that process a single element in portions ("restrictions"), executed in parallel or sequentially. This supersedes the unbounded and bounded `Source` APIs by supporting all of their features on a per-element basis. See http://s.apache.org/splittable-do-fn. Design is in progress on achieving parity with Source API regarding progress signals.
           - class: dataflow
             l1: 'Yes'
             l2:
             l3: Does not yet support autotuning features of the Source API.
           - class: flink
             l1: 'Yes'
             l2:
             l3:
           - class: spark
             l1: 'Partially'
             l2: supports bounded-per-element SDFs
             l3:
           - class: apex
             l1: 'Partially'
             l2: supports bounded-per-element SDFs
             l3: implementation in streaming mode coming soon
           - class: gearpump
             l1: 'Partially'
             l2: supports bounded-per-element SDFs
             l3:
           - class: mapreduce
             l1: 'No'
             l2: not implemented
             l3:
           - class: jstorm
             l1: 'No'
             l2: not implemented
             l3:
           - class: ibmstreams
             l1: 'No'
             l2: not implemented
             l3:
           - class: samza
             l1: 'Partially'
             l2: supports bounded-per-element SDFs
             l3:
           - class: nemo
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: jet
             l1: 'No'
             l2: not implemented
             l3: ''
       - name: Metrics
         values:
           - class: model
             l1: 'Partially'
             l2: user-provided metrics
             l3: Allow transforms to gather simple metrics across bundles in a <tt>PTransform</tt>. Provide a mechanism to obtain both committed and attempted metrics. Semantically similar to using an additional output, but support partial results as the transform executes, and support both committed and attempted values. Will likely want to augment <tt>Metrics</tt> to be more useful for processing unbounded data by making them windowed.
           - class: dataflow
             l1: 'Partially'
             l2: ''
             l3: Gauge metrics are not supported. All other metric types are supported.
           - class: flink
             l1: 'Partially'
             l2: All metrics types are supported.
             l3: Only attempted values are supported. No committed values for metrics.
           - class: spark
             l1: 'Partially'
             l2: All metric types are supported.
             l3: Only attempted values are supported. No committed values for metrics.
           - class: apex
             l1: 'No'
             l2: Not implemented in runner.
             l3:
           - class: gearpump
             l1: 'No'
             l2: ''
             l3: not implemented
           - class: mapreduce
             l1: 'Partially'
             l2: Only attempted counters are supported
             l3: ''
           - class: jstorm
             l1: 'Partially'
             l2: Metrics are only supported in local mode.
             l3: ''
           - class: ibmstreams
             l1: 'Partially'
             l2: All metrics types are supported.
             l3: Only attempted values are supported. No committed values for metrics.
           - class: samza
             l1: 'Partially'
             l2: Counter and Gauge are supported.
             l3: Only attempted values are supported. No committed values for metrics.
           - class: nemo
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: jet
             l1: 'Partially'
             l2: All metrics types supported, both in batching and streaming mode.
             l3: Doesn't differentiate between committed and attempted values.
       - name: Stateful Processing
         values:
           - class: model
             l1: 'Yes'
             l2: storage per key, per window
             l3: Allows fine-grained access to per-key, per-window persistent state. Necessary for certain use cases (e.g. high-volume windows which store large amounts of data, but typically only access small portions of it; complex state machines; etc.) that are not easily or efficiently addressed via <tt>Combine</tt> or <tt>GroupByKey</tt>+<tt>ParDo</tt>.
           - class: dataflow
             l1: 'Partially'
             l2: non-merging windows
             l3: State is supported for non-merging windows. SetState and MapState are not yet supported.
           - class: flink
             l1: 'Partially'
             l2: non-merging windows
             l3: State is supported for non-merging windows. SetState and MapState are not yet supported.
           - class: spark
             l1: 'No'
             l2: not implemented
             l3: Spark supports per-key state with <tt>mapWithState()</tt> so support should be straightforward.
           - class: apex
             l1: 'Partially'
             l2: non-merging windows
             l3: State is supported for non-merging windows. SetState and MapState are not yet supported.
           - class: gearpump
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: mapreduce
             l1: 'Partially'
             l2: non-merging windows
             l3: ''
           - class: jstorm
             l1: 'Partially'
             l2: non-merging windows
             l3: ''
           - class: ibmstreams
             l1: 'Partially'
             l2: non-merging windows
             l3: ''
           - class: samza
             l1: 'Partially'
             l2: non-merging windows
             l3: 'States are backed up by either rocksDb KV store or in-memory hash map, and persist using changelog.'
           - class: nemo
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: jet
             l1: 'Partially'
             l2: non-merging windows
             l3: ''
   - description: Where in event time?
     anchor: where
     color-b: '37d'
     color-y: '59f'
     color-p: '8cf'
     color-n: 'ddd'
     rows:
       - name: Global windows
         values:
           - class: model
             l1: 'Yes'
             l2: all time
             l3: The default window which covers all of time. (Basically how traditional batch cases fit in the model.)
           - class: dataflow
             l1: 'Yes'
             l2: default
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: supported
             l3: ''
       - name: Fixed windows
         values:
           - class: model
             l1: 'Yes'
             l2: periodic, non-overlapping
             l3: Fixed-size, timestamp-based windows. (Hourly, Daily, etc)
           - class: dataflow
             l1: 'Yes'
             l2: built-in
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: supported
             l3: ''
       - name: Sliding windows
         values:
           - class: model
             l1: 'Yes'
             l2: periodic, overlapping
             l3: Possibly overlapping fixed-size timestamp-based windows (Every minute, use the last ten minutes of data.)
           - class: dataflow
             l1: 'Yes'
             l2: built-in
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: supported
             l3: ''
       - name: Session windows
         values:
           - class: model
             l1: 'Yes'
             l2: activity-based
             l3: Based on bursts of activity separated by a gap size. Different per key.
           - class: dataflow
             l1: 'Yes'
             l2: built-in
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: supported
             l3: ''
       - name: Custom windows
         values:
           - class: model
             l1: 'Yes'
             l2: user-defined windows
             l3: All windows must implement <tt>BoundedWindow</tt>, which specifies a max timestamp. Each <tt>WindowFn</tt> assigns elements to an associated window.
           - class: dataflow
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: supported
             l3: ''
       - name: Custom merging windows
         values:
           - class: model
             l1: 'Yes'
             l2: user-defined merging windows
             l3: A custom <tt>WindowFn</tt> additionally specifies whether and how to merge windows.
           - class: dataflow
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: supported
             l3: ''
       - name: Timestamp control
         values:
           - class: model
             l1: 'Yes'
             l2: output timestamp for window panes
             l3: For a grouping transform, such as GBK or Combine, an OutputTimeFn specifies (1) how to combine input timestamps within a window and (2) how to merge aggregated timestamps when windows merge.
           - class: dataflow
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: mapreduce
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: supported
             l3: ''

   - description: When in processing time?
     anchor: when
     color-b: '6a4'
     color-y: '8c6'
     color-p: 'ae8'
     color-n: 'ddd'
     rows:

       - name: Configurable triggering
         values:
           - class: model
             l1: 'Yes'
             l2: user customizable
             l3: Triggering may be specified by the user (instead of simply driven by hardcoded defaults).
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: Fully supported in streaming mode. In batch mode, intermediate trigger firings are effectively meaningless.
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'No'
             l2: ''
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'No'
             l2: ''
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: batch-only runner
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: Event-time triggers
         values:
           - class: model
             l1: 'Yes'
             l2: relative to event time
             l3: Triggers that fire in response to event-time completeness signals, such as watermarks progressing.
           - class: dataflow
             l1: 'Yes'
             l2: yes in streaming, fixed granularity in batch
             l3: Fully supported in streaming mode. In batch mode, currently watermark progress jumps from the beginning of time to the end of time once the input has been fully consumed, thus no additional triggering granularity is available.
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'No'
             l2: ''
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: Processing-time triggers
         values:
           - class: model
             l1: 'Yes'
             l2: relative to processing time
             l3: Triggers that fire in response to processing-time advancing.
           - class: dataflow
             l1: 'Yes'
             l2: yes in streaming, fixed granularity in batch
             l3: Fully supported in streaming mode. In batch mode, from the perspective of triggers, processing time currently jumps from the beginning of time to the end of time once the input has been fully consumed, thus no additional triggering granularity is available.
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: "This is Spark streaming's native model"
             l3: "Spark processes streams in micro-batches. The micro-batch size is actually a pre-set, fixed, time interval. Currently, the runner takes the first window size in the pipeline and sets it's size as the batch interval. Any following window operations will be considered processing time windows and will affect triggering."
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'No'
             l2: ''
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: Count triggers
         values:
           - class: model
             l1: 'Yes'
             l2: every N elements
             l3: Triggers that fire after seeing at least N elements.
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: Fully supported in streaming mode. In batch mode, elements are processed in the largest bundles possible, so count-based triggers are effectively meaningless.
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'No'
             l2: ''
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'No'
             l2: ''
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: '[Meta]data driven triggers'
         values:
           - class: model
             jira: BEAM-101
             l1: 'No'
             l2: in response to data
             l3: Triggers that fire in response to attributes of the data being processed.
           - class: dataflow
             l1: 'No'
             l2: pending model support
             l3:
           - class: flink
             l1: 'No'
             l2: pending model support
             l3:
           - class: spark
             l1: 'No'
             l2: pending model support
             l3:
           - class: apex
             l1: 'No'
             l2: pending model support
             l3:
           - class: gearpump
             l1: 'No'
             l2: pending model support
             l3:
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3:
           - class: jstorm
             l1: 'No'
             l2: pending model support
             l3:
           - class: ibmstreams
             l1: 'No'
             l2: pending model support
             l3:
           - class: samza
             l1: 'No'
             l2: pending model support
             l3:
           - class: nemo
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: jet
             l1: 'No'
             l2: pending model support
             l3: ''

       - name: Composite triggers
         values:
           - class: model
             l1: 'Yes'
             l2: compositions of one or more sub-triggers
             l3: Triggers which compose other triggers in more complex structures, such as logical AND, logical OR, early/on-time/late, etc.
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'No'
             l2: ''
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'No'
             l2: ''
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: Allowed lateness
         values:
           - class: model
             l1: 'Yes'
             l2: event-time bound on window lifetimes
             l3: A way to bound the useful lifetime of a window (in event time), after which any unemitted results may be materialized, the window contents may be garbage collected, and any addtional late data that arrive for the window may be discarded.
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: Fully supported in streaming mode. In batch mode no data is ever late.
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'No'
             l2: ''
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: Timers
         values:
           - class: model
             l1: 'Yes'
             l2: delayed processing callbacks
             l3: A fine-grained mechanism for performing work at some point in the future, in either the event-time or processing-time domain. Useful for orchestrating delayed events, timeouts, etc in complex state per-key, per-window state machines.
           - class: dataflow
             l1: 'Partially'
             l2: non-merging windows
             l3: Dataflow supports timers in non-merging windows.
           - class: flink
             l1: 'Partially'
             l2: non-merging windows
             l3: The Flink Runner supports timers in non-merging windows.
           - class: spark
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: apex
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: gearpump
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'Partially'
             l2: non-merging windows
             l3: ''
           - class: ibmstreams
             l1: 'Partially'
             l2: non-merging windows
             l3: ''
           - class: samza
             l1: 'Partially'
             l2: non-merging windows
             l3: The Samza Runner supports timers in non-merging windows.
           - class: nemo
             l1: 'No'
             l2: not implemented
             l3: ''
           - class: jet
             l1: 'Partially'
             l2: non-merging windows
             l3: ''

   - description: How do refinements relate?
     anchor: how
     color-b: 'b55'
     color-y: 'd77'
     color-p: 'faa'
     color-n: 'ddd'
     rows:

       - name: Discarding
         values:
           - class: model
             l1: 'Yes'
             l2: panes discard elements when fired
             l3: Elements are discarded from accumulated state as their pane is fired.
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'Yes'
             l2: fully supported
             l3: 'Spark streaming natively discards elements after firing.'
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: gearpump
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: batch-only runner
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: Accumulating
         values:
           - class: model
             l1: 'Yes'
             l2: panes accumulate elements across firings
             l3: Elements are accumulated in state across multiple pane firings for the same window.
           - class: dataflow
             l1: 'Yes'
             l2: fully supported
             l3: Requires that the accumulated pane fits in memory, after being passed through the combiner (if relevant)
           - class: flink
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: spark
             l1: 'No'
             l2: ''
             l3: ''
           - class: apex
             l1: 'Yes'
             l2: fully supported
             l3: 'Size restriction, see combine support.'
           - class: gearpump
             l1: 'No'
             l2: ''
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: ibmstreams
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: samza
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: nemo
             l1: 'Yes'
             l2: fully supported
             l3: ''
           - class: jet
             l1: 'Yes'
             l2: fully supported
             l3: ''

       - name: 'Accumulating &amp; Retracting'
         values:
           - class: model
             jira: BEAM-91
             l1: 'No'
             l2: accumulation plus retraction of old panes
             l3: Elements are accumulated across multiple pane firings and old emitted values are retracted. Also known as "backsies" ;-D
           - class: dataflow
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: flink
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: spark
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: apex
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: gearpump
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: mapreduce
             l1: 'No'
             l2: ''
             l3: ''
           - class: jstorm
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: ibmstreams
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: samza
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: nemo
             l1: 'No'
             l2: pending model support
             l3: ''
           - class: jet
             l1: 'No'
             l2: pending model support
             l3: ''
   - description: Additional common features not yet part of the Beam model
     anchor: misc
     color-b: 'aaa'
     color-y: 'bbb'
     color-p: 'ccc'
     color-n: 'ddd'
     rows:
       - name: Drain
         values:
           - class: model
             l1: 'Partially'
             l2:
             l3: APIs and semantics for draining a pipeline are under discussion. This would cause incomplete aggregations to be emitted regardless of trigger and tagged with metadata indicating it is incompleted.
           - class: dataflow
             l1: 'Partially'
             l2:
             l3: Dataflow has a native drain operation, but it does not work in the presence of event time timer loops. Final implemention pending model support.
           - class: flink
             l1: 'Partially'
             l2:
             l3: Flink supports taking a "savepoint" of the pipeline and shutting the pipeline down after its completion.
           - class: spark
             l1:
             l2:
             l3:
           - class: apex
             l1:
             l2:
             l3:
           - class: gearpump
             l1:
             l2:
             l3:
           - class: mapreduce
             l1:
             l2:
             l3:
           - class: jstorm
             l1:
             l2:
             l3:
           - class: ibmstreams
             l1:
             l2:
             l3:
           - class: samza
             l1:
             l2:
             l3:
           - class: nemo
             l1:
             l2:
             l3:
       - name: Checkpoint
         values:
           - class: model
             l1: 'Partially'
             l2:
             l3: APIs and semantics for saving a pipeline checkpoint are under discussion. This would be a runner-specific materialization of the pipeline state required to resume or duplicate the pipeline.
           - class: dataflow
             l1: 'No'
             l2:
             l3:
           - class: flink
             l1: 'Partially'
             l2:
             l3: Flink has a native savepoint capability.
           - class: spark
             l1: 'Partially'
             l2:
             l3: Spark has a native savepoint capability.
           - class: apex
             l1:
             l2:
             l3:
           - class: gearpump
             l1:
             l2:
             l3:
           - class: mapreduce
             l1:
             l2:
             l3:
           - class: jstorm
             l1:
             l2:
             l3:
           - class: ibmstreams
             l1:
             l2:
             l3:
           - class: samza
             l1: 'Partially'
             l2:
             l3: Samza has a native checkpoint capability.
           - class: nemo
             l1:
             l2:
             l3:
           - class: jet
             l1:
             l2:
             l3: