#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is broken into multiple sections delimited by ---. Each section specifies a set of
# reference encodings for a single standardized coder used in a specific context.
#
# Each section contains up to 3 properties:
#
# coder: a common coder spec. Currently, a URN and URNs for component coders as necessary.
# nested: a boolean meaning whether the coder was used in the nested context. Missing means to
# test both contexts, a shorthand for when the coder is invariant across context.
# examples: a map of {encoded bytes: original JSON object} encoded with the coder in the context.
# The LHS (key) is a byte array encoded as a JSON-escaped string. The RHS (value) is
# one of a few standard JSON types such as numbers, strings, dicts that map naturally
# to the type encoded by the coder.
#
# Java code snippet to generate example bytes:
# Coder<Timer<String>> coder = Timer.Coder.of(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
# Instant now = new Instant(1000L);
# Timer<String> timer = Timer.of(
# "key",
# "tag",
# Collections.singletonList(GlobalWindow.INSTANCE),
# now,
# now,
# PaneInfo.NO_FIRING);
# byte[] bytes = CoderUtils.encodeToByteArray(coder, timer);
# String str = new String(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
# String example = "";
# for(int i = 0; i < str.length(); i++){
# example += CharUtils.unicodeEscaped(str.charAt(i));
# }
#
# These choices were made to strike a balance between portability, ease of use, and simple
# legibility of this file itself.
#
# It is expected that future work will move the `coder` field into a format that it would be
# represented by the Runner API, so that it can be understood by all SDKs and harnesses.
#
# If a coder is marked non-deterministic in the coder spec, then only the decoding should be validated.
# bytes coder, outer (non-nested) context: the expected encoding is the raw
# bytes themselves, with no length prefix.
coder:
  urn: "beam:coder:bytes:v1"
nested: false
examples:
  "abc": abc
  "ab\0c": "ab\0c"
---
# bytes coder, nested context: every value is preceded by its length.
# The last example is exactly 200 bytes long (20 fixed-width columns of 10
# characters each), matching its \u00c8\u0001 length prefix. The column
# padding is significant: do not collapse the spaces.
coder:
  urn: "beam:coder:bytes:v1"
nested: true
examples:
  "\u0003abc": abc
  "\u0004ab\0c": "ab\0c"
  "\u00c8\u0001       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|":
    "       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|"
---
# bool coder: a single byte, 0 for false and 1 for true. No `nested` key, so
# the examples apply to both contexts.
coder:
  urn: "beam:coder:bool:v1"
examples:
  "\0": false
  "\u0001": true
---
# string_utf8 coder, outer (non-nested) context: the key is the UTF-8 byte
# sequence of the value (e.g. U+00FF is the two bytes C3 BF), with no length
# prefix.
coder:
  urn: "beam:coder:string_utf8:v1"
nested: false
examples:
  "abc": abc
  "ab\0c": "ab\0c"
  "\u00c3\u00bf": "\u00ff"
  "\u00e5\u0085\u0089\u00e7\u00ba\u00bf": "光线"
---
# string_utf8 coder, nested context: the UTF-8 bytes are preceded by their
# byte length (not the character count: "\u00ff" is prefixed with 2).
# The last example is exactly 200 bytes long (20 fixed-width columns of 10
# characters each), matching its \u00c8\u0001 length prefix. The column
# padding is significant: do not collapse the spaces.
coder:
  urn: "beam:coder:string_utf8:v1"
nested: true
examples:
  "\u0003abc": abc
  "\u0004ab\0c": "ab\0c"
  "\u0002\u00c3\u00bf": "\u00ff"
  "\u0006\u00e5\u0085\u0089\u00e7\u00ba\u00bf": "光线"
  "\u00c8\u0001       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|":
    "       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|"
---
# varint coder: values up to 10 take one byte here, 200/1000/9001 take two,
# and -1 takes ten bytes. No `nested` key, so the examples apply to both
# contexts.
coder:
  urn: "beam:coder:varint:v1"
examples:
  "\0": 0
  "\u0001": 1
  "\u000A": 10
  "\u00c8\u0001": 200
  "\u00e8\u0007": 1000
  "\u00a9\u0046": 9001
  "\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u0001": -1
---
# kv coder over (bytes, varint): the key is length-prefixed bytes, followed
# directly by the varint value.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:varint:v1"
examples:
  "\u0003abc\0": {key: abc, value: 0}
  "\u0004ab\0c\u000A": {key: "ab\0c", value: 10}
---
# kv coder over (bytes, bytes), outer (non-nested) context: only the key is
# length-prefixed; the value is the remaining bytes.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:bytes:v1"
nested: false
examples:
  "\u0003abcdef": {key: abc, value: def}
  "\u0004ab\0cde\0f": {key: "ab\0c", value: "de\0f"}
---
# kv coder over (bytes, bytes), nested context: both the key and the value
# are length-prefixed.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:bytes:v1"
nested: true
examples:
  "\u0003abc\u0003def": {key: abc, value: def}
  "\u0004ab\0c\u0004de\0f": {key: "ab\0c", value: "de\0f"}
---
# kv coder over (bytes, bool): the key is length-prefixed bytes, followed by
# the single bool byte.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:bool:v1"
examples:
  "\u0003abc\u0001": {key: abc, value: true}
  "\u0004ab\0c\0": {key: "ab\0c", value: false}
---
# interval_window coder: each example maps the encoded bytes to the window's
# `end` and `span`. In these examples the first eight bytes carry `end` and
# the remaining bytes carry `span`.
coder:
  urn: "beam:coder:interval_window:v1"
examples:
  "\u0080\u0000\u0001\u0052\u009a\u00a4\u009b\u0068\u0080\u00dd\u00db\u0001": {end: 1454293425000, span: 3600000}
  "\u0080\u0000\u0001\u0053\u0034\u00ec\u0074\u00e8\u0080\u0090\u00fb\u00d3\u0009": {end: 1456881825000, span: 2592000000}
  "\u007f\u00df\u003b\u0064\u005a\u001c\u00ad\u0076\u00ed\u0002": {end: -9223372036854410, span: 365}
  "\u0080\u0020\u00c4\u009b\u00a5\u00e3\u0053\u00f7\u0000": {end: 9223372036854775, span: 0}
---
# iterable coder over varint, known length: a four-byte element count
# followed by the encoded elements.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:varint:v1"
examples:
  "\0\0\0\u0001\0": [0]
  "\0\0\0\u0004\u0001\n\u00c8\u0001\u00e8\u0007": [1, 10, 200, 1000]
  "\0\0\0\0": []
---
# iterable coder over bytes, known length: a four-byte element count
# followed by the length-prefixed elements.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:bytes:v1"
examples:
  "\0\0\0\u0001\u0003abc": ["abc"]
  "\0\0\0\u0002\u0004ab\0c\u0004de\0f": ["ab\0c", "de\0f"]
  "\0\0\0\0": []
---
# iterable coder over bool, known length: a four-byte element count followed
# by one byte per element.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:bool:v1"
examples:
  "\0\0\0\u0001\u0001": [true]
  "\0\0\0\u0002\u0001\0": [true, false]
  "\0\0\0\0": []
---
# Because this coder is marked non-deterministic, only decoding of these
# examples should be validated.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:bytes:v1"
  # This is for iterables of unknown length, where the encoding is not
  # deterministic.
  non_deterministic: true
examples:
  "\u00ff\u00ff\u00ff\u00ff\u0000": []
  "\u00ff\u00ff\u00ff\u00ff\u0001\u0003abc\u0000": ["abc"]
  "\u00ff\u00ff\u00ff\u00ff\u0002\u0004ab\u0000c\u0004de\u0000f\u0000": ["ab\0c", "de\0f"]
---
# iterable coder over global_window: the element itself contributes no
# bytes, so only the four-byte element count is present.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:global_window:v1"
examples:
  "\0\0\0\u0001": [""]
---
# timer coder over (string_utf8 key, global_window).
# The first example is a set timer carrying fire/hold timestamps and a pane;
# the second is a cleared timer, whose encoding ends right after the clear
# bit.
coder:
  urn: "beam:coder:timer:v1"
  components:
    - urn: "beam:coder:string_utf8:v1"
    - urn: "beam:coder:global_window:v1"
examples:
  "\u0003\u006b\u0065\u0079\u0003\u0074\u0061\u0067\u0000\u0000\u0000\u0001\u0000\u0080\u0000\u0000\u0000\u0000\u0000\u0004\u00d2\u0080\u0000\u0000\u0000\u0000\u0000\u0016\u002e\u000f":
    userKey: key
    dynamicTimerTag: tag
    windows: ["global"]
    clearBit: false
    fireTimestamp: 1234
    holdTimestamp: 5678
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
  "\u0003\u006b\u0065\u0079\u0003\u0074\u0061\u0067\u0000\u0000\u0000\u0001\u0001":
    userKey: key
    dynamicTimerTag: tag
    windows: ["global"]
    clearBit: true
---
# global_window coder: the encoding is empty (zero bytes).
coder:
  urn: "beam:coder:global_window:v1"
examples:
  "": ""
---
# All windowed values consist of pane infos that represent NO_FIRING until full support is added
# in the Python SDK (BEAM-1522).
# windowed_value coder over (varint value, global_window).
coder:
  urn: "beam:coder:windowed_value:v1"
  components:
    - urn: "beam:coder:varint:v1"
    - urn: "beam:coder:global_window:v1"
examples:
  "\u0080\0\u0001R\u009a\u00a4\u009bh\0\0\0\u0001\u000f\u0002":
    value: 2
    timestamp: 1454293425000
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
    windows: ["global"]
---
# windowed_value coder over (varint value, interval_window); the second
# example carries two windows.
coder:
  urn: "beam:coder:windowed_value:v1"
  components:
    - urn: "beam:coder:varint:v1"
    - urn: "beam:coder:interval_window:v1"
examples:
  "\u007f\u00ff\u00ff\u00ff\u00ff\u00f9\u00e5\u0080\0\0\0\u0001\u0080\0\u0001R\u009a\u00a4\u009bh\u00c0\u008b\u0011\u000f\u0004":
    value: 4
    timestamp: -400000
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
    windows: [{end: 1454293425000, span: 280000}]
  "\u007f\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u009c\0\0\0\u0002\u0080\0\u0001R\u009a\u00a4\u009bh\u0080\u00dd\u00db\u0001\u007f\u00df;dZ\u001c\u00adv\u00ed\u0002\u000f\u0002":
    value: 2
    timestamp: -100
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
    windows: [{end: 1454293425000, span: 3600000}, {end: -9223372036854410, span: 365}]
---
# ParamWindowedValueCoder with constant value of:
# timestamp: Instant.ofEpochMilli(1000)
# windows: [IntervalWindow(10, 20)]
# pane info: PaneInfo(false, true, PaneInfo.Timing.ON_TIME, 30, 40)
# The constant timestamp, window and pane live in `payload`; the encoded
# example therefore contains only the value itself.
coder:
  urn: "beam:coder:param_windowed_value:v1"
  payload: "\x80\x00\x00\x00\x00\x00\x03è\x00\x00\x00\x01\x80\x00\x00\x00\x00\x00\x00\x14\n&\x1E(\x00"
  components:
    - urn: "beam:coder:varint:v1"
    - urn: "beam:coder:interval_window:v1"
examples:
  "\u0002":
    value: 2
    timestamp: 1000
    pane: {is_first: false, is_last: true, timing: ON_TIME, index: 30, on_time_index: 40}
    windows: [{end: 20, span: 10}]
---
# ParamWindowedValueCoder with constant value of:
# timestamp: BoundedWindow.TIMESTAMP_MIN_VALUE
# windows: [GlobalWindow.INSTANCE]
# pane info: PaneInfo.NO_FIRING
# The constant timestamp, window and pane live in `payload`; the encoded
# example therefore contains only the value itself.
coder:
  urn: "beam:coder:param_windowed_value:v1"
  payload: "\x7Fß;dZ\x1C¬\t\x00\x00\x00\x01\x0F\x00"
  components:
    - urn: "beam:coder:varint:v1"
    - urn: "beam:coder:global_window:v1"
examples:
  "\u0002":
    value: 2
    timestamp: -9223372036854775
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
    windows: ["global"]
---
# double coder: eight bytes per value. Expected values are deliberately
# quoted strings so that -0, Infinity and NaN can be expressed; keep the
# quotes.
coder:
  urn: "beam:coder:double:v1"
examples:
  "\0\0\0\0\0\0\0\0": "0"
  "\u0080\0\0\0\0\0\0\0": "-0"
  "\u003f\u00b9\u0099\u0099\u0099\u0099\u0099\u009a": "0.1"
  "\u00bf\u00b9\u0099\u0099\u0099\u0099\u0099\u009a": "-0.1"
  "\0\0\0\0\0\0\0\u0001": "4.9e-324"
  "\0\u0001\0\0\0\0\0\0": "1.390671161567e-309"
  "\u007f\u00ef\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff": "1.7976931348623157e308"
  "\u007f\u00f0\0\0\0\0\0\0": "Infinity"
  "\u00ff\u00f0\0\0\0\0\0\0": "-Infinity"
  "\u007f\u00f8\0\0\0\0\0\0": "NaN"
---
# row coder with four non-nullable fields; `payload` is the serialized
# schema. The f64 value is a quoted string, as in the double coder examples.
coder:
  urn: "beam:coder:row:v1"
  # str: string, i32: int32, f64: float64, arr: array[string]
  payload: "\n\t\n\x03str\x1a\x02\x10\x07\n\t\n\x03i32\x1a\x02\x10\x03\n\t\n\x03f64\x1a\x02\x10\x06\n\r\n\x03arr\x1a\x06\x1a\x04\n\x02\x10\x07\x12$4e5e554c-d4c1-4a5d-b5e1-f3293a6b9f05"
nested: false
examples:
  "\u0004\u0000\u0003foo\u00a9\u0046\u003f\u00b9\u0099\u0099\u0099\u0099\u0099\u009a\0\0\0\u0003\u0003foo\u0003bar\u0003baz": {str: "foo", i32: 9001, f64: "0.1", arr: ["foo", "bar", "baz"]}
---
# row coder with three nullable fields; `payload` is the serialized schema.
# In these examples the bytes after the field count mark which fields are
# null (\u0001\u0007 = all three, \u0001\u0004 = the third only, \u0000 =
# none); null fields contribute no further bytes.
coder:
  urn: "beam:coder:row:v1"
  # str: nullable string, i32: nullable int32, f64: nullable float64
  payload: "\n\x0b\n\x03str\x1a\x04\x08\x01\x10\x07\n\x0b\n\x03i32\x1a\x04\x08\x01\x10\x03\n\x0b\n\x03f64\x1a\x04\x08\x01\x10\x06\x12$b20c6545-57af-4bc8-b2a9-51ace21c7393"
nested: false
examples:
  "\u0003\u0001\u0007": {str: null, i32: null, f64: null}
  "\u0003\u0001\u0004\u0003foo\u00a9\u0046": {str: "foo", i32: 9001, f64: null}
  "\u0003\u0000\u0003foo\u00a9\u0046\u003f\u00b9\u0099\u0099\u0099\u0099\u0099\u009a": {str: "foo", i32: 9001, f64: "0.1"}
---
# Binary data generated with the python SDK:
#
# import typing
# import apache_beam as beam
# class Test(typing.NamedTuple):
#   f_bool: bool
#   f_bytes: typing.Optional[bytes]
# schema = beam.typehints.schemas.named_tuple_to_schema(Test)
# coder = beam.coders.row_coder.RowCoder(schema)
# print("payload = %s" % schema.SerializeToString())
# print("example = %s" % coder.encode(Test(f_bool=True, f_bytes=None)))
# row coder with a boolean field and a nullable bytes field; `payload` is
# the serialized schema. In the first example f_bytes is null and contributes
# no bytes after the f_bool byte.
coder:
  urn: "beam:coder:row:v1"
  # f_bool: boolean, f_bytes: nullable bytes
  payload: "\n\x0c\n\x06f_bool\x1a\x02\x10\x08\n\x0f\n\x07f_bytes\x1a\x04\x08\x01\x10\t\x12$eea1b747-7571-43d3-aafa-9255afdceafb"
nested: false
examples:
  "\x02\x01\x02\x01": {f_bool: true, f_bytes: null}
  "\x02\x00\x00\x04ab\x00c": {f_bool: false, f_bytes: "ab\0c"}