#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file is broken into multiple sections delimited by ---. Each section specifies a set of
# reference encodings for a single standardized coder used in a specific context.
#
# Each section contains up to 3 properties:
#
#   coder: a common coder spec. Currently, a URN and URNs for component coders as necessary.
#   nested: a boolean meaning whether the coder was used in the nested context. Missing means to
#       test both contexts, a shorthand for when the coder is invariant across context.
#   examples: a map of {encoded bytes: original JSON object} encoded with the coder in the context.
#       The LHS (key) is a byte array encoded as a JSON-escaped string. The RHS (value) is
#       one of a few standard JSON types such as numbers, strings, dicts that map naturally
#       to the type encoded by the coder.
#
# These choices were made to strike a balance between portability, ease of use, and simple
# legibility of this file itself.
#
# It is expected that future work will move the `coder` field into the format in which it would be
# represented by the Runner API, so that it can be understood by all SDKs and harnesses.
#
# If a coder is marked non-deterministic in the coder spec, then only the decoding should be validated.
# Raw bytes in the outer (non-nested) context: in these examples the encoded
# form is the payload itself, with no length prefix.
coder:
  urn: "beam:coder:bytes:v1"
nested: false
examples:
  "abc": abc
  "ab\0c": "ab\0c"
---
# Raw bytes in the nested context: each encoding starts with the payload
# length (\u0003 before the 3-byte "abc"). The last example uses the two-byte
# prefix \u00c8\u0001, which the varint examples in this file map to 200, so
# its payload must be exactly 200 bytes: 20 right-aligned fields of 10
# characters each. Do not collapse the padding.
coder:
  urn: "beam:coder:bytes:v1"
nested: true
examples:
  "\u0003abc": abc
  "\u0004ab\0c": "ab\0c"
  "\u00c8\u0001       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|":
    "       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|"
---
# Boolean coder. `nested` is omitted, so both contexts are tested.
# The examples use a single byte per value: \0 for false, \u0001 for true.
coder:
  urn: "beam:coder:bool:v1"
examples:
  "\0": false
  "\u0001": true
---
# UTF-8 string in the outer (non-nested) context: the encoded form is the
# UTF-8 byte sequence alone (U+00FF becomes the two bytes \u00c3 \u00bf),
# with no length prefix.
coder:
  urn: "beam:coder:string_utf8:v1"
nested: false
examples:
  "abc": abc
  "ab\0c": "ab\0c"
  "\u00c3\u00bf": "\u00ff"
  "\u00e5\u0085\u0089\u00e7\u00ba\u00bf": "光线"
---
# UTF-8 string in the nested context: the UTF-8 bytes are preceded by their
# byte length (\u0002 for U+00FF, \u0006 for the two 3-byte characters).
# The last example uses the two-byte prefix \u00c8\u0001, which the varint
# examples in this file map to 200, so its payload must be exactly 200 bytes:
# 20 right-aligned fields of 10 characters each. Do not collapse the padding.
coder:
  urn: "beam:coder:string_utf8:v1"
nested: true
examples:
  "\u0003abc": abc
  "\u0004ab\0c": "ab\0c"
  "\u0002\u00c3\u00bf": "\u00ff"
  "\u0006\u00e5\u0085\u0089\u00e7\u00ba\u00bf": "光线"
  "\u00c8\u0001       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|":
    "       10|       20|       30|       40|       50|       60|       70|       80|       90|      100|      110|      120|      130|      140|      150|      160|      170|      180|      190|      200|"
---
# Variable-length integer coder. `nested` is omitted, so both contexts are
# tested. In these examples 0, 1 and 10 take one byte, 200 through 9001 take
# two bytes, and -1 takes ten bytes.
coder:
  urn: "beam:coder:varint:v1"
examples:
  "\0": 0
  "\u0001": 1
  "\u000A": 10
  "\u00c8\u0001": 200
  "\u00e8\u0007": 1000
  "\u00a9\u0046": 9001
  "\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u0001": -1
---
# Key/value pair of bytes key and varint value. `nested` is omitted, so both
# contexts are tested. The key is length-prefixed and the varint value
# follows it directly.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:varint:v1"
examples:
  "\u0003abc\0": {key: abc, value: 0}
  "\u0004ab\0c\u000A": {key: "ab\0c", value: 10}
---
# Key/value pair of two bytes components in the outer (non-nested) context:
# the key is length-prefixed, while the value is written without a prefix.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:bytes:v1"
nested: false
examples:
  "\u0003abcdef": {key: abc, value: def}
  "\u0004ab\0cde\0f": {key: "ab\0c", value: "de\0f"}
---
# Key/value pair of two bytes components in the nested context: both the key
# and the value carry their own length prefix.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:bytes:v1"
nested: true
examples:
  "\u0003abc\u0003def": {key: abc, value: def}
  "\u0004ab\0c\u0004de\0f": {key: "ab\0c", value: "de\0f"}
---
# Key/value pair of bytes key and boolean value. `nested` is omitted, so both
# contexts are tested. The length-prefixed key is followed by one byte for
# the boolean.
coder:
  urn: "beam:coder:kv:v1"
  components:
    - urn: "beam:coder:bytes:v1"
    - urn: "beam:coder:bool:v1"
examples:
  "\u0003abc\u0001": {key: abc, value: true}
  "\u0004ab\0c\0": {key: "ab\0c", value: false}
---
# Interval window coder. `nested` is omitted, so both contexts are tested.
# Each key is 8 bytes for `end` followed by `span` in the same variable-length
# form as the varint coder (e.g. \u00ed\u0002 for 365, \u0000 for 0).
coder:
  urn: "beam:coder:interval_window:v1"
examples:
  "\u0080\u0000\u0001\u0052\u009a\u00a4\u009b\u0068\u0080\u00dd\u00db\u0001": {end: 1454293425000, span: 3600000}
  "\u0080\u0000\u0001\u0053\u0034\u00ec\u0074\u00e8\u0080\u0090\u00fb\u00d3\u0009": {end: 1456881825000, span: 2592000000}
  "\u007f\u00df\u003b\u0064\u005a\u001c\u00ad\u0076\u00ed\u0002": {end: -9223372036854410, span: 365}
  "\u0080\u0020\u00c4\u009b\u00a5\u00e3\u0053\u00f7\u0000": {end: 9223372036854775, span: 0}
---
# Iterable of varints with a known length. `nested` is omitted, so both
# contexts are tested. Each key starts with a 4-byte element count, followed
# by the encoded elements.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:varint:v1"
examples:
  "\0\0\0\u0001\0": [0]
  "\0\0\0\u0004\u0001\n\u00c8\u0001\u00e8\u0007": [1, 10, 200, 1000]
  "\0\0\0\0": []
---
# Iterable of bytes with a known length. `nested` is omitted, so both contexts
# are tested. Each key starts with a 4-byte element count, followed by the
# length-prefixed elements.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:bytes:v1"
examples:
  "\0\0\0\u0001\u0003abc": ["abc"]
  "\0\0\0\u0002\u0004ab\0c\u0004de\0f": ["ab\0c", "de\0f"]
  "\0\0\0\0": []
---
# Iterable of booleans with a known length. `nested` is omitted, so both
# contexts are tested. Each key starts with a 4-byte element count, followed
# by one byte per element.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:bool:v1"
examples:
  "\0\0\0\u0001\u0001": [true]
  "\0\0\0\u0002\u0001\0": [true, false]
  "\0\0\0\0": []
---
# Iterable of bytes whose length is not known up front. Every key starts with
# \u00ff\u00ff\u00ff\u00ff in place of the 4-byte element count and ends with
# \u0000; a non-empty iterable has a count byte and that many length-prefixed
# elements in between. Because the coder is flagged non-deterministic, only
# decoding of these examples should be validated.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:bytes:v1"
  # This is for iterables of unknown length, where the encoding is not
  # deterministic.
  non_deterministic: true
examples:
  "\u00ff\u00ff\u00ff\u00ff\u0000": []
  "\u00ff\u00ff\u00ff\u00ff\u0001\u0003abc\u0000": ["abc"]
  "\u00ff\u00ff\u00ff\u00ff\u0002\u0004ab\u0000c\u0004de\u0000f\u0000": ["ab\0c", "de\0f"]
---
# Iterable of global windows. `nested` is omitted, so both contexts are
# tested. The global window encodes to zero bytes, so a one-element iterable
# is just the 4-byte element count.
coder:
  urn: "beam:coder:iterable:v1"
  components:
    - urn: "beam:coder:global_window:v1"
examples:
  "\0\0\0\u0001": [""]
---
# Timer with a bytes payload. `nested` is omitted, so both contexts are
# tested. Each key is an 8-byte timestamp followed by the length-prefixed
# payload. The timestamp bytes sort in the same order as the values: all
# zeros is the minimum, \u0080 followed by zeros is 0, all \u00ff is the
# maximum.
# Bytes are written as \u00XX escapes so the keys do not depend on how the
# file itself is encoded.
coder:
  urn: "beam:coder:timer:v1"
  components:
    - urn: "beam:coder:bytes:v1"
examples:
  "\0\0\0\0\0\0\0\0\u0003abc": {timestamp: -9223372036854775808, payload: abc}
  "\u007f\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u0001\u0003abc": {timestamp: -255, payload: abc}
  "\u007f\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u0003abc": {timestamp: -1, payload: abc}
  "\u0080\0\0\0\0\0\0\0\u0003abc": {timestamp: 0, payload: abc}
  "\u0080\0\0\0\0\0\0\u0001\u0003abc": {timestamp: 1, payload: abc}
  "\u0080\0\0\0\0\0\u0001\0\u0003abc": {timestamp: 256, payload: abc}
  "\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u0003abc": {timestamp: 9223372036854775807, payload: abc}
---
# Global window coder. `nested` is omitted, so both contexts are tested.
# The single example maps the empty encoding to the empty string.
coder:
  urn: "beam:coder:global_window:v1"
examples:
  "": ""
---
# All windowed values consist of pane infos that represent NO_FIRING until full support is added
# in the Python SDK (BEAM-1522).
#
# Layout visible in the example key: 8-byte timestamp, 4-byte window count
# (\0\0\0\u0001), zero bytes for the global window, one pane byte (\u000f),
# then the varint value (\u0002).
coder:
  urn: "beam:coder:windowed_value:v1"
  components:
    - urn: "beam:coder:varint:v1"
    - urn: "beam:coder:global_window:v1"
examples:
  "\u0080\0\u0001R\u009a\u00a4\u009bh\0\0\0\u0001\u000f\u0002":
    value: 2
    timestamp: 1454293425000
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
    windows: ["global"]
---
# Windowed varint values in interval windows. `nested` is omitted, so both
# contexts are tested.
# Layout visible in the example keys: 8-byte timestamp, 4-byte window count,
# that many encoded interval windows (8-byte end plus variable-length span),
# one pane byte (\u000f), then the varint value.
coder:
  urn: "beam:coder:windowed_value:v1"
  components:
    - urn: "beam:coder:varint:v1"
    - urn: "beam:coder:interval_window:v1"
examples:
  "\u007f\u00ff\u00ff\u00ff\u00ff\u00f9\u00e5\u0080\0\0\0\u0001\u0080\0\u0001R\u009a\u00a4\u009bh\u00c0\u008b\u0011\u000f\u0004":
    value: 4
    timestamp: -400000
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
    windows: [{end: 1454293425000, span: 280000}]
  "\u007f\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u009c\0\0\0\u0002\u0080\0\u0001R\u009a\u00a4\u009bh\u0080\u00dd\u00db\u0001\u007f\u00df;dZ\u001c\u00adv\u00ed\u0002\u000f\u0002":
    value: 2
    timestamp: -100
    pane: {is_first: true, is_last: true, timing: UNKNOWN, index: 0, on_time_index: 0}
    windows: [{end: 1454293425000, span: 3600000}, {end: -9223372036854410, span: 365}]
---
# Double coder. `nested` is omitted, so both contexts are tested.
# Every key is exactly 8 bytes. The expected values are written as quoted
# strings, presumably so that -0, Infinity and NaN survive YAML parsing
# unchanged; consumers have to convert them to floating point themselves.
coder:
  urn: "beam:coder:double:v1"
examples:
  "\0\0\0\0\0\0\0\0": "0"
  "\u0080\0\0\0\0\0\0\0": "-0"
  "\u003f\u00b9\u0099\u0099\u0099\u0099\u0099\u009a": "0.1"
  "\u00bf\u00b9\u0099\u0099\u0099\u0099\u0099\u009a": "-0.1"
  "\0\0\0\0\0\0\0\u0001": "4.9e-324"
  "\0\u0001\0\0\0\0\0\0": "1.390671161567e-309"
  "\u007f\u00ef\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff": "1.7976931348623157e308"
  "\u007f\u00f0\0\0\0\0\0\0": "Infinity"
  "\u00ff\u00f0\0\0\0\0\0\0": "-Infinity"
  "\u007f\u00f8\0\0\0\0\0\0": "NaN"
---
# Row coder for a schema without nullable fields, outer (non-nested) context.
# The example key starts with the field count (\u0004) and a \u0000 byte,
# followed by the field values in schema order: length-prefixed string,
# varint, 8-byte double, and an array written as a 4-byte count plus
# length-prefixed strings.
# NOTE(review): `payload` looks like the serialized schema (field names and a
# UUID are readable inside it) - confirm against the consumer before editing.
coder:
  urn: "beam:coder:row:v1"
  # str: string, i32: int32, f64: float64, arr: array[string]
  payload: "\n\t\n\x03str\x1a\x02\x10\x07\n\t\n\x03i32\x1a\x02\x10\x03\n\t\n\x03f64\x1a\x02\x10\x06\n\r\n\x03arr\x1a\x06\x1a\x04\n\x02\x10\x07\x12$4e5e554c-d4c1-4a5d-b5e1-f3293a6b9f05"
nested: false
examples:
  "\u0004\u0000\u0003foo\u00a9\u0046\u003f\u00b9\u0099\u0099\u0099\u0099\u0099\u009a\0\0\0\u0003\u0003foo\u0003bar\u0003baz": {str: "foo", i32: 9001, f64: "0.1", arr: ["foo", "bar", "baz"]}
---
# Row coder for a schema whose fields are all nullable, outer (non-nested)
# context. Each key starts with the field count (\u0003) and the number of
# null-bitmap bytes that follow. In the examples, bitmap \u0007 marks all
# three fields null and \u0004 marks only the third; when no field is null
# the bitmap is omitted (\u0000 bytes). Null fields contribute no bytes to
# the rest of the encoding.
# NOTE(review): `payload` looks like the serialized schema (field names and a
# UUID are readable inside it) - confirm against the consumer before editing.
coder:
  urn: "beam:coder:row:v1"
  # str: nullable string, i32: nullable int32, f64: nullable float64
  payload: "\n\x0b\n\x03str\x1a\x04\x08\x01\x10\x07\n\x0b\n\x03i32\x1a\x04\x08\x01\x10\x03\n\x0b\n\x03f64\x1a\x04\x08\x01\x10\x06\x12$b20c6545-57af-4bc8-b2a9-51ace21c7393"
nested: false
examples:
  "\u0003\u0001\u0007": {str: null, i32: null, f64: null}
  "\u0003\u0001\u0004\u0003foo\u00a9\u0046": {str: "foo", i32: 9001, f64: null}
  "\u0003\u0000\u0003foo\u00a9\u0046\u003f\u00b9\u0099\u0099\u0099\u0099\u0099\u009a": {str: "foo", i32: 9001, f64: "0.1"}