blob: 9f4e43ae07fdb0455e09cd1cff0e877a74cdc5df [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.spark.coders;
import static org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkNotNull;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.apache.beam.runners.spark.util.ByteArray;
import org.apache.beam.sdk.coders.Coder;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
/** Serialization utility class. */
public final class CoderHelpers {
private CoderHelpers() {}
/**
* Utility method for serializing an object using the specified coder.
*
* @param value Value to serialize.
* @param coder Coder to serialize with.
* @param <T> type of value that is serialized
* @return Byte array representing serialized object.
*/
public static <T> byte[] toByteArray(T value, Coder<T> coder) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try {
coder.encode(value, baos);
} catch (IOException e) {
throw new IllegalStateException("Error encoding value: " + value, e);
}
return baos.toByteArray();
}
/**
* Utility method for serializing a Iterable of values using the specified coder.
*
* @param values Values to serialize.
* @param coder Coder to serialize with.
* @param <T> type of value that is serialized
* @return List of bytes representing serialized objects.
*/
public static <T> List<byte[]> toByteArrays(Iterable<T> values, Coder<T> coder) {
List<byte[]> res = new ArrayList<>();
for (T value : values) {
res.add(toByteArray(value, coder));
}
return res;
}
/**
* Utility method for deserializing a byte array using the specified coder.
*
* @param serialized bytearray to be deserialized.
* @param coder Coder to deserialize with.
* @param <T> Type of object to be returned.
* @return Deserialized object.
*/
public static <T> T fromByteArray(byte[] serialized, Coder<T> coder) {
ByteArrayInputStream bais = new ByteArrayInputStream(serialized);
try {
return coder.decode(bais);
} catch (IOException e) {
throw new IllegalStateException("Error decoding bytes for coder: " + coder, e);
}
}
/**
* Utility method for deserializing a Iterable of byte arrays using the specified coder.
*
* @param serialized bytearrays to be deserialized.
* @param coder Coder to deserialize with.
* @param <T> Type of object to be returned.
* @return Iterable of deserialized objects.
*/
public static <T> Iterable<T> fromByteArrays(
Collection<byte[]> serialized, final Coder<T> coder) {
return serialized.stream()
.map(bytes -> fromByteArray(checkNotNull(bytes, "Cannot decode null values."), coder))
.collect(Collectors.toList());
}
/**
* A function wrapper for converting an object to a bytearray.
*
* @param coder Coder to serialize with.
* @param <T> The type of the object being serialized.
* @return A function that accepts an object and returns its coder-serialized form.
*/
public static <T> Function<T, byte[]> toByteFunction(final Coder<T> coder) {
return t -> toByteArray(t, coder);
}
/**
* A function wrapper for converting a byte array to an object.
*
* @param coder Coder to deserialize with.
* @param <T> The type of the object being deserialized.
* @return A function that accepts a byte array and returns its corresponding object.
*/
public static <T> Function<byte[], T> fromByteFunction(final Coder<T> coder) {
return bytes -> fromByteArray(bytes, coder);
}
/**
* A function wrapper for converting a key-value pair to a byte array pair.
*
* @param keyCoder Coder to serialize keys.
* @param valueCoder Coder to serialize values.
* @param <K> The type of the key being serialized.
* @param <V> The type of the value being serialized.
* @return A function that accepts a key-value pair and returns a pair of byte arrays.
*/
public static <K, V> PairFunction<Tuple2<K, V>, ByteArray, byte[]> toByteFunction(
final Coder<K> keyCoder, final Coder<V> valueCoder) {
return kv ->
new Tuple2<>(
new ByteArray(toByteArray(kv._1(), keyCoder)), toByteArray(kv._2(), valueCoder));
}
/**
* A function wrapper for converting a byte array pair to a key-value pair.
*
* @param keyCoder Coder to deserialize keys.
* @param valueCoder Coder to deserialize values.
* @param <K> The type of the key being deserialized.
* @param <V> The type of the value being deserialized.
* @return A function that accepts a pair of byte arrays and returns a key-value pair.
*/
public static <K, V> PairFunction<Tuple2<ByteArray, byte[]>, K, V> fromByteFunction(
final Coder<K> keyCoder, final Coder<V> valueCoder) {
return tuple ->
new Tuple2<>(
fromByteArray(tuple._1().getValue(), keyCoder), fromByteArray(tuple._2(), valueCoder));
}
/**
* A function wrapper for converting a byte array pair to a key-value pair, where values are
* {@link Iterable}.
*
* @param keyCoder Coder to deserialize keys.
* @param valueCoder Coder to deserialize values.
* @param <K> The type of the key being deserialized.
* @param <V> The type of the value being deserialized.
* @return A function that accepts a pair of byte arrays and returns a key-value pair.
*/
public static <K, V>
PairFunction<Tuple2<ByteArray, Iterable<byte[]>>, K, Iterable<V>> fromByteFunctionIterable(
final Coder<K> keyCoder, final Coder<V> valueCoder) {
return tuple ->
new Tuple2<>(
fromByteArray(tuple._1().getValue(), keyCoder),
StreamSupport.stream(tuple._2().spliterator(), false)
.map(bytes -> fromByteArray(bytes, valueCoder))
.collect(Collectors.toList()));
}
}