blob: 7fc71acbd2206766747be8af8ad7fe3d30c52d8b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.dataflow.worker;
import static org.apache.beam.runners.dataflow.util.Structs.getString;
import static org.apache.beam.vendor.guava.v20_0.com.google.common.base.Preconditions.checkArgument;
import com.google.auto.service.AutoService;
import java.math.RoundingMode;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecord;
import org.apache.beam.runners.dataflow.internal.IsmFormat.IsmRecordCoder;
import org.apache.beam.runners.dataflow.options.DataflowWorkerHarnessOptions;
import org.apache.beam.runners.dataflow.util.CloudObject;
import org.apache.beam.runners.dataflow.worker.util.WorkerPropertyNames;
import org.apache.beam.runners.dataflow.worker.util.common.worker.Sink;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowedValue.WindowedValueCoder;
import org.apache.beam.vendor.guava.v20_0.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v20_0.com.google.common.math.DoubleMath;
/**
 * A {@link SinkFactory} that builds {@link IsmSink} instances from a {@link CloudObject} spec.
 *
 * <p>The coder handed to {@link #create} must be a {@link WindowedValueCoder} whose value coder is
 * an {@link IsmRecordCoder}; any other coder is rejected with an {@link
 * IllegalArgumentException}.
 */
public class IsmSinkFactory implements SinkFactory {

  /** Registers {@link IsmSinkFactory} under both the short and fully qualified sink names. */
  @AutoService(SinkFactory.Registrar.class)
  public static class Registrar implements SinkFactory.Registrar {

    @Override
    public Map<String, SinkFactory> factories() {
      // A single factory instance is shared by both aliases.
      IsmSinkFactory sharedFactory = new IsmSinkFactory();
      return ImmutableMap.<String, SinkFactory>builder()
          .put("IsmSink", sharedFactory)
          .put("org.apache.beam.runners.dataflow.worker.IsmSink", sharedFactory)
          .build();
    }
  }

  // Each Bloom filter may occupy at most 0.1% of the worker cache size.
  private static final double BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER = 0.001;

  // Lower bound applied to the computed Bloom filter size limit, in bytes.
  private static final long MIN_BLOOM_FILTER_SIZE_BYTES = 128L;

  /**
   * Builds an {@link IsmSink} writing to the file named by the spec's {@link
   * WorkerPropertyNames#FILENAME} property.
   *
   * @param spec the sink specification; its filename property is read first
   * @param coder must be a {@link WindowedValueCoder} wrapping an {@link IsmRecordCoder}
   * @param options pipeline options, viewed as {@link DataflowWorkerHarnessOptions} to obtain the
   *     worker cache size used for bounding the Bloom filter
   * @param executionContext not used by this factory
   * @param operationContext not used by this factory
   * @return the newly constructed sink
   * @throws IllegalArgumentException if {@code coder} is not of the required shape
   */
  @Override
  public Sink<?> create(
      CloudObject spec,
      Coder<?> coder,
      PipelineOptions options,
      @Nullable DataflowExecutionContext executionContext,
      DataflowOperationContext operationContext)
      throws Exception {
    // Read the filename before validating the coder so that failures surface in the same order
    // as the individual steps are listed here.
    String outputFilename = getString(spec, WorkerPropertyNames.FILENAME);
    IsmRecordCoder<Object> recordCoder = extractIsmRecordCoder(coder);
    long sizeLimitBytes = bloomFilterSizeLimitBytes(options);

    return new IsmSink<>(
        FileSystems.matchNewResource(outputFilename, false), recordCoder, sizeLimitBytes);
  }

  /**
   * Validates that {@code coder} is a {@link WindowedValueCoder} over an {@link IsmRecordCoder}
   * and returns the inner {@link IsmRecordCoder}.
   */
  private static IsmRecordCoder<Object> extractIsmRecordCoder(Coder<?> coder) {
    checkArgument(
        coder instanceof WindowedValueCoder,
        "%s only supports using %s but got %s.",
        IsmSink.class,
        WindowedValueCoder.class,
        coder);
    Coder<?> valueCoder = ((WindowedValueCoder<?>) coder).getValueCoder();

    checkArgument(
        valueCoder instanceof IsmRecordCoder,
        "%s only supports using %s but got %s.",
        IsmSink.class,
        IsmRecordCoder.class,
        valueCoder);

    // Safe: the runtime type was verified just above; only the type argument is unchecked.
    @SuppressWarnings("unchecked")
    IsmRecordCoder<Object> ismCoder = (IsmRecordCoder<Object>) valueCoder;
    return ismCoder;
  }

  /**
   * Computes the per-filter Bloom filter size limit in bytes: the configured fraction of the
   * worker cache size, rounded down, but never less than {@link #MIN_BLOOM_FILTER_SIZE_BYTES}.
   */
  private static long bloomFilterSizeLimitBytes(PipelineOptions options) {
    DataflowWorkerHarnessOptions harnessOptions = options.as(DataflowWorkerHarnessOptions.class);
    double fractionOfCacheBytes =
        BLOOM_FILTER_SIZE_LIMIT_MULTIPLIER
            * harnessOptions.getWorkerCacheMb()
            // The cache size is expressed in MiB; scale it to bytes.
            * 1024
            * 1024;
    long roundedDownBytes = DoubleMath.roundToLong(fractionOfCacheBytes, RoundingMode.DOWN);
    return Math.max(MIN_BLOOM_FILTER_SIZE_BYTES, roundedDownBytes);
  }
}