/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package org.apache.flink.benchmark; |
| |
| import joptsimple.internal.Strings; |
| import org.apache.flink.api.common.io.FileInputFormat; |
| import org.apache.flink.api.common.restartstrategy.RestartStrategies; |
| import org.apache.flink.api.common.typeinfo.TypeInformation; |
| import org.apache.flink.configuration.Configuration; |
| import org.apache.flink.core.fs.FileInputSplit; |
| import org.apache.flink.core.fs.Path; |
| import org.apache.flink.core.testutils.OneShotLatch; |
| import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; |
| import org.apache.flink.streaming.api.functions.sink.SinkFunction; |
| import org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperatorFactory; |
| import org.apache.flink.streaming.api.functions.source.SourceFunction; |
| import org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit; |
| import org.openjdk.jmh.annotations.Benchmark; |
| import org.openjdk.jmh.annotations.Level; |
| import org.openjdk.jmh.annotations.OperationsPerInvocation; |
| import org.openjdk.jmh.annotations.TearDown; |
| import org.openjdk.jmh.runner.Runner; |
| import org.openjdk.jmh.runner.RunnerException; |
| import org.openjdk.jmh.runner.options.Options; |
| import org.openjdk.jmh.runner.options.OptionsBuilder; |
| import org.openjdk.jmh.runner.options.VerboseMode; |
| |
| import java.util.concurrent.TimeUnit; |
| import java.util.concurrent.TimeoutException; |
| |
| @OperationsPerInvocation(value = ContinuousFileReaderOperatorBenchmark.RECORDS_PER_INVOCATION) |
| public class ContinuousFileReaderOperatorBenchmark extends BenchmarkBase { |
| private static final int SPLITS_PER_INVOCATION = 100; |
| private static final int LINES_PER_SPLIT = 175_000; |
| public static final int RECORDS_PER_INVOCATION = SPLITS_PER_INVOCATION * LINES_PER_SPLIT; |
| |
| private static final TimestampedFileInputSplit SPLIT = new TimestampedFileInputSplit(0, 0, new Path("."), 0, 0, new String[]{}); |
| private static final String LINE = Strings.repeat('0', 10); |
| |
| // Source should wait until all elements reach sink. Otherwise, END_OF_INPUT is sent once all splits are emitted. |
| // Thus, all subsequent reads in ContinuousFileReaderOperator would be made in CLOSING state in a simple while-true loop (MailboxExecutor.isIdle is always true). |
| private static OneShotLatch TARGET_COUNT_REACHED_LATCH = new OneShotLatch(); |
| |
| public static void main(String[] args) |
| throws RunnerException { |
| Options options = new OptionsBuilder() |
| .verbosity(VerboseMode.NORMAL) |
| .include(".*" + ContinuousFileReaderOperatorBenchmark.class.getCanonicalName() + ".*") |
| .build(); |
| |
| new Runner(options).run(); |
| } |
| |
| @TearDown(Level.Iteration) |
| public void tearDown() { |
| TARGET_COUNT_REACHED_LATCH.reset(); |
| } |
| |
| @Benchmark |
| public void readFileSplit(FlinkEnvironmentContext context) throws Exception { |
| StreamExecutionEnvironment env = context.env; |
| env.setRestartStrategy(new RestartStrategies.NoRestartStrategyConfiguration()); |
| env |
| .enableCheckpointing(100) |
| .setParallelism(1) |
| .addSource(new MockSourceFunction()) |
| .transform("fileReader", TypeInformation.of(String.class), |
| new ContinuousFileReaderOperatorFactory<>(new MockInputFormat())) |
| .addSink(new LimitedSink()); |
| |
| env.execute(); |
| } |
| |
| private static class MockSourceFunction implements SourceFunction<TimestampedFileInputSplit> { |
| private volatile boolean isRunning = true; |
| private int count = 0; |
| |
| @Override |
| public void run(SourceContext<TimestampedFileInputSplit> ctx) { |
| while (isRunning && count < SPLITS_PER_INVOCATION) { |
| count++; |
| synchronized (ctx.getCheckpointLock()) { |
| ctx.collect(SPLIT); |
| } |
| } |
| while (isRunning) { |
| try { |
| TARGET_COUNT_REACHED_LATCH.await(100, TimeUnit.MILLISECONDS); |
| return; |
| } catch (InterruptedException e) { |
| if (!isRunning) { |
| Thread.currentThread().interrupt(); |
| } |
| } catch (TimeoutException e) { |
| // continue waiting |
| } |
| } |
| } |
| |
| @Override |
| public void cancel() { |
| isRunning = false; |
| } |
| } |
| |
| private static class MockInputFormat extends FileInputFormat<String> { |
| private transient int count = 0; |
| |
| @Override |
| public boolean reachedEnd() { |
| return count >= ContinuousFileReaderOperatorBenchmark.LINES_PER_SPLIT; |
| } |
| |
| @Override |
| public String nextRecord(String s) { |
| count++; |
| return LINE; |
| } |
| |
| @Override |
| public void open(FileInputSplit fileSplit) { |
| count = 0; |
| // prevent super from accessing file |
| } |
| |
| @Override |
| public void configure(Configuration parameters) { |
| // prevent super from requiring certain settings (input.file.path) |
| } |
| } |
| |
| private static class LimitedSink implements SinkFunction<String> { |
| private int count; |
| |
| @Override |
| public void invoke(String value, Context context) { |
| if (++count == RECORDS_PER_INVOCATION) { |
| TARGET_COUNT_REACHED_LATCH.trigger(); |
| } |
| } |
| } |
| } |