blob: ecbf7c5e98b4d94f48ec3bec7f598ff6a50c8787 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.scala.examples.iteration
import java.util.Random
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction.SourceContext
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
/**
 * Example illustrating iterations in Flink streaming.
 *
 * The program takes pairs of integers (read from a file or randomly generated),
 * repeatedly applies a Fibonacci step to every pair and counts the additions it
 * performs until one of the two current numbers reaches the threshold `Bound`.
 *
 * This example shows how to use:
 *
 *  - streaming iterations,
 *  - buffer timeout to reduce latency,
 *  - filters that divide the iteration step into a feedback stream and an output stream.
 *
 * Recognized parameters:
 *
 *  - `--input <path>`: text file with one pair per line, formatted like `(1,2)`
 *  - `--output <path>`: file the results are written to (stdout if absent)
 */
object IterateExample {

  /** Threshold for the Fibonacci numbers; also the number of generated default inputs. */
  private val Bound = 100

  /**
   * Builds and executes the streaming job.
   *
   * @param args command line arguments, parsed with `ParameterTool.fromArgs`
   */
  def main(args: Array[String]): Unit = {

    // Checking input parameters
    val params = ParameterTool.fromArgs(args)

    // obtain execution environment and set setBufferTimeout to 1 to enable
    // continuous flushing of the output buffers (lowest latency)
    val env = StreamExecutionEnvironment.getExecutionEnvironment.setBufferTimeout(1)

    // make parameters available in the web interface
    env.getConfig.setGlobalJobParameters(params)

    // create input stream of integer pairs
    val inputStream: DataStream[(Int, Int)] =
      if (params.has("input")) {
        // map a list of strings to integer pairs
        env.readTextFile(params.get("input")).map { line: String =>
          // drop the first and last character (the enclosing parentheses), then
          // split on the comma; tokens are trimmed so "(1, 2)" parses like "(1,2)"
          val fields = line.substring(1, line.length - 1).split(",")
          (fields(0).trim.toInt, fields(1).trim.toInt)
        }
      } else {
        println("Executing Iterate example with default input data set.")
        println("Use --input to specify file input.")
        env.addSource(new RandomFibonacciSource)
      }

    // true while both numbers of the pair are still below the threshold
    def withinBound(value: (Int, Int)): Boolean = value._1 < Bound && value._2 < Bound

    // create an iterative data stream from the input with 5 second timeout
    val numbers: DataStream[((Int, Int), Int)] = inputStream
      // Map the inputs so that the next Fibonacci numbers can be calculated
      // while preserving the original input tuple
      // A counter is attached to the tuple and incremented in every iteration step
      // Layout: (_1, _2) = original pair, (_3, _4) = current pair, _5 = step counter
      .map(value => (value._1, value._2, value._1, value._2, 0))
      .iterate(
        (iteration: DataStream[(Int, Int, Int, Int, Int)]) => {
          // calculates the next Fibonacci number and increment the counter
          val step = iteration.map(value =>
            (value._1, value._2, value._4, value._3 + value._4, value._5 + 1))
          // testing which tuple needs to be iterated again
          // (the current pair is passed as an explicit tuple, not via argument adaptation)
          val feedback = step.filter(value => withinBound((value._3, value._4)))
          // giving back the input pair and the counter
          val output: DataStream[((Int, Int), Int)] = step
            .filter(value => !withinBound((value._3, value._4)))
            .map(value => ((value._1, value._2), value._5))
          (feedback, output)
        }
        // timeout after 5 seconds
        , 5000L
      )

    if (params.has("output")) {
      numbers.writeAsText(params.get("output"))
    } else {
      println("Printing result to stdout. Use --output to specify output path.")
      numbers.print()
    }

    env.execute("Streaming Iteration Example")
  }

  // *************************************************************************
  // USER FUNCTIONS
  // *************************************************************************

  /**
   * Generates up to `Bound` random integer pairs, pausing 50 ms after each one.
   * Both numbers of a pair are drawn from the range 1 to `Bound / 2 - 1` (inclusive).
   * Emission stops early once `cancel()` has been called.
   */
  private class RandomFibonacciSource extends SourceFunction[(Int, Int)] {

    val rnd = new Random()

    // number of pairs emitted so far
    var counter = 0

    // written by cancel() and read by the loop in run(), hence volatile
    @volatile var isRunning = true

    override def run(ctx: SourceContext[(Int, Int)]): Unit = {
      while (isRunning && counter < Bound) {
        // nextInt(Bound / 2 - 1) yields 0 .. Bound / 2 - 2; the + 1 shifts it to 1 .. Bound / 2 - 1
        val first = rnd.nextInt(Bound / 2 - 1) + 1
        val second = rnd.nextInt(Bound / 2 - 1) + 1

        ctx.collect((first, second))
        counter += 1
        Thread.sleep(50L)
      }
    }

    override def cancel(): Unit = isRunning = false
  }
}