| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.flink.storm.wordcount; |
| |
| import org.apache.storm.generated.StormTopology; |
| import org.apache.storm.topology.TopologyBuilder; |
| import org.apache.storm.tuple.Fields; |
| |
| import org.apache.flink.examples.java.wordcount.util.WordCountData; |
| import org.apache.flink.storm.util.BoltFileSink; |
| import org.apache.flink.storm.util.BoltPrintSink; |
| import org.apache.flink.storm.util.NullTerminatingSpout; |
| import org.apache.flink.storm.util.OutputFormatter; |
| import org.apache.flink.storm.util.TupleOutputFormatter; |
| import org.apache.flink.storm.wordcount.operators.BoltCounter; |
| import org.apache.flink.storm.wordcount.operators.BoltCounterByName; |
| import org.apache.flink.storm.wordcount.operators.BoltTokenizer; |
| import org.apache.flink.storm.wordcount.operators.BoltTokenizerByName; |
| import org.apache.flink.storm.wordcount.operators.WordCountFileSpout; |
| import org.apache.flink.storm.wordcount.operators.WordCountInMemorySpout; |
| |
| /** |
| * Implements the "WordCount" program that computes a simple word occurrence histogram over text files in a streaming |
| * fashion. The program is constructed as a regular {@link StormTopology}. |
| * <p> |
| * The input is a plain text file with lines separated by newline characters. |
| * <p> |
| * Usage: |
| * <code>WordCount[Local|LocalByName|RemoteByClient|RemoteBySubmitter] <text path> <result path></code><br> |
| * If no parameters are provided, the program is run with default data from {@link WordCountData}. |
| * <p> |
| * This example shows how to: |
| * <ul> |
| * <li>how to construct a regular Storm topology as Flink program</li> |
| * </ul> |
| */ |
| public class WordCountTopology { |
| public final static String spoutId = "source"; |
| public final static String tokenierzerId = "tokenizer"; |
| public final static String counterId = "counter"; |
| public final static String sinkId = "sink"; |
| private final static OutputFormatter formatter = new TupleOutputFormatter(); |
| |
| public static TopologyBuilder buildTopology() { |
| return buildTopology(true); |
| } |
| |
| public static TopologyBuilder buildTopology(boolean indexOrName) { |
| |
| final TopologyBuilder builder = new TopologyBuilder(); |
| |
| // get input data |
| if (fileInputOutput) { |
| // read the text file from given input path |
| final String[] tokens = textPath.split(":"); |
| final String inputFile = tokens[tokens.length - 1]; |
| // inserting NullTerminatingSpout only required to stabilize integration test |
| builder.setSpout(spoutId, new NullTerminatingSpout(new WordCountFileSpout(inputFile))); |
| } else { |
| builder.setSpout(spoutId, new WordCountInMemorySpout()); |
| } |
| |
| if (indexOrName) { |
| // split up the lines in pairs (2-tuples) containing: (word,1) |
| builder.setBolt(tokenierzerId, new BoltTokenizer(), 4).shuffleGrouping(spoutId); |
| // group by the tuple field "0" and sum up tuple field "1" |
| builder.setBolt(counterId, new BoltCounter(), 4).fieldsGrouping(tokenierzerId, |
| new Fields(BoltTokenizer.ATTRIBUTE_WORD)); |
| } else { |
| // split up the lines in pairs (2-tuples) containing: (word,1) |
| builder.setBolt(tokenierzerId, new BoltTokenizerByName(), 4).shuffleGrouping( |
| spoutId); |
| // group by the tuple field "0" and sum up tuple field "1" |
| builder.setBolt(counterId, new BoltCounterByName(), 4).fieldsGrouping( |
| tokenierzerId, new Fields(BoltTokenizerByName.ATTRIBUTE_WORD)); |
| } |
| |
| // emit result |
| if (fileInputOutput) { |
| // read the text file from given input path |
| final String[] tokens = outputPath.split(":"); |
| final String outputFile = tokens[tokens.length - 1]; |
| builder.setBolt(sinkId, new BoltFileSink(outputFile, formatter)).shuffleGrouping(counterId); |
| } else { |
| builder.setBolt(sinkId, new BoltPrintSink(formatter), 4).shuffleGrouping(counterId); |
| } |
| |
| return builder; |
| } |
| |
| // ************************************************************************* |
| // UTIL METHODS |
| // ************************************************************************* |
| |
| private static boolean fileInputOutput = false; |
| private static String textPath; |
| private static String outputPath; |
| |
| static boolean parseParameters(final String[] args) { |
| |
| if (args.length > 0) { |
| // parse input arguments |
| fileInputOutput = true; |
| if (args.length == 2) { |
| textPath = args[0]; |
| outputPath = args[1]; |
| } else { |
| System.err.println("Usage: WordCount* <text path> <result path>"); |
| return false; |
| } |
| } else { |
| System.out.println("Executing WordCount example with built-in default data"); |
| System.out.println(" Provide parameters to read input data from a file"); |
| System.out.println(" Usage: WordCount* <text path> <result path>"); |
| } |
| |
| return true; |
| } |
| |
| } |