| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.apex.malhar.lib.window.sample.wordcount; |
| |
| import java.io.BufferedReader; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.InputStreamReader; |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| import org.joda.time.Duration; |
| |
| import org.apache.apex.malhar.lib.io.ConsoleOutputOperator; |
| import org.apache.apex.malhar.lib.util.KeyValPair; |
| import org.apache.apex.malhar.lib.window.Accumulation; |
| import org.apache.apex.malhar.lib.window.ControlTuple; |
| import org.apache.apex.malhar.lib.window.SumAccumulation; |
| import org.apache.apex.malhar.lib.window.TriggerOption; |
| import org.apache.apex.malhar.lib.window.Tuple; |
| import org.apache.apex.malhar.lib.window.WindowOption; |
| import org.apache.apex.malhar.lib.window.WindowState; |
| import org.apache.apex.malhar.lib.window.impl.InMemoryWindowedKeyedStorage; |
| import org.apache.apex.malhar.lib.window.impl.InMemoryWindowedStorage; |
| import org.apache.apex.malhar.lib.window.impl.KeyedWindowedOperatorImpl; |
| import org.apache.apex.malhar.lib.window.impl.WatermarkImpl; |
| import org.apache.commons.io.IOUtils; |
| import org.apache.commons.lang3.StringUtils; |
| import org.apache.commons.lang3.mutable.MutableLong; |
| import org.apache.hadoop.conf.Configuration; |
| |
| import com.google.common.base.Throwables; |
| |
| import com.datatorrent.api.Context; |
| import com.datatorrent.api.DAG; |
| import com.datatorrent.api.DefaultOutputPort; |
| import com.datatorrent.api.InputOperator; |
| import com.datatorrent.api.LocalMode; |
| import com.datatorrent.api.StreamingApplication; |
| import com.datatorrent.common.util.BaseOperator; |
| |
| /** |
| * This is an example of using the WindowedOperator concepts to do streaming word count. |
| */ |
| public class Application implements StreamingApplication |
| { |
| public static class WordGenerator extends BaseOperator implements InputOperator |
| { |
| public final transient DefaultOutputPort<Tuple<KeyValPair<String, Long>>> output = new DefaultOutputPort<>(); |
| public final transient DefaultOutputPort<ControlTuple> controlOutput = new DefaultOutputPort<>(); |
| |
| private transient BufferedReader reader; |
| |
| @Override |
| public void setup(Context.OperatorContext context) |
| { |
| initReader(); |
| } |
| |
| private void initReader() |
| { |
| try { |
| InputStream resourceStream = this.getClass().getResourceAsStream("/wordcount.txt"); |
| reader = new BufferedReader(new InputStreamReader(resourceStream)); |
| } catch (Exception ex) { |
| throw Throwables.propagate(ex); |
| } |
| } |
| |
| @Override |
| public void teardown() |
| { |
| IOUtils.closeQuietly(reader); |
| } |
| |
| @Override |
| public void emitTuples() |
| { |
| try { |
| String line = reader.readLine(); |
| if (line == null) { |
| reader.close(); |
| initReader(); |
| } else { |
| // simulate late data |
| long timestamp = System.currentTimeMillis() - (long)(Math.random() * 30000); |
| Map<String, Long> countMap = new HashMap<>(); |
| for (String str : line.split("[\\p{Punct}\\s]+")) { |
| countMap.put(StringUtils.lowerCase(str), (countMap.containsKey(str)) ? countMap.get(str) + 1 : 1); |
| } |
| for (Map.Entry<String, Long> entry : countMap.entrySet()) { |
| String word = entry.getKey(); |
| long count = entry.getValue(); |
| Tuple.TimestampedTuple<KeyValPair<String, Long>> tuple = new Tuple.TimestampedTuple<>(timestamp, new KeyValPair<>(word, count)); |
| this.output.emit(tuple); |
| } |
| } |
| } catch (IOException ex) { |
| throw new RuntimeException(ex); |
| } |
| } |
| |
| @Override |
| public void endWindow() |
| { |
| this.controlOutput.emit(new WatermarkImpl(System.currentTimeMillis() - 15000)); |
| } |
| } |
| |
| @Override |
| public void populateDAG(DAG dag, Configuration configuration) |
| { |
| WordGenerator inputOperator = new WordGenerator(); |
| KeyedWindowedOperatorImpl<String, Long, MutableLong, Long> windowedOperator = new KeyedWindowedOperatorImpl<>(); |
| Accumulation<Long, MutableLong, Long> sum = new SumAccumulation(); |
| |
| windowedOperator.setAccumulation(sum); |
| windowedOperator.setDataStorage(new InMemoryWindowedKeyedStorage<String, MutableLong>()); |
| windowedOperator.setRetractionStorage(new InMemoryWindowedKeyedStorage<String, Long>()); |
| windowedOperator.setWindowStateStorage(new InMemoryWindowedStorage<WindowState>()); |
| windowedOperator.setWindowOption(new WindowOption.TimeWindows(Duration.standardMinutes(1))); |
| windowedOperator.setTriggerOption(TriggerOption.AtWatermark().withEarlyFiringsAtEvery(Duration.millis(1000)).accumulatingAndRetractingFiredPanes()); |
| //windowedOperator.setAllowedLateness(Duration.millis(14000)); |
| |
| ConsoleOutputOperator outputOperator = new ConsoleOutputOperator(); |
| dag.addOperator("inputOperator", inputOperator); |
| dag.addOperator("windowedOperator", windowedOperator); |
| dag.addOperator("outputOperator", outputOperator); |
| dag.addStream("input_windowed", inputOperator.output, windowedOperator.input); |
| dag.addStream("windowed_output", windowedOperator.output, outputOperator.input); |
| } |
| |
| public static void main(String[] args) throws Exception |
| { |
| LocalMode lma = LocalMode.newInstance(); |
| Configuration conf = new Configuration(false); |
| lma.prepareDAG(new Application(), conf); |
| LocalMode.Controller lc = lma.getController(); |
| lc.run(); |
| } |
| } |