examples/wordcount/src/main/java/org/apache/apex/examples/wordcount/ApplicationWithQuerySupport.java - apex-malhar - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.apex.examples.wordcount;

 import java.net.URI;

 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import org.apache.apex.malhar.lib.appdata.schemas.SchemaUtils;
 import org.apache.apex.malhar.lib.appdata.snapshot.AppDataSnapshotServerMap;
 import org.apache.apex.malhar.lib.io.ConsoleOutputOperator;
 import org.apache.apex.malhar.lib.io.PubSubWebSocketAppDataQuery;
 import org.apache.apex.malhar.lib.io.PubSubWebSocketAppDataResult;
 import org.apache.apex.malhar.lib.utils.PubSubHelper;
 import org.apache.hadoop.conf.Configuration;

 import com.datatorrent.api.DAG;
 import com.datatorrent.api.Operator;
 import com.datatorrent.api.StreamingApplication;
 import com.datatorrent.api.annotation.ApplicationAnnotation;

 /**
  * Simple example that computes word frequencies from any file dropped into a
  * monitored directory. It outputs the top N word-frequency pairs for each file
  * as well as globally across all files.
  * <p>
  * Each input file generates a corresponding output file in the output directory
  * containing the top N pairs for that file. The output is also written
  * to an internal store to support visualization in the UI via queries.
  *
  * @since 3.2.0
  */
 @ApplicationAnnotation(name = "TopNWordsWithQueries")
 public class ApplicationWithQuerySupport implements StreamingApplication
 {
   private static final Logger LOG = LoggerFactory.getLogger(ApplicationWithQuerySupport.class);

   /**
    * Name of schema file.
    */
   public static final String SNAPSHOT_SCHEMA = "WordDataSchema.json";

   /**
    * Populates the DAG with operators and connecting streams.
    * <p>
    * The core pipeline (line reader, word reader, per-window counter, per-file
    * counter, file writer and console) is always created. When
    * {@link PubSubHelper#isGatewayConfigured(DAG)} returns {@code true} the
    * query-support operators are added as well; otherwise the per-file counts
    * are streamed to the console only.
    *
    * @param dag The directed acyclic graph of operators to populate
    * @param conf The configuration (not read by this method)
    */
   @Override
   public void populateDAG(DAG dag, Configuration conf)
   {
     // create operators
     LineReader lineReader            = dag.addOperator("lineReader", new LineReader());
     WordReader wordReader            = dag.addOperator("wordReader", new WordReader());
     WindowWordCount windowWordCount  = dag.addOperator("windowWordCount", new WindowWordCount());
     FileWordCount fileWordCount      = dag.addOperator("fileWordCount", new FileWordCount());
     WordCountWriter wcWriter         = dag.addOperator("wcWriter", new WordCountWriter());
     ConsoleOutputOperator console    = dag.addOperator("console", new ConsoleOutputOperator());
     console.setStringFormat("wordCount: %s");

     // create streams
     dag.addStream("lines",   lineReader.output,  wordReader.input);
     dag.addStream("control", lineReader.control, fileWordCount.control);
     dag.addStream("words",   wordReader.output,  windowWordCount.input);
     dag.addStream("windowWordCounts", windowWordCount.output, fileWordCount.input);
     dag.addStream("fileWordCounts", fileWordCount.fileOutput, wcWriter.input);

     if (PubSubHelper.isGatewayConfigured(dag)) {
       addQuerySupport(dag, fileWordCount, console);
     } else {
       // A missing gateway is deliberately not treated as an error: the
       // application still runs and only prints the per-file counts.
       dag.addStream("WordCounts", fileWordCount.outputPerFile, console.input);
     }

     // Parameterized message: same text as before, without manual concatenation.
     LOG.info("done with populateDAG, isDebugEnabled = {}", LOG.isDebugEnabled());
     LOG.info("Returning from populateDAG");
   }

   /**
    * Adds the query-support operators and streams to the DAG.
    * <p>
    * Two {@link AppDataSnapshotServerMap} operators are created, one fed by
    * {@code fileWordCount.outputPerFile} and one by {@code fileWordCount.outputGlobal}.
    * Both use the schema named by {@link #SNAPSHOT_SCHEMA}, each gets its own
    * {@link PubSubWebSocketAppDataQuery} as embeddable query info provider, and
    * each sends its query results to its own {@link PubSubWebSocketAppDataResult}.
    * The per-file stream is additionally connected to the console operator.
    *
    * @param dag The DAG being populated; the gateway URI is obtained from it
    * @param fileWordCount The operator producing per-file and global counts
    * @param console The console operator that also receives the per-file counts
    */
   private static void addQuerySupport(DAG dag, FileWordCount fileWordCount, ConsoleOutputOperator console)
   {
     URI uri = PubSubHelper.getURI(dag);

     AppDataSnapshotServerMap snapshotServerFile
         = dag.addOperator("snapshotServerFile", new AppDataSnapshotServerMap());
     AppDataSnapshotServerMap snapshotServerGlobal
         = dag.addOperator("snapshotServerGlobal", new AppDataSnapshotServerMap());

     // Both snapshot servers share the same schema text.
     String snapshotServerJSON = SchemaUtils.jarResourceFileToString(SNAPSHOT_SCHEMA);
     snapshotServerFile.setSnapshotSchemaJSON(snapshotServerJSON);
     snapshotServerGlobal.setSnapshotSchemaJSON(snapshotServerJSON);

     // The query operators are not added to the DAG themselves; they are
     // handed to the snapshot servers instead.
     PubSubWebSocketAppDataQuery wsQueryFile = new PubSubWebSocketAppDataQuery();
     PubSubWebSocketAppDataQuery wsQueryGlobal = new PubSubWebSocketAppDataQuery();
     wsQueryFile.setUri(uri);
     wsQueryGlobal.setUri(uri);

     snapshotServerFile.setEmbeddableQueryInfoProvider(wsQueryFile);
     snapshotServerGlobal.setEmbeddableQueryInfoProvider(wsQueryGlobal);

     PubSubWebSocketAppDataResult wsResultFile
         = dag.addOperator("wsResultFile", new PubSubWebSocketAppDataResult());
     PubSubWebSocketAppDataResult wsResultGlobal
         = dag.addOperator("wsResultGlobal", new PubSubWebSocketAppDataResult());
     wsResultFile.setUri(uri);
     wsResultGlobal.setUri(uri);

     dag.addStream("WordCountsFile", fileWordCount.outputPerFile, snapshotServerFile.input, console.input);
     dag.addStream("WordCountsGlobal", fileWordCount.outputGlobal, snapshotServerGlobal.input);

     dag.addStream("ResultFile", snapshotServerFile.queryResult, wsResultFile.input);
     dag.addStream("ResultGlobal", snapshotServerGlobal.queryResult, wsResultGlobal.input);
   }

 }
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.apex.examples.wordcount;

	import java.net.URI;

	import org.slf4j.Logger;
	import org.slf4j.LoggerFactory;

	import org.apache.apex.malhar.lib.appdata.schemas.SchemaUtils;
	import org.apache.apex.malhar.lib.appdata.snapshot.AppDataSnapshotServerMap;
	import org.apache.apex.malhar.lib.io.ConsoleOutputOperator;
	import org.apache.apex.malhar.lib.io.PubSubWebSocketAppDataQuery;
	import org.apache.apex.malhar.lib.io.PubSubWebSocketAppDataResult;
	import org.apache.apex.malhar.lib.utils.PubSubHelper;
	import org.apache.hadoop.conf.Configuration;

	import com.datatorrent.api.DAG;
	import com.datatorrent.api.Operator;
	import com.datatorrent.api.StreamingApplication;
	import com.datatorrent.api.annotation.ApplicationAnnotation;

	/**
	 * Word-count example application with optional query support.
	 * <p>
	 * Word frequencies are computed for every file dropped into a monitored
	 * directory, and the top N word-frequency pairs are emitted both per file
	 * and globally across all files.
	 * <p>
	 * For each input file a matching output file with that file's top N pairs
	 * is produced in the output directory. The results are also written to an
	 * internal store so that the UI can visualize them via queries.
	 *
	 * @since 3.2.0
	 */
	@ApplicationAnnotation(name = "TopNWordsWithQueries")
	public class ApplicationWithQuerySupport implements StreamingApplication
	{
	  private static final Logger LOG = LoggerFactory.getLogger(ApplicationWithQuerySupport.class);

	  /**
	   * Name of schema file.
	   */
	  public static final String SNAPSHOT_SCHEMA = "WordDataSchema.json";

	  /**
	   * Builds the application DAG: registers the operators and wires them
	   * together with streams.
	   *
	   * @param dag The directed acyclic graph of operators to populate
	   * @param conf The configuration
	   */
	  @Override
	  public void populateDAG(DAG dag, Configuration conf)
	  {
	    // operators of the core pipeline
	    final LineReader reader = dag.addOperator("lineReader", new LineReader());
	    final WordReader splitter = dag.addOperator("wordReader", new WordReader());
	    final WindowWordCount perWindow = dag.addOperator("windowWordCount", new WindowWordCount());
	    final FileWordCount perFile = dag.addOperator("fileWordCount", new FileWordCount());
	    final WordCountWriter writer = dag.addOperator("wcWriter", new WordCountWriter());
	    final ConsoleOutputOperator stdout = dag.addOperator("console", new ConsoleOutputOperator());
	    stdout.setStringFormat("wordCount: %s");

	    // streams of the core pipeline
	    dag.addStream("lines", reader.output, splitter.input);
	    dag.addStream("control", reader.control, perFile.control);
	    dag.addStream("words", splitter.output, perWindow.input);
	    dag.addStream("windowWordCounts", perWindow.output, perFile.input);
	    dag.addStream("fileWordCounts", perFile.fileOutput, writer.input);

	    if (!PubSubHelper.isGatewayConfigured(dag)) {
	      // no gateway: not treated as an error, per-file counts go to the console only
	      dag.addStream("WordCounts", perFile.outputPerFile, stdout.input);
	    } else {
	      // gateway available: add query support
	      final URI gatewayUri = PubSubHelper.getURI(dag);
	      final String schemaJson;

	      final AppDataSnapshotServerMap fileSnapshots
	          = dag.addOperator("snapshotServerFile", new AppDataSnapshotServerMap());
	      final AppDataSnapshotServerMap globalSnapshots
	          = dag.addOperator("snapshotServerGlobal", new AppDataSnapshotServerMap());

	      schemaJson = SchemaUtils.jarResourceFileToString(SNAPSHOT_SCHEMA);

	      // per-file snapshot server: schema plus its own query operator
	      final PubSubWebSocketAppDataQuery fileQuery = new PubSubWebSocketAppDataQuery();
	      fileSnapshots.setSnapshotSchemaJSON(schemaJson);
	      fileQuery.setUri(gatewayUri);
	      fileSnapshots.setEmbeddableQueryInfoProvider(fileQuery);

	      // global snapshot server: same schema, separate query operator
	      final PubSubWebSocketAppDataQuery globalQuery = new PubSubWebSocketAppDataQuery();
	      globalSnapshots.setSnapshotSchemaJSON(schemaJson);
	      globalQuery.setUri(gatewayUri);
	      globalSnapshots.setEmbeddableQueryInfoProvider(globalQuery);

	      // result operators, one per snapshot server
	      final PubSubWebSocketAppDataResult fileResults
	          = dag.addOperator("wsResultFile", new PubSubWebSocketAppDataResult());
	      final PubSubWebSocketAppDataResult globalResults
	          = dag.addOperator("wsResultGlobal", new PubSubWebSocketAppDataResult());
	      fileResults.setUri(gatewayUri);
	      globalResults.setUri(gatewayUri);

	      // counts into the snapshot servers (per-file counts also go to the console)
	      dag.addStream("WordCountsFile", perFile.outputPerFile, fileSnapshots.input, stdout.input);
	      dag.addStream("WordCountsGlobal", perFile.outputGlobal, globalSnapshots.input);

	      // query results out to the result operators
	      dag.addStream("ResultFile", fileSnapshots.queryResult, fileResults.input);
	      dag.addStream("ResultGlobal", globalSnapshots.queryResult, globalResults.input);
	    }

	    final boolean debugOn = LOG.isDebugEnabled();
	    LOG.info("done with populateDAG, isDebugEnabled = " + debugOn);
	    LOG.info("Returning from populateDAG");
	  }

	}