heron/api/src/java/com/twitter/heron/api/topology/TopologyBuilder.java - incubator-heron - Git at Google

 // Copyright 2016 Twitter. All rights reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //    http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package com.twitter.heron.api.topology;

 import java.util.HashMap;
 import java.util.Map;

 import com.twitter.heron.api.HeronTopology;
 import com.twitter.heron.api.bolt.BasicBoltExecutor;
 import com.twitter.heron.api.bolt.IBasicBolt;
 import com.twitter.heron.api.bolt.IRichBolt;
 import com.twitter.heron.api.generated.TopologyAPI;
 import com.twitter.heron.api.spout.IRichSpout;

 /**
  * TopologyBuilder exposes the Java API for specifying a topology for Heron
  * to execute. Topologies are Thrift structures in the end, but since the Thrift API
  * is so verbose, TopologyBuilder greatly eases the process of creating topologies.
  * The template for creating and submitting a topology looks something like:
  * <p>
  * <pre>
  * TopologyBuilder builder = new TopologyBuilder();
  *
  * builder.setSpout("1", new TestWordSpout(true), 5);
  * builder.setSpout("2", new TestWordSpout(true), 3);
  * builder.setBolt("3", new TestWordCounter(), 3)
  *          .fieldsGrouping("1", new Fields("word"))
  *          .fieldsGrouping("2", new Fields("word"));
  * builder.setBolt("4", new TestGlobalCount())
  *          .globalGrouping("1");
  *
  * Map conf = new HashMap();
  * conf.put(Config.TOPOLOGY_WORKERS, 4);
  *
  * HeronSubmitter.submitTopology("mytopology", conf, builder.createTopology());
  * </pre>
  * <p>
  * Running the exact same topology in simulator (in process), and configuring it to log all tuples
  * emitted, looks like the following. Note that it lets the topology run for 10 seconds
  * before shutting down the local cluster.
  * <p>
  * <pre>
  * TopologyBuilder builder = new TopologyBuilder();
  *
  * builder.setSpout("1", new TestWordSpout(true), 5);
  * builder.setSpout("2", new TestWordSpout(true), 3);
  * builder.setBolt("3", new TestWordCounter(), 3)
  *          .fieldsGrouping("1", new Fields("word"))
  *          .fieldsGrouping("2", new Fields("word"));
  * builder.setBolt("4", new TestGlobalCount())
  *          .globalGrouping("1");
  *
  * Map conf = new HashMap();
  * conf.put(Config.TOPOLOGY_WORKERS, 4);
  * conf.put(Config.TOPOLOGY_DEBUG, true);
  *
  * LocalCluster cluster = new LocalCluster();
  * cluster.submitTopology("mytopology", conf, builder.createTopology());
  * Utils.sleep(10000);
  * cluster.shutdown();
  * </pre>
  * <p>
  * <p>The pattern for TopologyBuilder is to map component ids to components using the setSpout
  * and setBolt methods. Those methods return objects that are then used to declare
  * the inputs for that component.</p>
  */
 public class TopologyBuilder {
   private Map<String, BoltDeclarer> bolts = new HashMap<String, BoltDeclarer>();
   private Map<String, SpoutDeclarer> spouts = new HashMap<String, SpoutDeclarer>();


   public HeronTopology createTopology() {
     TopologyAPI.Topology.Builder bldr = TopologyAPI.Topology.newBuilder();
     // First go thru the spouts
     for (Map.Entry<String, SpoutDeclarer> spout : spouts.entrySet()) {
       spout.getValue().dump(bldr);
     }
     // Then go thru the bolts
     for (Map.Entry<String, BoltDeclarer> bolt : bolts.entrySet()) {
       bolt.getValue().dump(bldr);
     }
     return new HeronTopology(bldr);
   }

   /**
    * Define a new bolt in this topology with parallelism of just one thread.
    *
    * @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
    * @param bolt the bolt
    * @return use the returned object to declare the inputs to this component
    */
   public BoltDeclarer setBolt(String id, IRichBolt bolt) {
     return setBolt(id, bolt, null);
   }

   /**
    * Define a new bolt in this topology with the specified amount of parallelism.
    *
    * @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
    * @param bolt the bolt
    * @param parallelismHint the number of tasks that should be assigned to execute this bolt. Each task will run on a thread in a process somewhere around the cluster.
    * @return use the returned object to declare the inputs to this component
    */
   public BoltDeclarer setBolt(String id, IRichBolt bolt, Number parallelismHint) {
     validateComponentName(id);
     BoltDeclarer b = new BoltDeclarer(id, bolt, parallelismHint);
     bolts.put(id, b);
     return b;
   }

   /**
    * Define a new bolt in this topology. This defines a basic bolt, which is a
    * simpler to use but more restricted kind of bolt. Basic bolts are intended
    * for non-aggregation processing and automate the anchoring/acking process to
    * achieve proper reliability in the topology.
    *
    * @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
    * @param bolt the basic bolt
    * @return use the returned object to declare the inputs to this component
    */
   public BoltDeclarer setBolt(String id, IBasicBolt bolt) {
     return setBolt(id, bolt, null);
   }

   /**
    * Define a new bolt in this topology. This defines a basic bolt, which is a
    * simpler to use but more restricted kind of bolt. Basic bolts are intended
    * for non-aggregation processing and automate the anchoring/acking process to
    * achieve proper reliability in the topology.
    *
    * @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
    * @param bolt the basic bolt
    * @param parallelismHint the number of tasks that should be assigned to execute this bolt. Each task will run on a thread in a process somwehere around the cluster.
    * @return use the returned object to declare the inputs to this component
    */
   public BoltDeclarer setBolt(String id, IBasicBolt bolt, Number parallelismHint) {
     return setBolt(id, new BasicBoltExecutor(bolt), parallelismHint);
   }

   /**
    * Define a new spout in this topology.
    *
    * @param id the id of this component. This id is referenced by other components that want to consume this spout's outputs.
    * @param spout the spout
    */
   public SpoutDeclarer setSpout(String id, IRichSpout spout) {
     return setSpout(id, spout, null);
   }

   /**
    * Define a new spout in this topology with the specified parallelism. If the spout declares
    * itself as non-distributed, the parallelismHint will be ignored and only one task
    * will be allocated to this component.
    *
    * @param id the id of this component. This id is referenced by other components that want to consume this spout's outputs.
    * @param parallelismHint the number of tasks that should be assigned to execute this spout. Each task will run on a thread in a process somwehere around the cluster.
    * @param spout the spout
    */
   public SpoutDeclarer setSpout(String id, IRichSpout spout, Number parallelismHint) {
     validateComponentName(id);
     SpoutDeclarer s = new SpoutDeclarer(id, spout, parallelismHint);
     spouts.put(id, s);
     return s;
   }

   private void validateComponentName(String name) {
     if (name.contains(",")) {
       throw new IllegalArgumentException("Component name should not contain comma(,)");
     }
     if (name.contains(":")) {
       throw new IllegalArgumentException("Component name should not contain colon(:)");
     }
     validateUnusedName(name);
   }

   private void validateUnusedName(String name) {
     if (bolts.containsKey(name)) {
       throw new IllegalArgumentException("Bolt has already been declared for name " + name);
     }
     if (spouts.containsKey(name)) {
       throw new IllegalArgumentException("Spout has already been declared for name " + name);
     }
   }
 }
	// Copyright 2016 Twitter. All rights reserved.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	package com.twitter.heron.api.topology;

	import java.util.HashMap;
	import java.util.Map;

	import com.twitter.heron.api.HeronTopology;
	import com.twitter.heron.api.bolt.BasicBoltExecutor;
	import com.twitter.heron.api.bolt.IBasicBolt;
	import com.twitter.heron.api.bolt.IRichBolt;
	import com.twitter.heron.api.generated.TopologyAPI;
	import com.twitter.heron.api.spout.IRichSpout;

	/**
	* TopologyBuilder exposes the Java API for specifying a topology for Heron
	* to execute. Topologies are Thrift structures in the end, but since the Thrift API
	* is so verbose, TopologyBuilder greatly eases the process of creating topologies.
	* The template for creating and submitting a topology looks something like:
	* <p>
	* <pre>
	* TopologyBuilder builder = new TopologyBuilder();
	*
	* builder.setSpout("1", new TestWordSpout(true), 5);
	* builder.setSpout("2", new TestWordSpout(true), 3);
	* builder.setBolt("3", new TestWordCounter(), 3)
	* .fieldsGrouping("1", new Fields("word"))
	* .fieldsGrouping("2", new Fields("word"));
	* builder.setBolt("4", new TestGlobalCount())
	* .globalGrouping("1");
	*
	* Map conf = new HashMap();
	* conf.put(Config.TOPOLOGY_WORKERS, 4);
	*
	* HeronSubmitter.submitTopology("mytopology", conf, builder.createTopology());
	* </pre>
	* <p>
	* Running the exact same topology in simulator (in process), and configuring it to log all tuples
	* emitted, looks like the following. Note that it lets the topology run for 10 seconds
	* before shutting down the local cluster.
	* <p>
	* <pre>
	* TopologyBuilder builder = new TopologyBuilder();
	*
	* builder.setSpout("1", new TestWordSpout(true), 5);
	* builder.setSpout("2", new TestWordSpout(true), 3);
	* builder.setBolt("3", new TestWordCounter(), 3)
	* .fieldsGrouping("1", new Fields("word"))
	* .fieldsGrouping("2", new Fields("word"));
	* builder.setBolt("4", new TestGlobalCount())
	* .globalGrouping("1");
	*
	* Map conf = new HashMap();
	* conf.put(Config.TOPOLOGY_WORKERS, 4);
	* conf.put(Config.TOPOLOGY_DEBUG, true);
	*
	* LocalCluster cluster = new LocalCluster();
	* cluster.submitTopology("mytopology", conf, builder.createTopology());
	* Utils.sleep(10000);
	* cluster.shutdown();
	* </pre>
	* <p>
	* <p>The pattern for TopologyBuilder is to map component ids to components using the setSpout
	* and setBolt methods. Those methods return objects that are then used to declare
	* the inputs for that component.</p>
	*/
	public class TopologyBuilder {
	private Map<String, BoltDeclarer> bolts = new HashMap<String, BoltDeclarer>();
	private Map<String, SpoutDeclarer> spouts = new HashMap<String, SpoutDeclarer>();


	public HeronTopology createTopology() {
	TopologyAPI.Topology.Builder bldr = TopologyAPI.Topology.newBuilder();
	// First go thru the spouts
	for (Map.Entry<String, SpoutDeclarer> spout : spouts.entrySet()) {
	spout.getValue().dump(bldr);
	}
	// Then go thru the bolts
	for (Map.Entry<String, BoltDeclarer> bolt : bolts.entrySet()) {
	bolt.getValue().dump(bldr);
	}
	return new HeronTopology(bldr);
	}

	/**
	* Define a new bolt in this topology with parallelism of just one thread.
	*
	* @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
	* @param bolt the bolt
	* @return use the returned object to declare the inputs to this component
	*/
	public BoltDeclarer setBolt(String id, IRichBolt bolt) {
	return setBolt(id, bolt, null);
	}

	/**
	* Define a new bolt in this topology with the specified amount of parallelism.
	*
	* @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
	* @param bolt the bolt
	* @param parallelismHint the number of tasks that should be assigned to execute this bolt. Each task will run on a thread in a process somewhere around the cluster.
	* @return use the returned object to declare the inputs to this component
	*/
	public BoltDeclarer setBolt(String id, IRichBolt bolt, Number parallelismHint) {
	validateComponentName(id);
	BoltDeclarer b = new BoltDeclarer(id, bolt, parallelismHint);
	bolts.put(id, b);
	return b;
	}

	/**
	* Define a new bolt in this topology. This defines a basic bolt, which is a
	* simpler to use but more restricted kind of bolt. Basic bolts are intended
	* for non-aggregation processing and automate the anchoring/acking process to
	* achieve proper reliability in the topology.
	*
	* @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
	* @param bolt the basic bolt
	* @return use the returned object to declare the inputs to this component
	*/
	public BoltDeclarer setBolt(String id, IBasicBolt bolt) {
	return setBolt(id, bolt, null);
	}

	/**
	* Define a new bolt in this topology. This defines a basic bolt, which is a
	* simpler to use but more restricted kind of bolt. Basic bolts are intended
	* for non-aggregation processing and automate the anchoring/acking process to
	* achieve proper reliability in the topology.
	*
	* @param id the id of this component. This id is referenced by other components that want to consume this bolt's outputs.
	* @param bolt the basic bolt
	* @param parallelismHint the number of tasks that should be assigned to execute this bolt. Each task will run on a thread in a process somwehere around the cluster.
	* @return use the returned object to declare the inputs to this component
	*/
	public BoltDeclarer setBolt(String id, IBasicBolt bolt, Number parallelismHint) {
	return setBolt(id, new BasicBoltExecutor(bolt), parallelismHint);
	}

	/**
	* Define a new spout in this topology.
	*
	* @param id the id of this component. This id is referenced by other components that want to consume this spout's outputs.
	* @param spout the spout
	*/
	public SpoutDeclarer setSpout(String id, IRichSpout spout) {
	return setSpout(id, spout, null);
	}

	/**
	* Define a new spout in this topology with the specified parallelism. If the spout declares
	* itself as non-distributed, the parallelismHint will be ignored and only one task
	* will be allocated to this component.
	*
	* @param id the id of this component. This id is referenced by other components that want to consume this spout's outputs.
	* @param parallelismHint the number of tasks that should be assigned to execute this spout. Each task will run on a thread in a process somwehere around the cluster.
	* @param spout the spout
	*/
	public SpoutDeclarer setSpout(String id, IRichSpout spout, Number parallelismHint) {
	validateComponentName(id);
	SpoutDeclarer s = new SpoutDeclarer(id, spout, parallelismHint);
	spouts.put(id, s);
	return s;
	}

	private void validateComponentName(String name) {
	if (name.contains(",")) {
	throw new IllegalArgumentException("Component name should not contain comma(,)");
	}
	if (name.contains(":")) {
	throw new IllegalArgumentException("Component name should not contain colon(:)");
	}
	validateUnusedName(name);
	}

	private void validateUnusedName(String name) {
	if (bolts.containsKey(name)) {
	throw new IllegalArgumentException("Bolt has already been declared for name " + name);
	}
	if (spouts.containsKey(name)) {
	throw new IllegalArgumentException("Spout has already been declared for name " + name);
	}
	}
	}