/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.datatorrent.contrib.kafka;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultPartition;
import com.datatorrent.api.InputOperator;
import com.datatorrent.api.Operator;
import com.datatorrent.api.Operator.ActivationListener;
import com.datatorrent.api.Partitioner;
import com.datatorrent.api.Stats;
import com.datatorrent.api.StatsListener;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.api.annotation.Stateless;
import com.datatorrent.lib.util.KryoCloneUtils;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import kafka.api.FetchRequest;
import kafka.api.FetchRequestBuilder;
import kafka.cluster.Broker;
import kafka.javaapi.FetchResponse;
import kafka.javaapi.PartitionMetadata;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.message.Message;
import kafka.message.MessageAndOffset;
import org.apache.apex.malhar.lib.wal.WindowDataManager;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.MutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.validation.Valid;
import javax.validation.constraints.Min;
import javax.validation.constraints.NotNull;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static com.datatorrent.contrib.kafka.KafkaConsumer.KafkaMeterStatsUtil.getOffsetsForPartitions;
/**
* This is a base implementation of a Kafka input operator, which consumes data from the Kafka message bus.
* Subclasses should implement the method for emitting tuples to downstream operators.
* It is dynamically partitioned based on the upstream kafka partitions.
* <p>
* <b>Partition Strategy:</b>
* <p><b>1. ONE_TO_ONE partition</b> Each operator partition consumes from exactly one kafka partition</p>
* <p><b>2. ONE_TO_MANY partition</b> Each operator partition consumes from multiple kafka partitions, subject to a hard ingestion rate limit</p>
* <p><b>3. ONE_TO_MANY_HEURISTIC partition</b> (Not implemented yet) Each operator partition consumes from multiple kafka partitions and the number of operator partitions is determined by a heuristic function (real-time bottleneck)</p>
* <p><b>Note:</b> ONE_TO_MANY partitioning only supports the simple kafka consumer because</p>
* <p> 1) the high-level consumer can only balance the number of brokers it consumes from rather than the actual load from each broker</p>
* <p> 2) the high-level consumer cannot reset offsets once they are committed, so the tuples are not replayable</p>
* <p></p>
* <br>
* <br>
* <b>Basic Algorithm:</b>
* <p>1. Pull the metadata of the topic (how many partitions) from the brokerList of the {@link KafkaConsumer}</p>
* <p>2. The cloneConsumer method is used to initialize the new {@link KafkaConsumer} instance for the new partition operator</p>
* <p>3. The cloneOperator method is used to initialize the new {@link AbstractKafkaInputOperator} instance for the new partition operator</p>
* <p>4. ONE_TO_MANY partitioning uses the first-fit decreasing algorithm (http://en.wikipedia.org/wiki/Bin_packing_problem) to minimize the number of operator partitions</p>
* <br>
* <br>
* <b>Load balance:</b> refer to {@link SimpleKafkaConsumer} and {@link HighlevelKafkaConsumer} <br>
* <b>Kafka partition failover:</b> refer to {@link SimpleKafkaConsumer} and {@link HighlevelKafkaConsumer}
* <br>
* <br>
* <b>Self-adjustment to Kafka partition changes:</b>
* <p><b>EACH</b> operator partition periodically checks for leader broker changes among the partitions it consumes from and adjusts its connections without repartitioning</p>
* <p><b>ONLY THE APPMASTER</b> periodically checks the overall kafka partition layout and adds operator partitions when kafka partitions are added (kafka does not support partition deletion for now)</p>
* <br>
* <br>
* </p>
* Properties:<br>
* <b>maxTuplesPerWindow</b>: Maximum number of tuples emitted in each streaming window<br>
* <b>maxTotalMsgSizePerWindow</b>: Maximum total size of the messages emitted in each streaming window<br>
* <br>
* Compile time checks:<br>
* A class derived from this one has to implement the abstract method emitTuple() <br>
* <br>
* Run time checks:<br>
* None<br>
* <br>
* Benchmarks:<br>
* TBD<br>
* <br>
*
* Each operator can consume 1 topic from multiple partitions and clusters<br>
* </p>
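* <p>A minimal wiring sketch (assumes an Apex {@code DAG} named {@code dag}; the concrete subclass,
* topic and zookeeper values shown here are illustrative):</p>
* <pre>{@code
* KafkaSinglePortStringInputOperator input =
*     dag.addOperator("kafkaInput", new KafkaSinglePortStringInputOperator());
* SimpleKafkaConsumer consumer = new SimpleKafkaConsumer();
* consumer.setTopic("events");
* consumer.setZookeeper("localhost:2181");
* input.setConsumer(consumer);
* input.setStrategy("ONE_TO_ONE");
* }</pre>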
*
* @displayName Abstract Kafka Input
* @category Messaging
* @tags input operator
*
* @since 0.3.2
*/
@OperatorAnnotation(partitionable = true)
public abstract class AbstractKafkaInputOperator<K extends KafkaConsumer> implements InputOperator, ActivationListener<OperatorContext>, Operator.CheckpointNotificationListener, Partitioner<AbstractKafkaInputOperator<K>>, StatsListener
{
private static final Logger logger = LoggerFactory.getLogger(AbstractKafkaInputOperator.class);
@Min(1)
private int maxTuplesPerWindow = Integer.MAX_VALUE;
@Min(1)
private long maxTotalMsgSizePerWindow = Long.MAX_VALUE;
private transient int emitCount = 0;
private transient long emitTotalMsgSize = 0;
protected WindowDataManager windowDataManager;
protected transient long currentWindowId;
protected transient int operatorId;
protected final transient Map<KafkaPartition, MutablePair<Long, Integer>> currentWindowRecoveryState;
/**
* Offsets that are checkpointed for recovery
*/
protected Map<KafkaPartition, Long> offsetStats = new HashMap<KafkaPartition, Long>();
/**
* offset history with window id
*/
protected transient List<Pair<Long, Map<KafkaPartition, Long>>> offsetTrackHistory = new LinkedList<>();
private transient OperatorContext context = null;
// By default the partition policy is 1:1
public PartitionStrategy strategy = PartitionStrategy.ONE_TO_ONE;
// Deprecated: Please don't use this property.
@Deprecated
private long msgRateUpperBound = Long.MAX_VALUE;
// Deprecated: Please don't use this property.
@Deprecated
private long byteRateUpperBound = Long.MAX_VALUE;
// Store the current operator partition topology
private transient List<PartitionInfo> currentPartitionInfo = Lists.newLinkedList();
// Store the current collected kafka consumer stats
private transient Map<Integer, List<KafkaConsumer.KafkaMeterStats>> kafkaStatsHolder = new HashMap<Integer, List<KafkaConsumer.KafkaMeterStats>>();
private OffsetManager offsetManager = null;
// Minimum interval between two (re)partition actions
private long repartitionInterval = 30000L;
// Minimum interval between checking the collected stats to decide whether repartitioning is needed.
// Also the minimum interval between two offset updates
private long repartitionCheckInterval = 5000L;
private transient long lastCheckTime = 0L;
private transient long lastRepartitionTime = 0L;
// A list that stores newly discovered kafka partitions
private transient List<KafkaPartition> newWaitingPartition = new LinkedList<KafkaPartition>();
private transient KafkaConsumer.KafkaMessage pendingMessage;
@Min(1)
private int initialPartitionCount = 1;
@NotNull
@Valid
protected KafkaConsumer consumer = new SimpleKafkaConsumer();
public AbstractKafkaInputOperator()
{
windowDataManager = new WindowDataManager.NoopWindowDataManager();
currentWindowRecoveryState = new HashMap<KafkaPartition, MutablePair<Long, Integer>>();
}
/**
* Any concrete class derived from KafkaInputOperator has to implement this method to emit tuples to an output port.
*
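* <p>A minimal sketch of a concrete subclass (the class and port names are illustrative, and the payload is
* assumed to be a UTF-8 string):</p>
* <pre>{@code
* public class StringKafkaInput extends AbstractKafkaInputOperator<SimpleKafkaConsumer>
* {
*   public final transient com.datatorrent.api.DefaultOutputPort<String> output =
*       new com.datatorrent.api.DefaultOutputPort<String>();
*
*   protected void emitTuple(Message message)
*   {
*     java.nio.ByteBuffer payload = message.payload();
*     byte[] bytes = new byte[payload.remaining()];
*     payload.get(bytes);
*     output.emit(new String(bytes, java.nio.charset.StandardCharsets.UTF_8));
*   }
* }
* }</pre>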
*/
protected abstract void emitTuple(Message message);
/**
* A concrete class derived from KafkaInputOperator should override this method if it wants to access the kafka offset and partition id along with the kafka message.
*/
protected void emitTuple(KafkaConsumer.KafkaMessage message)
{
emitTuple(message.msg);
}
public int getMaxTuplesPerWindow()
{
return maxTuplesPerWindow;
}
public void setMaxTuplesPerWindow(int maxTuplesPerWindow)
{
this.maxTuplesPerWindow = maxTuplesPerWindow;
}
/**
* Get the maximum total size of messages to be transmitted per window. When the sum of the message sizes transmitted
* in a window reaches this limit, no more messages are transmitted until the next window. There is one exception,
* however: if the size of the first message in a window is greater than the limit, it is still transmitted so that
* the processing of messages doesn't get stuck.
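* <p>For example (a usage sketch; the 64 MB figure is purely illustrative):</p>
* <pre>{@code
* operator.setMaxTotalMsgSizePerWindow(64L * 1024 * 1024); // cap each window at roughly 64 MB of payload
* }</pre>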
* @return The maximum for the total size
*/
public long getMaxTotalMsgSizePerWindow()
{
return maxTotalMsgSizePerWindow;
}
/**
* Set the maximum total size of messages to be transmitted per window. See {@link #getMaxTotalMsgSizePerWindow()} for
* more description about this property.
*
* @param maxTotalMsgSizePerWindow The maximum for the total size
*/
public void setMaxTotalMsgSizePerWindow(long maxTotalMsgSizePerWindow)
{
this.maxTotalMsgSizePerWindow = maxTotalMsgSizePerWindow;
}
@Override
public void setup(OperatorContext context)
{
logger.debug("consumer {} topic {} cacheSize {}", consumer, consumer.getTopic(), consumer.getCacheSize());
consumer.create();
// reset the offsets to the checkpointed ones
if (consumer instanceof SimpleKafkaConsumer && !offsetStats.isEmpty()) {
Map<KafkaPartition, Long> currentOffsets = new HashMap<>();
// Increment the offsets and set them on the consumer
for (Map.Entry<KafkaPartition, Long> e: offsetStats.entrySet()) {
currentOffsets.put(e.getKey(), e.getValue() + 1);
}
((SimpleKafkaConsumer)consumer).resetOffset(currentOffsets);
}
this.context = context;
operatorId = context.getId();
if(consumer instanceof HighlevelKafkaConsumer && !(windowDataManager instanceof WindowDataManager.NoopWindowDataManager)) {
throw new RuntimeException("Idempotency is not supported for High Level Kafka Consumer");
}
windowDataManager.setup(context);
}
@Override
public void teardown()
{
windowDataManager.teardown();
consumer.teardown();
}
@Override
public void beginWindow(long windowId)
{
currentWindowId = windowId;
if (windowId <= windowDataManager.getLargestCompletedWindow()) {
replay(windowId);
}
emitCount = 0;
emitTotalMsgSize = 0;
}
protected void replay(long windowId)
{
try {
@SuppressWarnings("unchecked")
Map<KafkaPartition, MutablePair<Long, Integer>> recoveredData = (Map<KafkaPartition, MutablePair<Long, Integer>>)
windowDataManager.retrieve(windowId);
if (recoveredData != null) {
Map<String, List<PartitionMetadata>> pms = KafkaMetadataUtil.getPartitionsForTopic(getConsumer().brokers, getConsumer().topic);
if (pms != null) {
SimpleKafkaConsumer cons = (SimpleKafkaConsumer) getConsumer();
// add all partition requests together in one fetch request
FetchRequestBuilder frb = new FetchRequestBuilder().clientId(cons.getClientId());
for (Map.Entry<KafkaPartition, MutablePair<Long, Integer>> rc : recoveredData.entrySet()) {
KafkaPartition kp = rc.getKey();
List<PartitionMetadata> pmsVal = pms.get(kp.getClusterId());
Iterator<PartitionMetadata> pmIterator = pmsVal.iterator();
PartitionMetadata pm = pmIterator.next();
while (pm.partitionId() != kp.getPartitionId()) {
if (!pmIterator.hasNext())
break;
pm = pmIterator.next();
}
if (pm.partitionId() != kp.getPartitionId())
continue;
Broker bk = pm.leader();
frb.addFetch(consumer.topic, rc.getKey().getPartitionId(), rc.getValue().left, cons.getBufferSize());
FetchRequest req = frb.build();
SimpleConsumer ksc = new SimpleConsumer(bk.host(), bk.port(), cons.getTimeout(), cons.getBufferSize(), cons.getClientId());
FetchResponse fetchResponse = ksc.fetch(req);
Integer count = 0;
for (MessageAndOffset msg : fetchResponse.messageSet(consumer.topic, kp.getPartitionId())) {
KafkaConsumer.KafkaMessage kafkaMessage = new KafkaConsumer.KafkaMessage(kp, msg.message(), msg.offset());
emitTuple(kafkaMessage);
offsetStats.put(kp, msg.offset());
count = count + 1;
if (count.equals(rc.getValue().right))
break;
}
}
}
}
if(windowId == windowDataManager.getLargestCompletedWindow()) {
// Start the consumer at the largest recovery window
SimpleKafkaConsumer cons = (SimpleKafkaConsumer)getConsumer();
// Set the offset positions to the consumer
Map<KafkaPartition, Long> currentOffsets = new HashMap<KafkaPartition, Long>(cons.getCurrentOffsets());
// Increment the offsets
for (Map.Entry<KafkaPartition, Long> e: offsetStats.entrySet()) {
currentOffsets.put(e.getKey(), e.getValue() + 1);
}
cons.resetOffset(currentOffsets);
cons.start();
}
}
catch (IOException e) {
throw new RuntimeException("replay", e);
}
}
@Override
public void endWindow()
{
//TODO depends on APEX-78: only need to keep the history of windows that still need to be committed
if (getConsumer() instanceof SimpleKafkaConsumer) {
Map<KafkaPartition, Long> carryOn = new HashMap<>(offsetStats);
offsetTrackHistory.add(Pair.of(currentWindowId, carryOn));
}
if (currentWindowId > windowDataManager.getLargestCompletedWindow()) {
try {
windowDataManager.save(currentWindowRecoveryState, currentWindowId);
}
catch (IOException e) {
throw new RuntimeException("saving recovery", e);
}
}
currentWindowRecoveryState.clear();
}
@Override
public void checkpointed(long windowId)
{
// commit the consumer offset
getConsumer().commitOffset();
}
@Override
public void beforeCheckpoint(long windowId)
{
}
@Override
public void committed(long windowId)
{
if ((getConsumer() instanceof SimpleKafkaConsumer)) {
SimpleKafkaConsumer cons = (SimpleKafkaConsumer)getConsumer();
for (Iterator<Pair<Long, Map<KafkaPartition, Long>>> iter = offsetTrackHistory.iterator(); iter.hasNext(); ) {
Pair<Long, Map<KafkaPartition, Long>> item = iter.next();
if (item.getLeft() < windowId) {
iter.remove();
continue;
} else if (item.getLeft() == windowId) {
if (logger.isDebugEnabled()) {
logger.debug("report offsets {} ", Joiner.on(';').withKeyValueSeparator("=").join(item.getRight()));
}
context.setCounters(cons.getConsumerStats(item.getRight()));
}
break;
}
}
try {
windowDataManager.committed(windowId);
}
catch (IOException e) {
throw new RuntimeException("deleting state", e);
}
}
@Override
public void activate(OperatorContext ctx)
{
if (context.getValue(OperatorContext.ACTIVATION_WINDOW_ID) != Stateless.WINDOW_ID &&
context.getValue(OperatorContext.ACTIVATION_WINDOW_ID) < windowDataManager.getLargestCompletedWindow()) {
// If it is a replay state, don't start the consumer
return;
}
// Don't start any threads here directly!
// The number of kafka consumer threads depends on the type of kafka client and the message
// metadata (topic/partition/replica) layout; consumer.start() takes care of creating them
consumer.start();
}
@Override
public void deactivate()
{
consumer.stop();
}
@Override
public void emitTuples()
{
if (currentWindowId <= windowDataManager.getLargestCompletedWindow()) {
return;
}
int count = consumer.messageSize() + ((pendingMessage != null) ? 1 : 0);
if (maxTuplesPerWindow > 0) {
count = Math.min(count, maxTuplesPerWindow - emitCount);
}
KafkaConsumer.KafkaMessage message = null;
for (int i = 0; i < count; i++) {
if (pendingMessage != null) {
message = pendingMessage;
pendingMessage = null;
} else {
message = consumer.pollMessage();
}
// If the total size limit for the window would be exceeded, don't transmit any more messages in this window.
// Make an exception when no message has been transmitted in the window yet and transmit at least one
// message even if it violates the limit, so that processing doesn't get stuck
if ((emitCount > 0) && ((maxTotalMsgSizePerWindow - emitTotalMsgSize) < message.msg.size())) {
pendingMessage = message;
break;
}
emitTuple(message);
emitCount++;
emitTotalMsgSize += message.msg.size();
offsetStats.put(message.kafkaPart, message.offSet);
MutablePair<Long, Integer> offsetAndCount = currentWindowRecoveryState.get(message.kafkaPart);
if(offsetAndCount == null) {
currentWindowRecoveryState.put(message.kafkaPart, new MutablePair<Long, Integer>(message.offSet, 1));
} else {
offsetAndCount.setRight(offsetAndCount.right+1);
}
}
}
public void setConsumer(K consumer)
{
this.consumer = consumer;
}
public KafkaConsumer getConsumer()
{
return consumer;
}
/**
* Set the Topic.
* @omitFromUI
*/
@Deprecated
public void setTopic(String topic)
{
this.consumer.setTopic(topic);
}
/**
* Set the ZooKeeper quorum of the Kafka cluster(s) you want to consume data from.
* The operator will discover the brokers that it needs to consume messages from.
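* <p>A usage sketch (assumes a single-cluster quorum given as comma-separated host:port pairs; see
* {@link KafkaConsumer#setZookeeper(String)} for the exact syntax it accepts, including how multiple
* clusters are specified):</p>
* <pre>{@code
* operator.setZookeeper("zk1.example.com:2181,zk2.example.com:2181");
* }</pre>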
* @omitFromUI
*/
@Deprecated
public void setZookeeper(String zookeeperString)
{
this.consumer.setZookeeper(zookeeperString);
}
@Override
public void partitioned(Map<Integer, Partitioner.Partition<AbstractKafkaInputOperator<K>>> partitions)
{
// update the last repartition time
lastRepartitionTime = System.currentTimeMillis();
}
@Override
public Collection<Partitioner.Partition<AbstractKafkaInputOperator<K>>> definePartitions(Collection<Partitioner.Partition<AbstractKafkaInputOperator<K>>> partitions, Partitioner.PartitioningContext context)
{
// Initialize brokers from zookeepers
getConsumer().initBrokers();
boolean isInitialPartition = true;
// check if it's the initial partition
if (partitions.iterator().hasNext()) {
isInitialPartition = partitions.iterator().next().getStats() == null;
}
// Operator partitions
List<Partitioner.Partition<AbstractKafkaInputOperator<K>>> newPartitions = null;
// initialize the offset
Map<KafkaPartition, Long> initOffset = null;
if (isInitialPartition && offsetManager != null) {
initOffset = offsetManager.loadInitialOffsets();
logger.info("Initial offsets: {} ", "{ " + Joiner.on(", ").useForNull("").withKeyValueSeparator(": ").join(initOffset) + " }");
}
Set<Integer> deletedOperators = Sets.newHashSet();
Collection<Partition<AbstractKafkaInputOperator<K>>> resultPartitions = partitions;
boolean numPartitionsChanged = false;
switch (strategy) {
// For the 1:1 mapping the framework creates one operator partition per kafka topic partition.
// Each operator partition consumes from exactly one kafka partition
case ONE_TO_ONE:
if (isInitialPartition) {
lastRepartitionTime = System.currentTimeMillis();
logger.info("[ONE_TO_ONE]: Initializing partition(s)");
// get partition metadata for topics.
// Whether the operator uses the high-level or the simple kafka consumer, it always creates a temporary simple kafka consumer to get the metadata of the topic.
// The initial value of brokerList of the KafkaConsumer is used to retrieve the topic metadata
Map<String, List<PartitionMetadata>> kafkaPartitions = KafkaMetadataUtil.getPartitionsForTopic(getConsumer().brokers, getConsumer().getTopic());
// initialize the number of operator partitions according to number of kafka partitions
newPartitions = new LinkedList<Partitioner.Partition<AbstractKafkaInputOperator<K>>>();
for (Map.Entry<String, List<PartitionMetadata>> kp : kafkaPartitions.entrySet()) {
String clusterId = kp.getKey();
for (PartitionMetadata pm : kp.getValue()) {
logger.info("[ONE_TO_ONE]: Create operator partition for cluster {}, topic {}, kafka partition {} ", clusterId, getConsumer().topic, pm.partitionId());
newPartitions.add(createPartition(Sets.newHashSet(new KafkaPartition(clusterId, consumer.topic, pm.partitionId())), initOffset));
}
}
resultPartitions = newPartitions;
numPartitionsChanged = true;
}
else if (newWaitingPartition.size() != 0) {
// add partition for new kafka partition
for (KafkaPartition newPartition : newWaitingPartition) {
logger.info("[ONE_TO_ONE]: Add operator partition for cluster {}, topic {}, partition {}", newPartition.getClusterId(), getConsumer().topic, newPartition.getPartitionId());
partitions.add(createPartition(Sets.newHashSet(newPartition), null));
}
newWaitingPartition.clear();
resultPartitions = partitions;
numPartitionsChanged = true;
}
break;
// For the 1:N mapping the initial number of operator partitions is defined by the stream application.
// Afterwards, the framework dynamically adjusts the partitioning and allocates consumers to as few operator partitions as it can
// while guaranteeing that the total intake rate of each operator partition stays below some threshold
case ONE_TO_MANY:
if (getConsumer() instanceof HighlevelKafkaConsumer) {
throw new UnsupportedOperationException("[ONE_TO_MANY]: The high-level consumer is not supported for ONE_TO_MANY partition strategy.");
}
if (isInitialPartition || newWaitingPartition.size() != 0) {
lastRepartitionTime = System.currentTimeMillis();
logger.info("[ONE_TO_MANY]: Initializing partition(s)");
// get partition metadata for topics.
// Whether the operator uses the high-level or the simple kafka consumer, it always creates a temporary simple kafka consumer to get the metadata of the topic.
// The initial value of brokerList of the KafkaConsumer is used to retrieve the topic metadata
Map<String, List<PartitionMetadata>> kafkaPartitions = KafkaMetadataUtil.getPartitionsForTopic(getConsumer().brokers, getConsumer().getTopic());
int size = initialPartitionCount;
@SuppressWarnings("unchecked")
Set<KafkaPartition>[] kps = (Set<KafkaPartition>[]) Array.newInstance((new HashSet<KafkaPartition>()).getClass(), size);
int i = 0;
for (Map.Entry<String, List<PartitionMetadata>> en : kafkaPartitions.entrySet()) {
String clusterId = en.getKey();
for (PartitionMetadata pm : en.getValue()) {
if (kps[i % size] == null) {
kps[i % size] = new HashSet<KafkaPartition>();
}
kps[i % size].add(new KafkaPartition(clusterId, consumer.topic, pm.partitionId()));
i++;
}
}
size = i > size ? size : i;
newPartitions = new ArrayList<Partitioner.Partition<AbstractKafkaInputOperator<K>>>(size);
for (i = 0; i < size; i++) {
logger.info("[ONE_TO_MANY]: Create operator partition for kafka partition(s): {} ", StringUtils.join(kps[i], ", "));
newPartitions.add(createPartition(kps[i], initOffset));
}
// Add the operator ids of the existing partitions to the set of deleted operators
for (Partition<AbstractKafkaInputOperator<K>> op : partitions) {
deletedOperators.add(op.getPartitionedInstance().operatorId);
}
newWaitingPartition.clear();
resultPartitions = newPartitions;
numPartitionsChanged = true;
}
break;
case ONE_TO_MANY_HEURISTIC:
throw new UnsupportedOperationException("[ONE_TO_MANY_HEURISTIC]: Not implemented yet");
default:
break;
}
if (numPartitionsChanged) {
List<WindowDataManager> managers = windowDataManager.partition(resultPartitions.size(), deletedOperators);
int i = 0;
for (Partition<AbstractKafkaInputOperator<K>> partition : resultPartitions) {
partition.getPartitionedInstance().setWindowDataManager(managers.get(i++));
}
}
return resultPartitions;
}
/**
* Create a new partition with the partition Ids and initial offset positions
*
* @deprecated use {@link #createPartition(Set, Map)}
*/
@Deprecated
protected Partitioner.Partition<AbstractKafkaInputOperator<K>> createPartition(Set<KafkaPartition> pIds,
Map<KafkaPartition, Long> initOffsets,
@SuppressWarnings("UnusedParameters") Collection<WindowDataManager> newManagers)
{
return createPartition(pIds, initOffsets);
}
// Create a new partition with the partition Ids and initial offset positions
protected Partitioner.Partition<AbstractKafkaInputOperator<K>> createPartition(Set<KafkaPartition> pIds,
Map<KafkaPartition, Long> initOffsets)
{
Partitioner.Partition<AbstractKafkaInputOperator<K>> p = new DefaultPartition<>(KryoCloneUtils.cloneObject(this));
if (p.getPartitionedInstance().getConsumer() instanceof SimpleKafkaConsumer) {
p.getPartitionedInstance().getConsumer().resetPartitionsAndOffset(pIds, initOffsets);
if (initOffsets != null) {
//Don't send all offsets to all partitions
//p.getPartitionedInstance().offsetStats.putAll(initOffsets);
p.getPartitionedInstance().offsetStats.putAll(p.getPartitionedInstance().getConsumer().getCurrentOffsets());
}
}
PartitionInfo pif = new PartitionInfo();
pif.kpids = pIds;
currentPartitionInfo.add(pif);
return p;
}
@Override
public StatsListener.Response processStats(StatsListener.BatchedOperatorStats stats)
{
StatsListener.Response resp = new StatsListener.Response();
List<KafkaConsumer.KafkaMeterStats> kstats = extractKafkaStats(stats);
resp.repartitionRequired = isPartitionRequired(stats.getOperatorId(), kstats);
return resp;
}
private void updateOffsets(List<KafkaConsumer.KafkaMeterStats> kstats)
{
// On every partition check interval, call the offset manager to update the offsets
if (offsetManager != null) {
Map<KafkaPartition, Long> offsetsForPartitions = getOffsetsForPartitions(kstats);
if (offsetsForPartitions.size() > 0) {
logger.debug("Passing offset updates to offset manager");
offsetManager.updateOffsets(offsetsForPartitions);
}
}
}
private List<KafkaConsumer.KafkaMeterStats> extractKafkaStats(StatsListener.BatchedOperatorStats stats)
{
//preprocess the stats
List<KafkaConsumer.KafkaMeterStats> kmsList = new LinkedList<KafkaConsumer.KafkaMeterStats>();
for (Stats.OperatorStats os : stats.getLastWindowedStats()) {
if (os != null && os.counters instanceof KafkaConsumer.KafkaMeterStats) {
kmsList.add((KafkaConsumer.KafkaMeterStats) os.counters);
}
}
return kmsList;
}
/**
*
* Check whether the operator needs to be repartitioned based on the reported stats.
*
* @return true if repartitioning is required, false otherwise
*/
private boolean isPartitionRequired(int opid, List<KafkaConsumer.KafkaMeterStats> kstats)
{
long t = System.currentTimeMillis();
// If stats are available then update offsets
// Do this before re-partition interval check below to not miss offset updates
if (kstats.size() > 0) {
logger.debug("Checking offset updates for offset manager");
updateOffsets(kstats);
}
if (t - lastCheckTime < repartitionCheckInterval) {
// return false if it's within repartitionCheckInterval since the last time the stats were checked
return false;
}
if(repartitionInterval < 0){
// if repartition is disabled
return false;
}
if(t - lastRepartitionTime < repartitionInterval) {
// return false if it's still within repartitionInterval since last (re)partition
return false;
}
kafkaStatsHolder.put(opid, kstats);
if (kafkaStatsHolder.size() != currentPartitionInfo.size() || currentPartitionInfo.size() == 0) {
// skip checking if the operator hasn't collected all the stats from all the current partitions
return false;
}
try {
// check whether new kafka partitions have been added
{
Set<KafkaPartition> existingIds = new HashSet<KafkaPartition>();
for (PartitionInfo pio : currentPartitionInfo) {
existingIds.addAll(pio.kpids);
}
Map<String, List<PartitionMetadata>> partitionsMeta = KafkaMetadataUtil.getPartitionsForTopic(consumer.brokers, consumer.getTopic());
if (partitionsMeta == null) {
// the broker(s) have a temporary issue returning metadata
return false;
}
for (Map.Entry<String, List<PartitionMetadata>> en : partitionsMeta.entrySet()) {
if (en.getValue() == null) {
// the broker(s) have a temporary issue returning metadata
continue;
}
for (PartitionMetadata pm : en.getValue()) {
KafkaPartition pa = new KafkaPartition(en.getKey(), consumer.topic, pm.partitionId());
if(!existingIds.contains(pa)){
newWaitingPartition.add(pa);
}
}
}
if (newWaitingPartition.size() != 0) {
// found new kafka partition
lastRepartitionTime = t;
return true;
}
}
return false;
} finally {
// update last check time
lastCheckTime = System.currentTimeMillis();
}
}
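/**
* Supported mappings between operator partitions and kafka partitions. A configuration sketch
* (the values shown are illustrative):
* <pre>{@code
* operator.setStrategy("ONE_TO_MANY");
* operator.setInitialPartitionCount(4);
* }</pre>
*/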
public enum PartitionStrategy
{
/**
* Each operator partition connects to exactly one kafka partition
*/
ONE_TO_ONE,
/**
* Each operator partition consumes from several kafka partitions while keeping the overall input rate under a hard limit in msgs/s or bytes/s.
* For now it <b>only</b> supports the <b>simple kafka consumer</b>
*/
ONE_TO_MANY,
/**
* 1:N partitioning based on a heuristic function
* <b>NOT</b> implemented yet
* TODO implement this later
*/
ONE_TO_MANY_HEURISTIC
}
static class PartitionInfo
{
Set<KafkaPartition> kpids;
long msgRateLeft;
long byteRateLeft;
}
public WindowDataManager getWindowDataManager()
{
return windowDataManager;
}
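/**
* Sets the {@link WindowDataManager} used for idempotent replay of tuples. The default is a
* {@link WindowDataManager.NoopWindowDataManager}, which disables idempotent processing.
* A sketch of enabling it (assumes the simple kafka consumer, since idempotency is not supported with the
* {@link HighlevelKafkaConsumer}, and assumes an FSWindowDataManager implementation is available on the classpath):
* <pre>{@code
* operator.setWindowDataManager(new org.apache.apex.malhar.lib.wal.FSWindowDataManager());
* }</pre>
*/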
public void setWindowDataManager(WindowDataManager windowDataManager)
{
this.windowDataManager = windowDataManager;
}
public void setInitialPartitionCount(int partitionCount)
{
this.initialPartitionCount = partitionCount;
}
public int getInitialPartitionCount()
{
return initialPartitionCount;
}
public long getMsgRateUpperBound()
{
return msgRateUpperBound;
}
public void setMsgRateUpperBound(long msgRateUpperBound)
{
this.msgRateUpperBound = msgRateUpperBound;
}
public long getByteRateUpperBound()
{
return byteRateUpperBound;
}
public void setByteRateUpperBound(long byteRateUpperBound)
{
this.byteRateUpperBound = byteRateUpperBound;
}
public void setInitialOffset(String initialOffset)
{
this.consumer.initialOffset = initialOffset;
}
public void setOffsetManager(OffsetManager offsetManager)
{
this.offsetManager = offsetManager;
}
public OffsetManager getOffsetManager()
{
return offsetManager;
}
public void setRepartitionCheckInterval(long repartitionCheckInterval)
{
this.repartitionCheckInterval = repartitionCheckInterval;
}
public long getRepartitionCheckInterval()
{
return repartitionCheckInterval;
}
public void setRepartitionInterval(long repartitionInterval)
{
this.repartitionInterval = repartitionInterval;
}
public long getRepartitionInterval()
{
return repartitionInterval;
}
//@Pattern(regexp="ONE_TO_ONE|ONE_TO_MANY|ONE_TO_MANY_HEURISTIC", flags={Flag.CASE_INSENSITIVE})
public void setStrategy(String policy)
{
this.strategy = PartitionStrategy.valueOf(policy.toUpperCase());
}
}