/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.samza.system.kafka

import java.util.concurrent.atomic.AtomicReference
import java.util.concurrent.TimeUnit

import org.apache.kafka.clients.producer.Callback
import org.apache.kafka.clients.producer.Producer
import org.apache.kafka.clients.producer.ProducerRecord
import org.apache.kafka.clients.producer.RecordMetadata
import org.apache.kafka.common.PartitionInfo

import org.apache.samza.system.OutgoingMessageEnvelope
import org.apache.samza.system.SystemProducer
import org.apache.samza.system.SystemProducerException
import org.apache.samza.util.ExponentialSleepStrategy
import org.apache.samza.util.KafkaUtil
import org.apache.samza.util.Logging
import org.apache.samza.util.TimerUtil
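
/**
 * A [[SystemProducer]] that sends messages to Kafka through the Java producer client.
 *
 * A minimal lifecycle sketch (hypothetical wiring: the system name, topic, config, and metrics
 * registry below are illustrative assumptions, not part of this class):
 * {{{
 *   val metrics = new KafkaSystemProducerMetrics("kafka", new MetricsRegistryMap)
 *   val producer = new KafkaSystemProducer(
 *     systemName = "kafka",
 *     getProducer = () => new KafkaProducer[Array[Byte], Array[Byte]](producerConfig),
 *     metrics = metrics)
 *   producer.start()
 *   producer.send("my-source", new OutgoingMessageEnvelope(new SystemStream("kafka", "my-topic"), "msg".getBytes))
 *   producer.flush("my-source") // blocks until the send completes; throws if it failed
 *   producer.stop()
 * }}}
 */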
class KafkaSystemProducer(systemName: String,
                          retryBackoff: ExponentialSleepStrategy = new ExponentialSleepStrategy,
                          getProducer: () => Producer[Array[Byte], Array[Byte]],
                          metrics: KafkaSystemProducerMetrics,
                          val clock: () => Long = () => System.nanoTime,
                          val dropProducerExceptions: Boolean = false) extends SystemProducer with Logging with TimerUtil {

  // Represents a fatal error that caused the producer to close. Checked by send() and flush().
  val fatalException: AtomicReference[SystemProducerException] = new AtomicReference[SystemProducerException]()
  // The currently-open producer, or null once it has been closed after a failure or stop().
  val producerRef: AtomicReference[Producer[Array[Byte], Array[Byte]]] = new AtomicReference[Producer[Array[Byte], Array[Byte]]]()
  // Guards lazy recreation of the producer in getOrCreateCurrentProducer.
  val producerCreationLock: Object = new Object
  @volatile var stopped = false

  def start(): Unit = {
    producerRef.set(getProducer())
  }

  def stop() {
    info("Stopping producer for system: " + systemName)
    stopped = true

    val currentProducer = producerRef.getAndSet(null)
    try {
      if (currentProducer != null) {
        currentProducer.close() // Also performs the equivalent of a flush()
      }

      val exception = fatalException.get()
      if (exception != null) {
        error("Observed an earlier send() error while closing producer", exception)
      }
    } catch {
      case e: Exception => error("Error while closing producer for system: " + systemName, e)
    }
  }

  def register(source: String) {
  }
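
  /**
   * Sends the envelope to Kafka asynchronously: the message is only enqueued here, and success or
   * failure is reported later through the producer callback. If a previous send failed fatally and
   * dropProducerExceptions is false, this method throws instead of sending.
   */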
  def send(source: String, envelope: OutgoingMessageEnvelope) {
    trace("Enqueuing message: %s, %s." format (source, envelope))

    val topicName = envelope.getSystemStream.getStream
    if (topicName == null || topicName.isEmpty) {
      throw new IllegalArgumentException("Invalid system stream: " + envelope.getSystemStream)
    }

    val globalProducerException = fatalException.get()
    if (globalProducerException != null) {
      metrics.sendFailed.inc
      throw new SystemProducerException("Producer was unable to recover from previous exception.", globalProducerException)
    }

    val currentProducer = getOrCreateCurrentProducer

    // The Java-based Kafka producer API requires an Integer-typed partition key and does not allow
    // overriding the Partitioner here, so any custom partitioning has to be done on the client side.
    val partitions: java.util.List[PartitionInfo] = currentProducer.partitionsFor(topicName)
    val partitionKey = if (envelope.getPartitionKey != null) KafkaUtil.getIntegerPartitionKey(envelope, partitions) else null
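
    // For illustration: KafkaUtil.getIntegerPartitionKey maps the envelope's partition key into
    // [0, numPartitions), roughly as follows (the exact behavior lives in KafkaUtil, not here):
    //   abs(envelope.getPartitionKey.hashCode()) % partitions.size()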

    val record = new ProducerRecord(envelope.getSystemStream.getStream,
                                    partitionKey,
                                    envelope.getKey.asInstanceOf[Array[Byte]],
                                    envelope.getMessage.asInstanceOf[Array[Byte]])

    try {
      currentProducer.send(record, new Callback {
        def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = {
          if (exception == null) {
            metrics.sendSuccess.inc
          } else {
            val producerException = new SystemProducerException("Failed to send message for Source: %s on System:%s Topic:%s Partition:%s"
              .format(source, systemName, topicName, partitionKey), exception)

            handleFatalSendException(currentProducer, producerException)
          }
        }
      })
      metrics.sends.inc
    } catch {
      case originalException: Exception =>
        val producerException = new SystemProducerException("Failed to send message for Source: %s on System:%s Topic:%s Partition:%s"
          .format(source, systemName, topicName, partitionKey), originalException)

        metrics.sendFailed.inc
        error("Got a synchronous error from the Kafka producer.", producerException)
        // Synchronous exceptions are always recoverable, so propagate the exception up and let the user decide.
        throw producerException
    }
  }
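
  /**
   * Blocks until all messages previously passed to send() have completed, successfully or not,
   * and rethrows any fatal send failure so the caller does not checkpoint past lost messages.
   */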
  def flush(source: String) {
    updateTimer(metrics.flushNs) {
      metrics.flushes.inc

      val currentProducer = producerRef.get()
      if (currentProducer == null) {
        if (dropProducerExceptions) {
          // No producer to flush, but we're ignoring exceptions, so just return.
          warn("Skipping flush because the Kafka producer is null.")
          metrics.flushFailed.inc
          return
        }
        throw new SystemProducerException("Kafka producer is null.")
      }

      // flush() only throws InterruptedException; all other errors are handled in send() callbacks.
      currentProducer.flush()

      // Invariant: At this point either
      // 1. The producer is fine and there are no exceptions to handle, OR
      // 2. The producer is closed and one or more sources have exceptions to handle.
      //    2a. All new sends get a ProducerClosedException or IllegalStateException (depending on the Kafka version).
      //    2b. There are no messages in flight because the producer is closed.

      // We must check for an exception AFTER flush() because when flush() returns, all callbacks for messages sent
      // in that flush() are guaranteed to have completed, and we update the exception in the callback.
      // If there is an exception, we rethrow it here to prevent the checkpoint.
      val exception = fatalException.get()
      if (exception != null) {
        metrics.flushFailed.inc
        throw new SystemProducerException("Flush failed. One or more batches of messages were not sent!", exception)
      }
      trace("Flushed %s." format source)
    }
  }
  /**
   * Handles a fatal exception by closing the producer and either recreating it or storing the exception
   * to rethrow later, depending on the value of dropProducerExceptions.
   *
   * @param currentProducer the current producer for which the exception occurred. Must not be null.
   * @param producerException the exception to handle.
   */
  private def handleFatalSendException(currentProducer: Producer[Array[Byte], Array[Byte]], producerException: SystemProducerException): Unit = {
    metrics.sendFailed.inc
    error(producerException)
    // The SystemProducer API is synchronous, so there's no way for us to guarantee that an exception will
    // be handled by the Task before we recreate the producer. If it isn't handled in time, a concurrent send()
    // from another Task could send on the new producer before the first Task has handled the exception,
    // producing out-of-order messages. So we have to handle it right here in the SystemProducer.
    if (dropProducerExceptions) {
      warn("Ignoring producer exception. All messages in the failed producer request will be dropped!")

      // Prevent each callback from closing and nulling the producer for the same failure.
      if (currentProducer == producerRef.get()) {
        info("Closing producer for system %s." format systemName)
        try {
          // send()s can get a ProducerClosedException if the producer is stopped after they get the currentProducer
          // reference but before producer.send() returns. That's ONLY ok when dropProducerExceptions is true.
          // Also, when producer.close(0) is invoked on the Kafka IO thread, no more messages are sent over the
          // wire after it returns. This is key to ensuring no out-of-order messages as a result of recreating
          // the producer.
          currentProducer.close(0, TimeUnit.MILLISECONDS)
        } catch {
          case exception: Exception => error("Exception while closing producer.", exception)
        }
        producerRef.compareAndSet(currentProducer, null)
      }
    } else {
      // An exception in the callback means that the Kafka producer has exhausted its retries.
      // Close the producer to ensure that queued in-flight messages are not sent, and hence avoid re-ordering.
      // This works because there is only 1 IO thread, and no IO can be done until the callback returns.
      // Do not create a new producer here! It cannot be done without data loss for all concurrency modes.
      fatalException.compareAndSet(null, producerException)
      try {
        currentProducer.close(0, TimeUnit.MILLISECONDS)
      } catch {
        case exception: Exception => error("Exception while closing producer.", exception)
      }
    }
  }
  /**
   * @return the current producer. Never returns null.
   */
  private def getOrCreateCurrentProducer = {
    var currentProducer = producerRef.get

    if (currentProducer == null) {
      if (dropProducerExceptions && !stopped) {
        // Note: While this lock prevents others from creating a new producer, they could still set it to null.
        producerCreationLock.synchronized {
          currentProducer = producerRef.get
          if (currentProducer == null) {
            currentProducer = getProducer()
            producerRef.set(currentProducer)
          }
        }
        // Invariant: currentProducer must not be null at this point.
      } else {
        throw new SystemProducerException("Kafka producer is null.")
      }
    }
    currentProducer
  }
}
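
// A minimal, commented-out sketch of exercising the dropProducerExceptions path with Kafka's MockProducer
// (hypothetical test wiring; the names below are illustrative assumptions, not part of this class):
//
//   val mock = new MockProducer(false, new ByteArraySerializer, new ByteArraySerializer)
//   val producer = new KafkaSystemProducer(
//     systemName = "kafka",
//     getProducer = () => mock,
//     metrics = new KafkaSystemProducerMetrics("kafka", new MetricsRegistryMap),
//     dropProducerExceptions = true)
//   producer.start()
//   producer.send("source", envelope)                    // enqueued; completion is pending
//   mock.errorNext(new RuntimeException("broker error")) // fail the in-flight send
//   producer.flush("source")                             // does not throw: the failed batch is dropped,
//                                                        // and the producer is recreated on the next send()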