| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.crunch.kafka.inputformat; |
| |
| import kafka.api.OffsetRequest; |
| import org.apache.crunch.Pair; |
| import org.apache.crunch.kafka.ClusterTest; |
| import org.apache.hadoop.conf.Configuration; |
| import org.apache.hadoop.mapred.TaskAttemptContext; |
| import org.apache.kafka.clients.consumer.Consumer; |
| import org.apache.kafka.clients.consumer.ConsumerRecords; |
| import org.apache.kafka.common.TopicPartition; |
| import org.junit.AfterClass; |
| import org.junit.Before; |
| import org.junit.BeforeClass; |
| import org.junit.Rule; |
| import org.junit.Test; |
| import org.junit.rules.TestName; |
| import org.junit.runner.RunWith; |
| import org.mockito.Matchers; |
| import org.mockito.Mock; |
| import org.mockito.runners.MockitoJUnitRunner; |
| |
| import java.io.IOException; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Properties; |
| import java.util.Set; |
| |
| import static org.apache.crunch.kafka.KafkaUtils.getBrokerOffsets; |
| import static org.hamcrest.CoreMatchers.notNullValue; |
| import static org.hamcrest.core.Is.is; |
| import static org.junit.Assert.assertThat; |
| import static org.junit.matchers.JUnitMatchers.hasItem; |
| import static org.mockito.Mockito.when; |
| |
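| /** |
| * Integration tests for {@link KafkaRecordReader}. Data is written to the test Kafka cluster managed by |
| * {@link ClusterTest} and read back through the reader, including scenarios where the consumer's poll |
| * calls return null or empty results at the start or in the middle of reading. |
| */ |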
| @RunWith(MockitoJUnitRunner.class) |
| public class KafkaRecordReaderIT { |
| |
| @Mock |
| private TaskAttemptContext context; |
| |
| @Mock |
| private Consumer<String, String> consumer; |
| |
| @Rule |
| public TestName testName = new TestName(); |
| private Properties consumerProps; |
| private Configuration config; |
| |
| @BeforeClass |
| public static void setup() throws Exception { |
| ClusterTest.startTest(); |
| } |
| |
| @AfterClass |
| public static void cleanup() throws Exception { |
| ClusterTest.endTest(); |
| } |
| |
| private String topic; |
| |
| @Before |
| public void setupTest() { |
| topic = testName.getMethodName(); |
| consumerProps = ClusterTest.getConsumerProperties(); |
| config = ClusterTest.getConsumerConfig(); |
| when(context.getConfiguration()).thenReturn(config); |
| when(consumer.poll(Matchers.anyLong())).thenReturn(null); |
| } |
| |
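| //verifies that reading each partition's full offset range returns every record written to the topic |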
| @Test |
| public void readData() throws IOException, InterruptedException { |
| List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10); |
| |
| Map<TopicPartition, Long> startOffsets = getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic); |
| Map<TopicPartition, Long> endOffsets = getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic); |
| |
| Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>(); |
| for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) { |
| Long endingOffset = endOffsets.get(entry.getKey()); |
| offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset)); |
| } |
| |
| KafkaInputFormat.writeOffsetsToConfiguration(offsets, config); |
| |
| Set<String> keysRead = new HashSet<>(); |
| //read all data from all splits |
| for (Map.Entry<TopicPartition, Pair<Long, Long>> partitionInfo : offsets.entrySet()) { |
| KafkaInputSplit split = new KafkaInputSplit(partitionInfo.getKey().topic(), partitionInfo.getKey().partition(), |
| partitionInfo.getValue().first(), partitionInfo.getValue().second()); |
| |
| KafkaRecordReader<String, String> recordReader = new KafkaRecordReader<>(); |
| recordReader.initialize(split, context); |
| |
| int numRecordsFound = 0; |
| while (recordReader.nextKeyValue()) { |
| keysRead.add(recordReader.getCurrentKey()); |
| assertThat(keys, hasItem(recordReader.getCurrentKey())); |
| assertThat(recordReader.getCurrentValue(), is(notNullValue())); |
| numRecordsFound++; |
| } |
| recordReader.close(); |
| |
| //assert that it encountered a partition's worth of data |
| assertThat(((long) numRecordsFound), is(partitionInfo.getValue().second() - partitionInfo.getValue().first())); |
| } |
| |
| //validate that the number of unique keys read matches the number of keys written |
| assertThat(keysRead.size(), is(keys.size())); |
| } |
| |
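| //verifies that all records are still read when the initial poll calls return null |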
| @Test |
| public void pollReturnsNullAtStart() throws IOException, InterruptedException { |
| List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10); |
| |
| Map<TopicPartition, Long> startOffsets = getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic); |
| Map<TopicPartition, Long> endOffsets = getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic); |
| |
| Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>(); |
| for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) { |
| Long endingOffset = endOffsets.get(entry.getKey()); |
| offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset)); |
| } |
| |
| KafkaInputFormat.writeOffsetsToConfiguration(offsets, config); |
| |
| Set<String> keysRead = new HashSet<>(); |
| //read all data from all splits |
| for (Map.Entry<TopicPartition, Pair<Long, Long>> partitionInfo : offsets.entrySet()) { |
| KafkaInputSplit split = new KafkaInputSplit(partitionInfo.getKey().topic(), partitionInfo.getKey().partition(), |
| partitionInfo.getValue().first(), partitionInfo.getValue().second()); |
| |
| KafkaRecordReader<String, String> recordReader = new NullAtStartKafkaRecordReader<>(consumer, 3); |
| recordReader.initialize(split, context); |
| |
| int numRecordsFound = 0; |
| while (recordReader.nextKeyValue()) { |
| keysRead.add(recordReader.getCurrentKey()); |
| assertThat(keys, hasItem(recordReader.getCurrentKey())); |
| assertThat(recordReader.getCurrentValue(), is(notNullValue())); |
| numRecordsFound++; |
| } |
| recordReader.close(); |
| |
| //assert that it encountered a partition's worth of data |
| assertThat(((long) numRecordsFound), is(partitionInfo.getValue().second() - partitionInfo.getValue().first())); |
| } |
| |
| //validate that the number of unique keys read matches the number of keys written |
| assertThat(keysRead.size(), is(keys.size())); |
| } |
| |
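| //verifies that all records are still read when the initial poll calls return empty results |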
| @Test |
| public void pollReturnsEmptyAtStart() throws IOException, InterruptedException { |
| List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10); |
| |
| Map<TopicPartition, Long> startOffsets = getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic); |
| Map<TopicPartition, Long> endOffsets = getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic); |
| |
| Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>(); |
| for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) { |
| Long endingOffset = endOffsets.get(entry.getKey()); |
| offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset)); |
| } |
| |
| KafkaInputFormat.writeOffsetsToConfiguration(offsets, config); |
| |
| Set<String> keysRead = new HashSet<>(); |
| //read all data from all splits |
| for (Map.Entry<TopicPartition, Pair<Long, Long>> partitionInfo : offsets.entrySet()) { |
| KafkaInputSplit split = new KafkaInputSplit(partitionInfo.getKey().topic(), partitionInfo.getKey().partition(), |
| partitionInfo.getValue().first(), partitionInfo.getValue().second()); |
| |
| when(consumer.poll(Matchers.anyLong())).thenReturn(ConsumerRecords.<String, String>empty()); |
| KafkaRecordReader<String, String> recordReader = new NullAtStartKafkaRecordReader<>(consumer, 3); |
| recordReader.initialize(split, context); |
| |
| int numRecordsFound = 0; |
| while (recordReader.nextKeyValue()) { |
| keysRead.add(recordReader.getCurrentKey()); |
| assertThat(keys, hasItem(recordReader.getCurrentKey())); |
| assertThat(recordReader.getCurrentValue(), is(notNullValue())); |
| numRecordsFound++; |
| } |
| recordReader.close(); |
| |
| //assert that it encountered a partition's worth of data |
| assertThat(((long) numRecordsFound), is(partitionInfo.getValue().second() - partitionInfo.getValue().first())); |
| } |
| |
| //validate that the number of unique keys read matches the number of keys written |
| assertThat(keysRead.size(), is(keys.size())); |
| } |
| |
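| //verifies that all records are still read when a poll call in the middle of reading returns null |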
| @Test |
| public void pollReturnsNullInMiddle() throws IOException, InterruptedException { |
| List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10); |
| |
| Map<TopicPartition, Long> startOffsets = getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic); |
| Map<TopicPartition, Long> endOffsets = getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic); |
| |
| Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>(); |
| for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) { |
| Long endingOffset = endOffsets.get(entry.getKey()); |
| offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset)); |
| } |
| |
| KafkaInputFormat.writeOffsetsToConfiguration(offsets, config); |
| |
| Set<String> keysRead = new HashSet<>(); |
| //read all data from all splits |
| for (Map.Entry<TopicPartition, Pair<Long, Long>> partitionInfo : offsets.entrySet()) { |
| KafkaInputSplit split = new KafkaInputSplit(partitionInfo.getKey().topic(), partitionInfo.getKey().partition(), |
| partitionInfo.getValue().first(), partitionInfo.getValue().second()); |
| |
| KafkaRecordReader<String, String> recordReader = new InjectableKafkaRecordReader<>(consumer, 1); |
| recordReader.initialize(split, context); |
| |
| int numRecordsFound = 0; |
| while (recordReader.nextKeyValue()) { |
| keysRead.add(recordReader.getCurrentKey()); |
| assertThat(keys, hasItem(recordReader.getCurrentKey())); |
| assertThat(recordReader.getCurrentValue(), is(notNullValue())); |
| numRecordsFound++; |
| } |
| recordReader.close(); |
| |
| //assert that it encountered a partition's worth of data |
| assertThat(((long) numRecordsFound), is(partitionInfo.getValue().second() - partitionInfo.getValue().first())); |
| } |
| |
| //validate that the number of unique keys read matches the number of keys written |
| assertThat(keysRead.size(), is(keys.size())); |
| } |
| |
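| //verifies that all records are still read when a poll call in the middle of reading returns empty results |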
| @Test |
| public void pollReturnsEmptyInMiddle() throws IOException, InterruptedException { |
| List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10); |
| |
| Map<TopicPartition, Long> startOffsets = getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic); |
| Map<TopicPartition, Long> endOffsets = getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic); |
| |
| Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>(); |
| for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) { |
| Long endingOffset = endOffsets.get(entry.getKey()); |
| offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset)); |
| } |
| |
| KafkaInputFormat.writeOffsetsToConfiguration(offsets, config); |
| |
| Set<String> keysRead = new HashSet<>(); |
| //read all data from all splits |
| for (Map.Entry<TopicPartition, Pair<Long, Long>> partitionInfo : offsets.entrySet()) { |
| KafkaInputSplit split = new KafkaInputSplit(partitionInfo.getKey().topic(), partitionInfo.getKey().partition(), |
| partitionInfo.getValue().first(), partitionInfo.getValue().second()); |
| |
| when(consumer.poll(Matchers.anyLong())).thenReturn(ConsumerRecords.<String, String>empty()); |
| KafkaRecordReader<String, String> recordReader = new InjectableKafkaRecordReader<>(consumer, 1); |
| recordReader.initialize(split, context); |
| |
| int numRecordsFound = 0; |
| while (recordReader.nextKeyValue()) { |
| keysRead.add(recordReader.getCurrentKey()); |
| assertThat(keys, hasItem(recordReader.getCurrentKey())); |
| assertThat(recordReader.getCurrentValue(), is(notNullValue())); |
| numRecordsFound++; |
| } |
| recordReader.close(); |
| |
| //assert that it encountered a partition's worth of data |
| assertThat(((long) numRecordsFound), is(partitionInfo.getValue().second() - partitionInfo.getValue().first())); |
| } |
| |
| //validate that the number of unique keys read matches the number of keys written |
| assertThat(keysRead.size(), is(keys.size())); |
| } |
| |
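| //verifies that no records are read when the earliest available offset equals the split's ending offset |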
| @Test |
| public void pollEarliestEqualsEnding() throws IOException, InterruptedException { |
| List<String> keys = ClusterTest.writeData(ClusterTest.getProducerProperties(), topic, "batch", 10, 10); |
| |
| Map<TopicPartition, Long> startOffsets = getBrokerOffsets(consumerProps, OffsetRequest.EarliestTime(), topic); |
| Map<TopicPartition, Long> endOffsets = getBrokerOffsets(consumerProps, OffsetRequest.LatestTime(), topic); |
| |
| Map<TopicPartition, Pair<Long, Long>> offsets = new HashMap<>(); |
| for (Map.Entry<TopicPartition, Long> entry : startOffsets.entrySet()) { |
| Long endingOffset = endOffsets.get(entry.getKey()); |
| offsets.put(entry.getKey(), Pair.of(entry.getValue(), endingOffset)); |
| } |
| |
| KafkaInputFormat.writeOffsetsToConfiguration(offsets, config); |
| |
| Set<String> keysRead = new HashSet<>(); |
| //read all data from all splits |
| for (Map.Entry<TopicPartition, Pair<Long, Long>> partitionInfo : offsets.entrySet()) { |
| KafkaInputSplit split = new KafkaInputSplit(partitionInfo.getKey().topic(), partitionInfo.getKey().partition(), |
| partitionInfo.getValue().first(), partitionInfo.getValue().second()); |
| |
| when(consumer.poll(Matchers.anyLong())).thenReturn(ConsumerRecords.<String, String>empty()); |
| KafkaRecordReader<String, String> recordReader = new EarliestRecordReader<>(consumer, |
| partitionInfo.getValue().second()); |
| recordReader.initialize(split, context); |
| |
| int numRecordsFound = 0; |
| while (recordReader.nextKeyValue()) { |
| keysRead.add(recordReader.getCurrentKey()); |
| numRecordsFound++; |
| } |
| recordReader.close(); |
| |
| //assert that no records were read since the earliest offset equals the ending offset |
| assertThat(numRecordsFound, is(0)); |
| } |
| |
| //validate that no keys were read |
| assertThat(keysRead.size(), is(0)); |
| } |
| |
| |
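| /** |
| * Record reader that returns the mocked consumer from {@link #getConsumer()} until the configured number of |
| * call attempts has been exceeded, simulating poll returning null or empty results at the start of reading, |
| * before falling back to the real consumer. |
| */ |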
| private static class NullAtStartKafkaRecordReader<K, V> extends KafkaRecordReader<K, V>{ |
| |
| private final Consumer<K, V> consumer; |
| private final int callAttempts; |
| |
| private int attempts; |
| |
| public NullAtStartKafkaRecordReader(Consumer<K, V> consumer, int callAttempts) { |
| this.consumer = consumer; |
| this.callAttempts = callAttempts; |
| attempts = 0; |
| } |
| |
| @Override |
| protected Consumer<K, V> getConsumer() { |
| if(attempts > callAttempts){ |
| return super.getConsumer(); |
| } |
| attempts++; |
| return consumer; |
| } |
| } |
| |
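| /** |
| * Record reader that returns the mocked consumer on a single configured call to {@link #getConsumer()} and |
| * the real consumer otherwise, simulating a null or empty poll result in the middle of reading. |
| */ |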
| private static class InjectableKafkaRecordReader<K, V> extends KafkaRecordReader<K, V>{ |
| |
| private final Consumer<K, V> consumer; |
| private final int failAttempt; |
| |
| private int attempts; |
| |
| public InjectableKafkaRecordReader(Consumer<K, V> consumer, int failAttempt) { |
| this.consumer = consumer; |
| this.failAttempt = failAttempt; |
| attempts = 0; |
| } |
| |
| @Override |
| protected Consumer<K, V> getConsumer() { |
| if(attempts == failAttempt){ |
| attempts++; |
| return consumer; |
| } |
| attempts++; |
| return super.getConsumer(); |
| } |
| } |
| |
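| /** |
| * Record reader that always returns the mocked consumer and reports a fixed earliest offset, used to |
| * simulate a partition whose earliest available offset already equals the split's ending offset. |
| */ |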
| private static class EarliestRecordReader<K,V> extends KafkaRecordReader<K, V>{ |
| |
| private final long earliest; |
| private final Consumer<K, V> consumer; |
| |
| public EarliestRecordReader(Consumer<K, V> consumer, long earliest) { |
| this.earliest = earliest; |
| this.consumer = consumer; |
| } |
| |
| @Override |
| protected Consumer<K, V> getConsumer() { |
| return consumer; |
| } |
| |
| @Override |
| protected long getEarliestOffset() { |
| return earliest; |
| } |
| } |
| } |