| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.nifi.processors.hadoop; |
| |
| import org.apache.hadoop.io.BytesWritable; |
| import org.apache.hadoop.io.SequenceFile; |
| import org.apache.hadoop.io.Text; |
| import org.apache.hadoop.io.compress.BZip2Codec; |
| import org.apache.hadoop.io.compress.DefaultCodec; |
| import org.apache.nifi.components.AllowableValue; |
| import org.apache.nifi.components.PropertyDescriptor; |
| import org.apache.nifi.flowfile.attributes.CoreAttributes; |
| import org.apache.nifi.flowfile.attributes.StandardFlowFileMediaType; |
| import org.apache.nifi.hadoop.KerberosProperties; |
| import org.apache.nifi.util.MockFlowFile; |
| import org.apache.nifi.util.TestRunner; |
| import org.apache.nifi.util.TestRunners; |
| import org.junit.After; |
| import org.junit.Before; |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
import java.nio.charset.StandardCharsets;
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import static org.junit.Assert.assertEquals; |
| import static org.junit.Assert.assertTrue; |
| |
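/**
 * Tests for {@link CreateHadoopSequenceFile}: basic sequence file creation, header
 * contents, merged-package (TAR, ZIP, FlowFile v3) input, and compression codecs.
 */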
| public class TestCreateHadoopSequenceFile { |
| |
| private TestRunner controller; |
| |
| private final File testdata = new File("src/test/resources/testdata"); |
| private final File[] inFiles = new File[]{new File(testdata, "randombytes-1"), |
| new File(testdata, "randombytes-2"), new File(testdata, "randombytes-3") |
| }; |
| |
| private KerberosProperties kerberosProperties; |
| |
| @BeforeClass |
| public static void setUpClass() { |
| System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info"); |
| System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hadoop", "debug"); |
| } |
| |
| @Before |
| public void setUp() { |
        kerberosProperties = new KerberosProperties(null);
| |
| CreateHadoopSequenceFile proc = new TestableCreateHadoopSequenceFile(kerberosProperties); |
| controller = TestRunners.newTestRunner(proc); |
| } |
| |
| @After |
| public void tearDown() { |
| controller.clearTransferState(); |
| } |
| |
| @Test |
| public void validateAllowableValuesForCompressionType() { |
| PropertyDescriptor pd = CreateHadoopSequenceFile.COMPRESSION_TYPE; |
| List<AllowableValue> allowableValues = pd.getAllowableValues(); |
| assertEquals("NONE", allowableValues.get(0).getValue()); |
| assertEquals("RECORD", allowableValues.get(1).getValue()); |
| assertEquals("BLOCK", allowableValues.get(2).getValue()); |
| } |
| |
| @Test |
| public void testSimpleCase() throws IOException { |
| for (File inFile : inFiles) { |
            try (FileInputStream fin = new FileInputStream(inFile)) {
| controller.enqueue(fin); |
| } |
| } |
| controller.run(3); |
| |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE); |
| |
| assertEquals(0, failedFlowFiles.size()); |
| assertEquals(3, successSeqFiles.size()); |
| |
| } |
| |
| @Test |
    public void testSequenceFileSaysValueIsBytesWritable() throws IOException {
| for (File inFile : inFiles) { |
            try (FileInputStream fin = new FileInputStream(inFile)) {
| controller.enqueue(fin); |
| } |
| } |
| controller.run(3); |
| |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE); |
| |
| assertEquals(0, failedFlowFiles.size()); |
| assertEquals(3, successSeqFiles.size()); |
| |
| final byte[] data = successSeqFiles.iterator().next().toByteArray(); |
| |
        final String magicHeader = new String(data, 0, 3, StandardCharsets.UTF_8);
        assertEquals("SEQ", magicHeader);
        // The header is "SEQ" followed by the version (1 byte), then the key class name
        // and the value class name, each preceded by its length (a vint, which is a
        // single byte for names this short).
        final String keyType = Text.class.getCanonicalName();
        final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
        final int valueTypeLength = data[5 + keyType.length()];
        final String valueType = BytesWritable.class.getCanonicalName();
        assertEquals(valueType.length(), valueTypeLength);
        assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), StandardCharsets.UTF_8));
| } |
| |
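    // The three tests below enqueue pre-merged packages (TAR, ZIP, and FlowFile v3).
    // CreateHadoopSequenceFile unpacks each packaged entry into its own record, so one
    // merged input FlowFile produces one sequence file containing all of the entries.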
| @Test |
| public void testMergedTarData() throws IOException { |
| Map<String, String> attributes = new HashMap<>(); |
| attributes.put(CoreAttributes.MIME_TYPE.key(), "application/tar"); |
| try (final FileInputStream fin = new FileInputStream("src/test/resources/testdata/13545312236534130.tar")) { |
| controller.enqueue(fin, attributes); |
| controller.run(); |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| assertEquals(1, successSeqFiles.size()); |
| final byte[] data = successSeqFiles.iterator().next().toByteArray(); |
            // The output should be larger than 1000000 bytes because that is the combined
            // size of two of the input files; it must hold all of the input plus the
            // sequence file headers, which should only add a few hundred bytes.
| assertTrue(data.length > 1000000); |
| assertTrue(data.length < 1501000); |
| } |
| } |
| |
| @Test |
| public void testMergedZipData() throws IOException { |
| Map<String, String> attributes = new HashMap<>(); |
| attributes.put(CoreAttributes.MIME_TYPE.key(), "application/zip"); |
        try (FileInputStream fin = new FileInputStream("src/test/resources/testdata/13545423550275052.zip")) {
| controller.enqueue(fin, attributes); |
| controller.run(); |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| assertEquals(1, successSeqFiles.size()); |
| final byte[] data = successSeqFiles.iterator().next().toByteArray(); |
            // The output should be larger than 1000000 bytes because that is the combined
            // size of two of the input files; it must hold all of the input plus the
            // sequence file headers, which should only add a few hundred bytes.
| assertTrue(data.length > 1000000); |
| assertTrue(data.length < 1501000); |
| } |
| } |
| |
| @Test |
| public void testMergedFlowfilePackagedData() throws IOException { |
| Map<String, String> attributes = new HashMap<>(); |
| attributes.put(CoreAttributes.MIME_TYPE.key(), StandardFlowFileMediaType.VERSION_3.getMediaType()); |
        try (final FileInputStream fin = new FileInputStream("src/test/resources/testdata/13545479542069498.pkg")) {
| controller.enqueue(fin, attributes); |
| |
| controller.run(); |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| assertEquals(1, successSeqFiles.size()); |
| final byte[] data = successSeqFiles.iterator().next().toByteArray(); |
            // The output should be larger than 1000000 bytes because that is the combined
            // size of two of the input files; it must hold all of the input plus the
            // sequence file headers, which should only add a few hundred bytes.
| assertTrue(data.length > 1000000); |
| assertTrue(data.length < 1501000); |
| } |
| } |
| |
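    // The codec tests below read the raw sequence file header to verify which codec was
    // recorded: "SEQ", a version byte, the key and value class names, two compression
    // flags, and finally the codec class name.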
| @Test |
    public void testSequenceFileBzipCompressionCodec() throws IOException {
| controller.setProperty(AbstractHadoopProcessor.COMPRESSION_CODEC, CompressionType.BZIP.name()); |
| controller.setProperty(CreateHadoopSequenceFile.COMPRESSION_TYPE, SequenceFile.CompressionType.BLOCK.name()); |
| |
| File inFile = inFiles[0]; |
        try (FileInputStream fin = new FileInputStream(inFile)) {
| controller.enqueue(fin); |
| } |
| controller.run(); |
| |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE); |
| |
| assertEquals(0, failedFlowFiles.size()); |
| assertEquals(1, successSeqFiles.size()); |
| |
| MockFlowFile ff = successSeqFiles.iterator().next(); |
        byte[] data = ff.toByteArray();

        final String magicHeader = new String(data, 0, 3, StandardCharsets.UTF_8);
        assertEquals("SEQ", magicHeader);
        // The header is "SEQ" followed by the version (1 byte), then the key class name
        // and the value class name, each preceded by its length (a vint, which is a
        // single byte for names this short).
        final String keyType = Text.class.getCanonicalName();
        final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
        final int valueTypeLength = data[5 + keyType.length()];
        final String valueType = BytesWritable.class.getCanonicalName();

        assertEquals(valueType.length(), valueTypeLength);
        assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), StandardCharsets.UTF_8));

        // After the class names come two boolean flags: compressed and block-compressed.
        final int compressionIndex = 3 + 1 + 1 + keyType.length() + 1 + valueType.length();
        final int blockCompressionIndex = compressionIndex + 1;

        assertEquals(1, data[compressionIndex]);
        assertEquals(1, data[blockCompressionIndex]);

        // The codec class name follows, again preceded by a one-byte length.
        final int codecTypeSize = data[blockCompressionIndex + 1];
        final int codecTypeStartIndex = blockCompressionIndex + 2;

        assertEquals(BZip2Codec.class.getCanonicalName(), new String(data, codecTypeStartIndex, codecTypeSize, StandardCharsets.UTF_8));
| } |
| |
| @Test |
    public void testSequenceFileDefaultCompressionCodec() throws IOException {
| controller.setProperty(AbstractHadoopProcessor.COMPRESSION_CODEC, CompressionType.DEFAULT.name()); |
| controller.setProperty(CreateHadoopSequenceFile.COMPRESSION_TYPE, SequenceFile.CompressionType.BLOCK.name()); |
| |
| File inFile = inFiles[0]; |
        try (FileInputStream fin = new FileInputStream(inFile)) {
| controller.enqueue(fin); |
| } |
| controller.run(); |
| |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE); |
| |
| assertEquals(0, failedFlowFiles.size()); |
| assertEquals(1, successSeqFiles.size()); |
| |
| MockFlowFile ff = successSeqFiles.iterator().next(); |
        byte[] data = ff.toByteArray();

        final String magicHeader = new String(data, 0, 3, StandardCharsets.UTF_8);
        assertEquals("SEQ", magicHeader);
        // The header is "SEQ" followed by the version (1 byte), then the key class name
        // and the value class name, each preceded by its length (a vint, which is a
        // single byte for names this short).
        final String keyType = Text.class.getCanonicalName();
        final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
        final int valueTypeLength = data[5 + keyType.length()];
        final String valueType = BytesWritable.class.getCanonicalName();

        assertEquals(valueType.length(), valueTypeLength);
        assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), StandardCharsets.UTF_8));

        // After the class names come two boolean flags: compressed and block-compressed.
        final int compressionIndex = 3 + 1 + 1 + keyType.length() + 1 + valueType.length();
        final int blockCompressionIndex = compressionIndex + 1;

        assertEquals(1, data[compressionIndex]);
        assertEquals(1, data[blockCompressionIndex]);

        // The codec class name follows, again preceded by a one-byte length.
        final int codecTypeSize = data[blockCompressionIndex + 1];
        final int codecTypeStartIndex = blockCompressionIndex + 2;

        assertEquals(DefaultCodec.class.getCanonicalName(), new String(data, codecTypeStartIndex, codecTypeSize, StandardCharsets.UTF_8));
| } |
| |
| @Test |
    public void testSequenceFileNoneCompressionCodec() throws IOException {
| controller.setProperty(AbstractHadoopProcessor.COMPRESSION_CODEC, CompressionType.NONE.name()); |
| controller.setProperty(CreateHadoopSequenceFile.COMPRESSION_TYPE, SequenceFile.CompressionType.BLOCK.name()); |
| |
| File inFile = inFiles[0]; |
        try (FileInputStream fin = new FileInputStream(inFile)) {
| controller.enqueue(fin); |
| } |
| controller.run(); |
| |
| List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS); |
| List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE); |
| |
| assertEquals(0, failedFlowFiles.size()); |
| assertEquals(1, successSeqFiles.size()); |
| |
| MockFlowFile ff = successSeqFiles.iterator().next(); |
        byte[] data = ff.toByteArray();

        final String magicHeader = new String(data, 0, 3, StandardCharsets.UTF_8);
        assertEquals("SEQ", magicHeader);
        // The header is "SEQ" followed by the version (1 byte), then the key class name
        // and the value class name, each preceded by its length (a vint, which is a
        // single byte for names this short).
        final String keyType = Text.class.getCanonicalName();
        final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
        final int valueTypeLength = data[5 + keyType.length()];
        final String valueType = BytesWritable.class.getCanonicalName();

        assertEquals(valueType.length(), valueTypeLength);
        assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), StandardCharsets.UTF_8));

        // After the class names come two boolean flags: compressed and block-compressed.
        final int compressionIndex = 3 + 1 + 1 + keyType.length() + 1 + valueType.length();
        final int blockCompressionIndex = compressionIndex + 1;

        assertEquals(1, data[compressionIndex]);
        assertEquals(1, data[blockCompressionIndex]);
| |
| final int codecTypeSize = data[blockCompressionIndex + 1]; |
| final int codecTypeStartIndex = blockCompressionIndex + 2; |
| |
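        // The codec recorded in the header is DefaultCodec even though the codec property
        // is NONE: BLOCK compression always requires a codec, and the sequence file
        // writer falls back to the default one when none is configured.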
        assertEquals(DefaultCodec.class.getCanonicalName(), new String(data, codecTypeStartIndex, codecTypeSize, StandardCharsets.UTF_8));
| } |
| |
    /**
     * Test subclass that supplies the injected {@link KerberosProperties} so the tests
     * do not depend on a real Kerberos configuration file.
     */
    private static class TestableCreateHadoopSequenceFile extends CreateHadoopSequenceFile {

        private final KerberosProperties testKerberosProperties;

        public TestableCreateHadoopSequenceFile(KerberosProperties testKerberosProperties) {
            this.testKerberosProperties = testKerberosProperties;
        }

        @Override
        protected KerberosProperties getKerberosProperties(File kerberosConfigFile) {
            return testKerberosProperties;
        }
| } |
| |
| } |