/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.nifi.processors.hadoop;

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.nifi.components.AllowableValue;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.attributes.CoreAttributes;
import org.apache.nifi.flowfile.attributes.StandardFlowFileMediaType;
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.NiFiProperties;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class TestCreateHadoopSequenceFile {

private TestRunner controller;
private final File testdata = new File("src/test/resources/testdata");
private final File[] inFiles = new File[]{
        new File(testdata, "randombytes-1"),
        new File(testdata, "randombytes-2"),
        new File(testdata, "randombytes-3")
};
private NiFiProperties mockNiFiProperties;
private KerberosProperties kerberosProperties;

@BeforeClass
public static void setUpClass() {
System.setProperty("org.slf4j.simpleLogger.defaultLogLevel", "info");
System.setProperty("org.slf4j.simpleLogger.log.nifi.processors.hadoop", "debug");
}

@Before
public void setUp() {
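// Mock the NiFi properties and Kerberos configuration so the processor can be
// exercised without a real Kerberos environment or krb5.conf file.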
mockNiFiProperties = mock(NiFiProperties.class);
when(mockNiFiProperties.getKerberosConfigurationFile()).thenReturn(null);
kerberosProperties = new KerberosProperties(null);
CreateHadoopSequenceFile proc = new TestableCreateHadoopSequenceFile(kerberosProperties);
controller = TestRunners.newTestRunner(proc);
}

@After
public void tearDown() {
controller.clearTransferState();
}

@Test
public void validateAllowableValuesForCompressionType() {
PropertyDescriptor pd = CreateHadoopSequenceFile.COMPRESSION_TYPE;
List<AllowableValue> allowableValues = pd.getAllowableValues();
assertEquals("NONE", allowableValues.get(0).getValue());
assertEquals("RECORD", allowableValues.get(1).getValue());
assertEquals("BLOCK", allowableValues.get(2).getValue());
}

@Test
public void testSimpleCase() throws IOException {
for (File inFile : inFiles) {
try (FileInputStream fin = new FileInputStream(inFile)) {
controller.enqueue(fin);
}
}
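// Run the processor once for each of the three enqueued FlowFiles.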
controller.run(3);
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE);
assertEquals(0, failedFlowFiles.size());
assertEquals(3, successSeqFiles.size());
}

@Test
public void testSequenceFileSaysValueIsBytesWritable() throws IOException {
for (File inFile : inFiles) {
try (FileInputStream fin = new FileInputStream(inFile)) {
controller.enqueue(fin);
}
}
controller.run(3);
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE);
assertEquals(0, failedFlowFiles.size());
assertEquals(3, successSeqFiles.size());
final byte[] data = successSeqFiles.iterator().next().toByteArray();
final String magicHeader = new String(data, 0, 3, "UTF-8");
assertEquals("SEQ", magicHeader);
// The header format is "SEQ" followed by the version (1 byte), then the length of
// the key class name (a vint that fits in 1 byte here), the key class name, the
// length of the value class name (1 byte), and the value class name.
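// As a concrete sketch of the layout the assertions below rely on (offsets assume
// both class-name lengths fit in a single byte, which holds for these classes):
//   bytes [0..2] -> 'S', 'E', 'Q'
//   byte  [3]    -> version
//   byte  [4]    -> length of "org.apache.hadoop.io.Text"
//   bytes [5..]  -> "org.apache.hadoop.io.Text"
//   next byte    -> length of "org.apache.hadoop.io.BytesWritable"
//   following    -> "org.apache.hadoop.io.BytesWritable"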
final String keyType = Text.class.getCanonicalName();
final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
final int valueTypeLength = data[5 + keyType.length()];
final String valueType = BytesWritable.class.getCanonicalName();
assertEquals(valueType.length(), valueTypeLength);
assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), "UTF-8"));
}

@Test
public void testMergedTarData() throws IOException {
Map<String, String> attributes = new HashMap<>();
attributes.put(CoreAttributes.MIME_TYPE.key(), "application/tar");
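// With a tar MIME type, the processor should unpack the archive and write each
// tar entry as its own key/value record in the resulting sequence file.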
try (final FileInputStream fin = new FileInputStream("src/test/resources/testdata/13545312236534130.tar")) {
controller.enqueue(fin, attributes);
controller.run();
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
assertEquals(1, successSeqFiles.size());
final byte[] data = successSeqFiles.iterator().next().toByteArray();
// The data should be larger than 1,000,000 bytes because that is the combined size
// of two of our input files, and the output should contain all of that plus headers,
// which should only add a couple hundred bytes.
assertTrue(data.length > 1000000);
assertTrue(data.length < 1501000);
}
}

@Test
public void testMergedZipData() throws IOException {
Map<String, String> attributes = new HashMap<>();
attributes.put(CoreAttributes.MIME_TYPE.key(), "application/zip");
try (FileInputStream fin = new FileInputStream("src/test/resources/testdata/13545423550275052.zip")){
controller.enqueue(fin, attributes);
controller.run();
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
assertEquals(1, successSeqFiles.size());
final byte[] data = successSeqFiles.iterator().next().toByteArray();
// The data should be larger than 1,000,000 bytes because that is the combined size
// of two of our input files, and the output should contain all of that plus headers,
// which should only add a couple hundred bytes.
assertTrue(data.length > 1000000);
assertTrue(data.length < 1501000);
}
}

@Test
public void testMergedFlowfilePackagedData() throws IOException {
Map<String, String> attributes = new HashMap<>();
attributes.put(CoreAttributes.MIME_TYPE.key(), StandardFlowFileMediaType.VERSION_3.getMediaType());
try (final FileInputStream fin = new FileInputStream("src/test/resources/testdata/13545479542069498.pkg")) {
controller.enqueue(fin, attributes);
controller.run();
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
assertEquals(1, successSeqFiles.size());
final byte[] data = successSeqFiles.iterator().next().toByteArray();
// The data should be larger than 1,000,000 bytes because that is the combined size
// of two of our input files, and the output should contain all of that plus headers,
// which should only add a couple hundred bytes.
assertTrue(data.length > 1000000);
assertTrue(data.length < 1501000);
}
}

@Test
public void testSequenceFileBzipCompressionCodec() throws IOException {
controller.setProperty(AbstractHadoopProcessor.COMPRESSION_CODEC, CompressionType.BZIP.name());
controller.setProperty(CreateHadoopSequenceFile.COMPRESSION_TYPE, SequenceFile.CompressionType.BLOCK.name());
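// Request BLOCK compression with the BZip2 codec; the header written below should
// record both the compression flags and the codec class name.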
File inFile = inFiles[0];
try (FileInputStream fin = new FileInputStream(inFile)) {
controller.enqueue(fin);
}
controller.run();
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE);
assertEquals(0, failedFlowFiles.size());
assertEquals(1, successSeqFiles.size());
MockFlowFile ff = successSeqFiles.iterator().next();
byte[] data = ff.toByteArray();
final String magicHeader = new String(data, 0, 3, "UTF-8");
assertEquals("SEQ", magicHeader);
// The header format is "SEQ" followed by the version (1 byte), then the length of
// the key class name (a vint that fits in 1 byte here), the key class name, the
// length of the value class name (1 byte), and the value class name.
final String keyType = Text.class.getCanonicalName();
final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
final int valueTypeLength = data[5 + keyType.length()];
final String valueType = BytesWritable.class.getCanonicalName();
assertEquals(valueType.length(), valueTypeLength);
assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), "UTF-8"));
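// After the value class name, the header carries two boolean bytes: whether values
// are compressed and whether block compression is used. Both should be 1 here, and
// the codec class name (with a 1-byte length prefix) should follow them.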
final int compressionIndex = 3 + 1 + 1 + keyType.length() + 1 + valueType.length();
final int blockCompressionIndex = compressionIndex + 1;
assertEquals(1, data[compressionIndex]);
assertEquals(1, data[blockCompressionIndex]);
final int codecTypeSize = data[blockCompressionIndex + 1];
final int codecTypeStartIndex = blockCompressionIndex + 2;
assertEquals(BZip2Codec.class.getCanonicalName(), new String(data, codecTypeStartIndex, codecTypeSize, "UTF-8"));
}

@Test
public void testSequenceFileDefaultCompressionCodec() throws IOException {
controller.setProperty(AbstractHadoopProcessor.COMPRESSION_CODEC, CompressionType.DEFAULT.name());
controller.setProperty(CreateHadoopSequenceFile.COMPRESSION_TYPE, SequenceFile.CompressionType.BLOCK.name());
File inFile = inFiles[0];
try (FileInputStream fin = new FileInputStream(inFile)) {
controller.enqueue(fin);
}
controller.run();
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE);
assertEquals(0, failedFlowFiles.size());
assertEquals(1, successSeqFiles.size());
MockFlowFile ff = successSeqFiles.iterator().next();
byte[] data = ff.toByteArray();
final String magicHeader = new String(data, 0, 3, "UTF-8");
assertEquals("SEQ", magicHeader);
// The header format is "SEQ" followed by the version (1 byte), then the length of
// the key class name (a vint that fits in 1 byte here), the key class name, the
// length of the value class name (1 byte), and the value class name.
final String keyType = Text.class.getCanonicalName();
final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
final int valueTypeLength = data[5 + keyType.length()];
final String valueType = BytesWritable.class.getCanonicalName();
assertEquals(valueType.length(), valueTypeLength);
assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), "UTF-8"));
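// As in the BZip2 test, both compression flag bytes should be set, and the codec
// recorded in the header should be the DefaultCodec.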
final int compressionIndex = 3 + 1 + 1 + keyType.length() + 1 + valueType.length();
final int blockCompressionIndex = compressionIndex + 1;
assertEquals(1, data[compressionIndex]);
assertEquals(1, data[blockCompressionIndex]);
final int codecTypeSize = data[blockCompressionIndex + 1];
final int codecTypeStartIndex = blockCompressionIndex + 2;
assertEquals(DefaultCodec.class.getCanonicalName(), new String(data, codecTypeStartIndex, codecTypeSize, "UTF-8"));
}

@Test
public void testSequenceFileNoneCompressionCodec() throws IOException {
controller.setProperty(AbstractHadoopProcessor.COMPRESSION_CODEC, CompressionType.NONE.name());
controller.setProperty(CreateHadoopSequenceFile.COMPRESSION_TYPE, SequenceFile.CompressionType.BLOCK.name());
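// BLOCK compression still requires a codec, so even with the codec property set to
// NONE the file is expected to be written with the DefaultCodec, as the header
// assertions below verify.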
File inFile = inFiles[0];
try (FileInputStream fin = new FileInputStream(inFile)) {
controller.enqueue(fin);
}
controller.run();
List<MockFlowFile> successSeqFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_SUCCESS);
List<MockFlowFile> failedFlowFiles = controller.getFlowFilesForRelationship(CreateHadoopSequenceFile.RELATIONSHIP_FAILURE);
assertEquals(0, failedFlowFiles.size());
assertEquals(1, successSeqFiles.size());
MockFlowFile ff = successSeqFiles.iterator().next();
byte[] data = ff.toByteArray();
final String magicHeader = new String(data, 0, 3, "UTF-8");
assertEquals("SEQ", magicHeader);
// The header format is "SEQ" followed by the version (1 byte), then the length of
// the key class name (a vint that fits in 1 byte here), the key class name, the
// length of the value class name (1 byte), and the value class name.
final String keyType = Text.class.getCanonicalName();
final int valueTypeStart = 3 + 1 + 1 + keyType.length() + 1;
final int valueTypeLength = data[5 + keyType.length()];
final String valueType = BytesWritable.class.getCanonicalName();
assertEquals(valueType.length(), valueTypeLength);
assertEquals(valueType, new String(data, valueTypeStart, valueType.length(), "UTF-8"));
final int compressionIndex = 3 + 1 + 1 + keyType.length() + 1 + valueType.length();
final int blockCompressionIndex = compressionIndex + 1;
assertEquals(1, data[compressionIndex]);
assertEquals(1, data[blockCompressionIndex]);
final int codecTypeSize = data[blockCompressionIndex + 1];
final int codecTypeStartIndex = blockCompressionIndex + 2;
assertEquals(DefaultCodec.class.getCanonicalName(), new String(data, codecTypeStartIndex, codecTypeSize, "UTF-8"));
}
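
// Test subclass that returns the supplied KerberosProperties rather than reading a
// Kerberos configuration file, so no krb5.conf is required to run these tests.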
private static class TestableCreateHadoopSequenceFile extends CreateHadoopSequenceFile {
private final KerberosProperties testKerberosProperties;

public TestableCreateHadoopSequenceFile(KerberosProperties testKerberosProperties) {
    this.testKerberosProperties = testKerberosProperties;
}
@Override
protected KerberosProperties getKerberosProperties(File kerberosConfigFile) {
return testKerberosProperties;
}
}
}