// blob: 3fcae4f844d7cd748e8e19c2decbb9a2068cfac9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.nifi.processors.kite;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData.Record;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import org.kitesdk.data.Dataset;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.DatasetReader;
import org.kitesdk.data.Datasets;
import static org.apache.nifi.processors.kite.TestUtil.invalidStreamFor;
import static org.apache.nifi.processors.kite.TestUtil.streamFor;
import static org.apache.nifi.processors.kite.TestUtil.user;
/**
 * Tests for {@link StoreInKiteDataset}, driven through the NiFi
 * {@link TestRunner} against a file-backed Kite dataset that is created in a
 * temporary folder before each test and deleted afterwards.
 */
@Ignore("Does not work on windows")
public class TestKiteStorageProcessor {

    /** Name of the processor counter that the tests read back. */
    private static final String STORED_RECORDS_COUNTER = "Stored records";

    /** Scratch directory that holds the dataset used by each test. */
    @Rule
    public TemporaryFolder temp = new TemporaryFolder();

    /** URI of the dataset created by {@link #createDataset()}; null until then. */
    private String datasetUri = null;

    /** Handle to the dataset created by {@link #createDataset()}. */
    private Dataset<Record> dataset = null;

    /**
     * Creates a dataset that uses {@code TestUtil.USER_SCHEMA} at a
     * {@code dataset:file:} URI pointing to the {@code ns/temp} folder inside
     * the temporary directory.
     *
     * @throws Exception if the folder or the dataset cannot be created
     */
    @Before
    public void createDataset() throws Exception {
        DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
                .schema(TestUtil.USER_SCHEMA)
                .build();
        this.datasetUri = "dataset:file:" + temp.newFolder("ns", "temp").toString();
        this.dataset = Datasets.create(datasetUri, descriptor, Record.class);
    }

    /**
     * Deletes the dataset created for the test.
     *
     * @throws Exception if the dataset cannot be deleted
     */
    @After
    public void deleteDataset() throws Exception {
        // JUnit runs @After methods even when a @Before method threw. If
        // createDataset() failed before assigning the URI there is nothing to
        // delete, and calling delete(null) would only add a second failure.
        if (datasetUri != null) {
            Datasets.delete(datasetUri);
        }
    }

    /**
     * The processor is invalid without a dataset URI and valid once it is set;
     * a stream of three users is routed to "success", counted, and readable
     * back from the dataset.
     */
    @Test
    public void testBasicStore() throws IOException {
        TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
        runner.assertNotValid();

        runner.setProperty(StoreInKiteDataset.KITE_DATASET_URI, datasetUri);
        runner.assertValid();

        List<Record> users = Lists.newArrayList(
                user("a", "a@example.com"),
                user("b", "b@example.com"),
                user("c", "c@example.com")
        );

        runner.enqueue(streamFor(users));
        runner.run();

        runner.assertAllFlowFilesTransferred("success", 1);
        runner.assertQueueEmpty();
        Assert.assertEquals("Should store 3 values",
                3, (long) runner.getCounterValue(STORED_RECORDS_COUNTER));

        // Close the reader once its contents have been copied so that it is
        // not still open when deleteDataset() removes the dataset.
        final DatasetReader<Record> reader = dataset.newReader();
        final List<Record> stored;
        try {
            // The cast picks the Iterable overload of newArrayList; the reader
            // is usable both as an Iterable and as an Iterator.
            stored = Lists.newArrayList((Iterable<Record>) reader);
        } finally {
            reader.close();
        }
        Assert.assertEquals("Records should match", users, stored);
    }

    /** A {@code view:} URI with a constraint passes property validation. */
    @Test
    public void testViewURI() {
        TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
        runner.setProperty(
                StoreInKiteDataset.KITE_DATASET_URI, "view:hive:ns/table?year=2015");
        runner.assertValid();
    }

    /** A dataset URI with an unknown scheme fails property validation. */
    @Test
    public void testInvalidURI() {
        TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
        runner.setProperty(
                StoreInKiteDataset.KITE_DATASET_URI, "dataset:unknown");
        runner.assertNotValid();
    }

    /**
     * A flow file built by {@code TestUtil.invalidStreamFor} from a single
     * user is routed to "failure".
     */
    @Test
    public void testUnreadableContent() throws IOException {
        TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
        runner.setProperty(StoreInKiteDataset.KITE_DATASET_URI, datasetUri);
        runner.assertValid();

        runner.enqueue(invalidStreamFor(user("a", "a@example.com")));
        runner.run();

        runner.assertAllFlowFilesTransferred("failure", 1);
    }

    /**
     * A large stream built by {@code TestUtil.invalidStreamFor} still ends up
     * on "success", with some but not all of its records stored.
     * NOTE(review): presumably the helper corrupts only part of the stream so
     * that the remaining blocks stay readable; confirm against TestUtil.
     */
    @Test
    public void testCorruptedBlocks() throws IOException {
        TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
        runner.setProperty(StoreInKiteDataset.KITE_DATASET_URI, datasetUri);
        runner.assertValid();

        final int recordCount = 10000;
        List<Record> records = Lists.newArrayList();
        for (int i = 0; i < recordCount; i += 1) {
            String num = String.valueOf(i);
            records.add(user(num, num + "@example.com"));
        }

        runner.enqueue(invalidStreamFor(records));
        runner.run();

        long stored = runner.getCounterValue(STORED_RECORDS_COUNTER);
        Assert.assertTrue("Should store some readable values",
                0 < stored && stored < recordCount);

        runner.assertAllFlowFilesTransferred("success", 1);
    }

    /**
     * A stream whose writer schema is incompatible with the dataset schema is
     * routed to "incompatible".
     */
    @Test
    public void testIncompatibleSchema() throws IOException {
        Schema incompatible = SchemaBuilder.record("User").fields()
                .requiredLong("id")
                .requiredString("username")
                .optionalString("email") // the dataset requires this field
                .endRecord();

        // this user has the email field and could be stored, but the schema is
        // still incompatible so the entire stream is rejected
        Record incompatibleUser = new Record(incompatible);
        incompatibleUser.put("id", 1L);
        incompatibleUser.put("username", "a");
        incompatibleUser.put("email", "a@example.com");

        TestRunner runner = TestRunners.newTestRunner(StoreInKiteDataset.class);
        runner.setProperty(StoreInKiteDataset.KITE_DATASET_URI, datasetUri);
        runner.assertValid();

        runner.enqueue(streamFor(incompatibleUser));
        runner.run();

        runner.assertAllFlowFilesTransferred("incompatible", 1);
    }
}