blob: 8d057bf1d0f65cd31718a0f0f92f15bda0b58369 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.csv;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.controller.ConfigurationContext;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.util.MockConfigurationContext;
import org.junit.Test;
public class TestCSVHeaderSchemaStrategy {
@Test
public void testSimple() throws SchemaNotFoundException, IOException {
final String headerLine = "\"a\", b, c, d, e\\,z, f";
final byte[] headerBytes = headerLine.getBytes();
final Map<PropertyDescriptor, String> properties = new HashMap<>();
properties.put(CSVUtils.CSV_FORMAT, CSVUtils.CUSTOM.getValue());
properties.put(CSVUtils.COMMENT_MARKER, "#");
properties.put(CSVUtils.VALUE_SEPARATOR, ",");
properties.put(CSVUtils.TRIM_FIELDS, "true");
properties.put(CSVUtils.QUOTE_CHAR, "\"");
properties.put(CSVUtils.ESCAPE_CHAR, "\\");
final ConfigurationContext context = new MockConfigurationContext(properties, null);
final CSVHeaderSchemaStrategy strategy = new CSVHeaderSchemaStrategy(context);
final RecordSchema schema;
try (final InputStream bais = new ByteArrayInputStream(headerBytes)) {
schema = strategy.getSchema(null, bais, null);
}
final List<String> expectedFieldNames = Arrays.asList("a", "b", "c", "d", "e,z", "f");
assertEquals(expectedFieldNames, schema.getFieldNames());
assertTrue(schema.getFields().stream()
.allMatch(field -> field.getDataType().equals(RecordFieldType.STRING.getDataType())));
}
@Test
public void testContainsDuplicateHeaderNames() throws SchemaNotFoundException, IOException {
final String headerLine = "a, a, b";
final byte[] headerBytes = headerLine.getBytes();
final Map<PropertyDescriptor, String> properties = new HashMap<>();
properties.put(CSVUtils.CSV_FORMAT, CSVUtils.CUSTOM.getValue());
properties.put(CSVUtils.COMMENT_MARKER, "#");
properties.put(CSVUtils.VALUE_SEPARATOR, ",");
properties.put(CSVUtils.TRIM_FIELDS, "true");
properties.put(CSVUtils.QUOTE_CHAR, "\"");
properties.put(CSVUtils.ESCAPE_CHAR, "\\");
final ConfigurationContext context = new MockConfigurationContext(properties, null);
final CSVHeaderSchemaStrategy strategy = new CSVHeaderSchemaStrategy(context);
final RecordSchema schema;
try (final InputStream bais = new ByteArrayInputStream(headerBytes)) {
schema = strategy.getSchema(null, bais, null);
}
final List<String> expectedFieldNames = Arrays.asList("a", "b");
assertEquals(expectedFieldNames, schema.getFieldNames());
assertTrue(schema.getFields().stream()
.allMatch(field -> field.getDataType().equals(RecordFieldType.STRING.getDataType())));
}
@Test
public void testWithEL() throws SchemaNotFoundException, IOException {
final String headerLine = "'a'; b; c; d; e^;z; f";
final byte[] headerBytes = headerLine.getBytes();
final Map<PropertyDescriptor, String> properties = new HashMap<>();
properties.put(CSVUtils.CSV_FORMAT, CSVUtils.CUSTOM.getValue());
properties.put(CSVUtils.COMMENT_MARKER, "#");
properties.put(CSVUtils.VALUE_SEPARATOR, "${csv.delimiter}");
properties.put(CSVUtils.TRIM_FIELDS, "true");
properties.put(CSVUtils.QUOTE_CHAR, "${csv.quote}");
properties.put(CSVUtils.ESCAPE_CHAR, "${csv.escape}");
final Map<String, String> variables = new HashMap<>();
variables.put("csv.delimiter", ";");
variables.put("csv.quote", "'");
variables.put("csv.escape", "^");
final ConfigurationContext context = new MockConfigurationContext(properties, null);
final CSVHeaderSchemaStrategy strategy = new CSVHeaderSchemaStrategy(context);
final RecordSchema schema;
try (final InputStream bais = new ByteArrayInputStream(headerBytes)) {
schema = strategy.getSchema(variables, bais, null);
}
final List<String> expectedFieldNames = Arrays.asList("a", "b", "c", "d", "e;z", "f");
assertEquals(expectedFieldNames, schema.getFieldNames());
assertTrue(schema.getFields().stream()
.allMatch(field -> field.getDataType().equals(RecordFieldType.STRING.getDataType())));
}
}