blob: 2878013820a5dfa2791c1754832b3208016c0ea9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.json;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.schema.inference.InferSchemaAccessStrategy;
import org.apache.nifi.schema.inference.TimeValueInference;
import org.apache.nifi.serialization.record.RecordFieldType;
import org.apache.nifi.serialization.record.RecordSchema;
import org.junit.Test;
import org.mockito.Mockito;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
/**
 * Exercises {@link JsonSchemaInference} through an {@link InferSchemaAccessStrategy} against the
 * {@code data-types.json} fixture and checks both the inferred type of every field and the order
 * in which the fields are reported.
 */
public class TestJsonSchemaInference {

    /** Date, time and timestamp patterns handed to the inference engine under test. */
    private final TimeValueInference timestampInference = new TimeValueInference("yyyy-MM-dd", "HH:mm:ss", "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");

    /**
     * Infers a schema from the fixture file and verifies each field's inferred type, followed by
     * the complete, ordered list of field names.
     *
     * @throws IOException if the fixture cannot be opened or read
     */
    @Test
    public void testInferenceIncludesAllRecords() throws IOException {
        final RecordSchema inferred = inferSchema(new File("src/test/resources/json/data-types.json"));

        assertFieldTypes(inferred, RecordFieldType.STRING, "varcharc");
        assertFieldTypes(inferred, RecordFieldType.INT, "uuid", "tinyintc");
        assertFieldTypes(inferred, RecordFieldType.STRING, "textc");
        assertEquals(RecordFieldType.DATE.getDataType("yyyy-MM-dd"), inferred.getDataType("datec").get());
        assertFieldTypes(inferred, RecordFieldType.INT, "smallintc", "mediumintc");
        assertFieldTypes(inferred, RecordFieldType.LONG, "longintc");
        assertFieldTypes(inferred, RecordFieldType.INT, "intc");
        assertFieldTypes(inferred, RecordFieldType.BIGINT, "bigintc");
        // We currently do not read BigDecimal from JSON as ObjectMapper in InferenceSchemaStrategy automatically reads it as double,
        // which is why "decimalc" is expected to be a DOUBLE here.
        assertFieldTypes(inferred, RecordFieldType.DOUBLE, "floatc", "doublec", "decimalc");
        assertEquals(RecordFieldType.TIMESTAMP.getDataType("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), inferred.getDataType("timestampc").get());
        assertEquals(RecordFieldType.TIME.getDataType("HH:mm:ss"), inferred.getDataType("timec").get());
        assertStringDataTypes(inferred, "charc", "tinytextc", "blobc", "mediumtextc");
        assertFieldTypes(inferred, RecordFieldType.INT, "enumc", "setc", "boolc");
        assertStringDataTypes(inferred, "binaryc");

        final List<String> expectedNames = Arrays.asList(
            "varcharc", "uuid", "tinyintc", "textc", "datec", "smallintc", "mediumintc", "longintc", "intc", "bigintc",
            "floatc", "doublec", "decimalc", "timestampc", "timec", "charc", "tinytextc", "blobc", "mediumtextc",
            "enumc", "setc", "boolc", "binaryc");
        assertEquals(expectedNames, inferred.getFieldNames());
    }

    /**
     * Opens the given JSON file through a buffered stream and runs schema inference over it.
     *
     * @param jsonFile the JSON document to infer a schema from
     * @return the schema produced by the access strategy
     * @throws IOException if the file cannot be opened or read
     */
    private RecordSchema inferSchema(final File jsonFile) throws IOException {
        try (final InputStream rawIn = new FileInputStream(jsonFile);
             final InputStream bufferedIn = new BufferedInputStream(rawIn)) {

            final JsonSchemaInference inferenceEngine = new JsonSchemaInference(timestampInference);
            final ComponentLog logger = Mockito.mock(ComponentLog.class);
            final InferSchemaAccessStrategy<?> strategy = new InferSchemaAccessStrategy<>(
                (variables, content) -> new JsonRecordSource(content), inferenceEngine, logger);

            // Only the content stream is supplied; the other two arguments are passed as null.
            return strategy.getSchema(null, bufferedIn, null);
        }
    }

    /**
     * Asserts, in the order given, that each named field was inferred with the expected field type.
     *
     * @param schema the inferred schema
     * @param expectedType the field type every named field must have
     * @param fieldNames the fields to check
     */
    private static void assertFieldTypes(final RecordSchema schema, final RecordFieldType expectedType, final String... fieldNames) {
        for (final String fieldName : fieldNames) {
            assertSame(expectedType, schema.getDataType(fieldName).get().getFieldType());
        }
    }

    /**
     * Asserts, in the order given, that each named field's full data type equals the default
     * STRING data type.
     *
     * @param schema the inferred schema
     * @param fieldNames the fields to check
     */
    private static void assertStringDataTypes(final RecordSchema schema, final String... fieldNames) {
        for (final String fieldName : fieldNames) {
            assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType(fieldName).get());
        }
    }
}