blob: 0f45aaa37607acf6b40c3da439829cba66f1fb54 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.flink.serialization;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
import org.apache.arrow.vector.DecimalVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.Float4Vector;
import org.apache.arrow.vector.Float8Vector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.SmallIntVector;
import org.apache.arrow.vector.TinyIntVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.dictionary.DictionaryProvider;
import org.apache.arrow.vector.ipc.ArrowStreamWriter;
import org.apache.arrow.vector.types.FloatingPointPrecision;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.doris.flink.rest.RestService;
import org.apache.doris.flink.rest.models.Schema;
import org.apache.doris.thrift.TScanBatchResult;
import org.apache.doris.thrift.TStatus;
import org.apache.doris.thrift.TStatusCode;
import org.apache.flink.shaded.guava18.com.google.common.collect.ImmutableList;
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.table.data.DecimalData;
import org.apache.flink.table.data.StringData;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.math.BigDecimal;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;
import static org.hamcrest.core.StringStartsWith.startsWith;
public class TestRowBatch {
private static Logger logger = LoggerFactory.getLogger(TestRowBatch.class);
@Rule
public ExpectedException thrown = ExpectedException.none();
@Test
public void testRowBatch() throws Exception {
// schema
ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
childrenBuilder.add(new Field("k0", FieldType.nullable(new ArrowType.Bool()), null));
childrenBuilder.add(new Field("k1", FieldType.nullable(new ArrowType.Int(8, true)), null));
childrenBuilder.add(new Field("k2", FieldType.nullable(new ArrowType.Int(16, true)), null));
childrenBuilder.add(new Field("k3", FieldType.nullable(new ArrowType.Int(32, true)), null));
childrenBuilder.add(new Field("k4", FieldType.nullable(new ArrowType.Int(64, true)), null));
childrenBuilder.add(new Field("k9", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)), null));
childrenBuilder.add(new Field("k8", FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), null));
childrenBuilder.add(new Field("k10", FieldType.nullable(new ArrowType.Utf8()), null));
childrenBuilder.add(new Field("k11", FieldType.nullable(new ArrowType.Utf8()), null));
childrenBuilder.add(new Field("k5", FieldType.nullable(new ArrowType.Decimal(9,2)), null));
childrenBuilder.add(new Field("k6", FieldType.nullable(new ArrowType.Utf8()), null));
VectorSchemaRoot root = VectorSchemaRoot.create(
new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null),
new RootAllocator(Integer.MAX_VALUE));
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(
root,
new DictionaryProvider.MapDictionaryProvider(),
outputStream);
arrowStreamWriter.start();
root.setRowCount(3);
FieldVector vector = root.getVector("k0");
BitVector bitVector = (BitVector)vector;
bitVector.setInitialCapacity(3);
bitVector.allocateNew(3);
bitVector.setSafe(0, 1);
bitVector.setSafe(1, 0);
bitVector.setSafe(2, 1);
vector.setValueCount(3);
vector = root.getVector("k1");
TinyIntVector tinyIntVector = (TinyIntVector)vector;
tinyIntVector.setInitialCapacity(3);
tinyIntVector.allocateNew(3);
tinyIntVector.setSafe(0, 1);
tinyIntVector.setSafe(1, 2);
tinyIntVector.setSafe(2, 3);
vector.setValueCount(3);
vector = root.getVector("k2");
SmallIntVector smallIntVector = (SmallIntVector)vector;
smallIntVector.setInitialCapacity(3);
smallIntVector.allocateNew(3);
smallIntVector.setSafe(0, 1);
smallIntVector.setSafe(1, 2);
smallIntVector.setSafe(2, 3);
vector.setValueCount(3);
vector = root.getVector("k3");
IntVector intVector = (IntVector)vector;
intVector.setInitialCapacity(3);
intVector.allocateNew(3);
intVector.setSafe(0, 1);
intVector.setNull(1);
intVector.setSafe(2, 3);
vector.setValueCount(3);
vector = root.getVector("k4");
BigIntVector bigIntVector = (BigIntVector)vector;
bigIntVector.setInitialCapacity(3);
bigIntVector.allocateNew(3);
bigIntVector.setSafe(0, 1);
bigIntVector.setSafe(1, 2);
bigIntVector.setSafe(2, 3);
vector.setValueCount(3);
vector = root.getVector("k5");
DecimalVector decimalVector = (DecimalVector)vector;
decimalVector.setInitialCapacity(3);
decimalVector.allocateNew();
decimalVector.setIndexDefined(0);
decimalVector.setSafe(0, new BigDecimal("12.34"));
decimalVector.setIndexDefined(1);
decimalVector.setSafe(1, new BigDecimal("88.88"));
decimalVector.setIndexDefined(2);
decimalVector.setSafe(2, new BigDecimal("10.22"));
vector.setValueCount(3);
vector = root.getVector("k6");
VarCharVector charVector = (VarCharVector)vector;
charVector.setInitialCapacity(3);
charVector.allocateNew();
charVector.setIndexDefined(0);
charVector.setValueLengthSafe(0, 5);
charVector.setSafe(0, "char1".getBytes());
charVector.setIndexDefined(1);
charVector.setValueLengthSafe(1, 5);
charVector.setSafe(1, "char2".getBytes());
charVector.setIndexDefined(2);
charVector.setValueLengthSafe(2, 5);
charVector.setSafe(2, "char3".getBytes());
vector.setValueCount(3);
vector = root.getVector("k8");
Float8Vector float8Vector = (Float8Vector)vector;
float8Vector.setInitialCapacity(3);
float8Vector.allocateNew(3);
float8Vector.setSafe(0, 1.1);
float8Vector.setSafe(1, 2.2);
float8Vector.setSafe(2, 3.3);
vector.setValueCount(3);
vector = root.getVector("k9");
Float4Vector float4Vector = (Float4Vector)vector;
float4Vector.setInitialCapacity(3);
float4Vector.allocateNew(3);
float4Vector.setSafe(0, 1.1f);
float4Vector.setSafe(1, 2.2f);
float4Vector.setSafe(2, 3.3f);
vector.setValueCount(3);
vector = root.getVector("k10");
VarCharVector datecharVector = (VarCharVector)vector;
datecharVector.setInitialCapacity(3);
datecharVector.allocateNew();
datecharVector.setIndexDefined(0);
datecharVector.setValueLengthSafe(0, 5);
datecharVector.setSafe(0, "2008-08-08".getBytes());
datecharVector.setIndexDefined(1);
datecharVector.setValueLengthSafe(1, 5);
datecharVector.setSafe(1, "1900-08-08".getBytes());
datecharVector.setIndexDefined(2);
datecharVector.setValueLengthSafe(2, 5);
datecharVector.setSafe(2, "2100-08-08".getBytes());
vector.setValueCount(3);
vector = root.getVector("k11");
VarCharVector timecharVector = (VarCharVector)vector;
timecharVector.setInitialCapacity(3);
timecharVector.allocateNew();
timecharVector.setIndexDefined(0);
timecharVector.setValueLengthSafe(0, 5);
timecharVector.setSafe(0, "2008-08-08 00:00:00".getBytes());
timecharVector.setIndexDefined(1);
timecharVector.setValueLengthSafe(1, 5);
timecharVector.setSafe(1, "1900-08-08 00:00:00".getBytes());
timecharVector.setIndexDefined(2);
timecharVector.setValueLengthSafe(2, 5);
timecharVector.setSafe(2, "2100-08-08 00:00:00".getBytes());
vector.setValueCount(3);
arrowStreamWriter.writeBatch();
arrowStreamWriter.end();
arrowStreamWriter.close();
TStatus status = new TStatus();
status.setStatusCode(TStatusCode.OK);
TScanBatchResult scanBatchResult = new TScanBatchResult();
scanBatchResult.setStatus(status);
scanBatchResult.setEos(false);
scanBatchResult.setRows(outputStream.toByteArray());
String schemaStr = "{\"properties\":[{\"type\":\"BOOLEAN\",\"name\":\"k0\",\"comment\":\"\"},"
+ "{\"type\":\"TINYINT\",\"name\":\"k1\",\"comment\":\"\"},{\"type\":\"SMALLINT\",\"name\":\"k2\","
+ "\"comment\":\"\"},{\"type\":\"INT\",\"name\":\"k3\",\"comment\":\"\"},{\"type\":\"BIGINT\","
+ "\"name\":\"k4\",\"comment\":\"\"},{\"type\":\"FLOAT\",\"name\":\"k9\",\"comment\":\"\"},"
+ "{\"type\":\"DOUBLE\",\"name\":\"k8\",\"comment\":\"\"},{\"type\":\"DATE\",\"name\":\"k10\","
+ "\"comment\":\"\"},{\"type\":\"DATETIME\",\"name\":\"k11\",\"comment\":\"\"},"
+ "{\"name\":\"k5\",\"scale\":\"0\",\"comment\":\"\","
+ "\"type\":\"DECIMAL\",\"precision\":\"9\",\"aggregation_type\":\"\"},{\"type\":\"CHAR\",\"name\":\"k6\",\"comment\":\"\",\"aggregation_type\":\"REPLACE_IF_NOT_NULL\"}],"
+ "\"status\":200}";
Schema schema = RestService.parseSchema(schemaStr, logger);
RowBatch rowBatch = new RowBatch(scanBatchResult, schema).readArrow();
List<Object> expectedRow1 = Lists.newArrayList(
Boolean.TRUE,
(byte) 1,
(short) 1,
1,
1L,
(float) 1.1,
(double) 1.1,
StringData.fromString("2008-08-08"),
StringData.fromString("2008-08-08 00:00:00"),
DecimalData.fromBigDecimal(new BigDecimal(12.34), 4, 2),
StringData.fromString("char1")
);
List<Object> expectedRow2 = Arrays.asList(
Boolean.FALSE,
(byte) 2,
(short) 2,
null,
2L,
(float) 2.2,
(double) 2.2,
StringData.fromString("1900-08-08"),
StringData.fromString("1900-08-08 00:00:00"),
DecimalData.fromBigDecimal(new BigDecimal(88.88), 4, 2),
StringData.fromString("char2")
);
List<Object> expectedRow3 = Arrays.asList(
Boolean.TRUE,
(byte) 3,
(short) 3,
3,
3L,
(float) 3.3,
(double) 3.3,
StringData.fromString("2100-08-08"),
StringData.fromString("2100-08-08 00:00:00"),
DecimalData.fromBigDecimal(new BigDecimal(10.22), 4, 2),
StringData.fromString("char3")
);
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow1 = rowBatch.next();
Assert.assertEquals(expectedRow1, actualRow1);
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow2 = rowBatch.next();
Assert.assertEquals(expectedRow2, actualRow2);
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow3 = rowBatch.next();
Assert.assertEquals(expectedRow3, actualRow3);
Assert.assertFalse(rowBatch.hasNext());
thrown.expect(NoSuchElementException.class);
thrown.expectMessage(startsWith("Get row offset:"));
rowBatch.next();
}
@Test
public void testBinary() throws Exception {
byte[] binaryRow0 = {'a', 'b', 'c'};
byte[] binaryRow1 = {'d', 'e', 'f'};
byte[] binaryRow2 = {'g', 'h', 'i'};
ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
childrenBuilder.add(new Field("k7", FieldType.nullable(new ArrowType.Binary()), null));
VectorSchemaRoot root = VectorSchemaRoot.create(
new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null),
new RootAllocator(Integer.MAX_VALUE));
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(
root,
new DictionaryProvider.MapDictionaryProvider(),
outputStream);
arrowStreamWriter.start();
root.setRowCount(3);
FieldVector vector = root.getVector("k7");
VarBinaryVector varBinaryVector = (VarBinaryVector) vector;
varBinaryVector.setInitialCapacity(3);
varBinaryVector.allocateNew();
varBinaryVector.setIndexDefined(0);
varBinaryVector.setValueLengthSafe(0, 3);
varBinaryVector.setSafe(0, binaryRow0);
varBinaryVector.setIndexDefined(1);
varBinaryVector.setValueLengthSafe(1, 3);
varBinaryVector.setSafe(1, binaryRow1);
varBinaryVector.setIndexDefined(2);
varBinaryVector.setValueLengthSafe(2, 3);
varBinaryVector.setSafe(2, binaryRow2);
vector.setValueCount(3);
arrowStreamWriter.writeBatch();
arrowStreamWriter.end();
arrowStreamWriter.close();
TStatus status = new TStatus();
status.setStatusCode(TStatusCode.OK);
TScanBatchResult scanBatchResult = new TScanBatchResult();
scanBatchResult.setStatus(status);
scanBatchResult.setEos(false);
scanBatchResult.setRows(outputStream.toByteArray());
String schemaStr = "{\"properties\":[{\"type\":\"BINARY\",\"name\":\"k7\",\"comment\":\"\"}], \"status\":200}";
Schema schema = RestService.parseSchema(schemaStr, logger);
RowBatch rowBatch = new RowBatch(scanBatchResult, schema).readArrow();
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow0 = rowBatch.next();
Assert.assertArrayEquals(binaryRow0, (byte[])actualRow0.get(0));
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow1 = rowBatch.next();
Assert.assertArrayEquals(binaryRow1, (byte[])actualRow1.get(0));
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow2 = rowBatch.next();
Assert.assertArrayEquals(binaryRow2, (byte[])actualRow2.get(0));
Assert.assertFalse(rowBatch.hasNext());
thrown.expect(NoSuchElementException.class);
thrown.expectMessage(startsWith("Get row offset:"));
rowBatch.next();
}
@Test
public void testDecimalV2() throws Exception {
ImmutableList.Builder<Field> childrenBuilder = ImmutableList.builder();
childrenBuilder.add(new Field("k7", FieldType.nullable(new ArrowType.Decimal(27, 9)), null));
VectorSchemaRoot root = VectorSchemaRoot.create(
new org.apache.arrow.vector.types.pojo.Schema(childrenBuilder.build(), null),
new RootAllocator(Integer.MAX_VALUE));
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(
root,
new DictionaryProvider.MapDictionaryProvider(),
outputStream);
arrowStreamWriter.start();
root.setRowCount(3);
FieldVector vector = root.getVector("k7");
DecimalVector decimalVector = (DecimalVector) vector;
decimalVector.setInitialCapacity(3);
decimalVector.allocateNew(3);
decimalVector.setSafe(0, new BigDecimal("12.340000000"));
decimalVector.setSafe(1, new BigDecimal("88.880000000"));
decimalVector.setSafe(2, new BigDecimal("10.000000000"));
vector.setValueCount(3);
arrowStreamWriter.writeBatch();
arrowStreamWriter.end();
arrowStreamWriter.close();
TStatus status = new TStatus();
status.setStatusCode(TStatusCode.OK);
TScanBatchResult scanBatchResult = new TScanBatchResult();
scanBatchResult.setStatus(status);
scanBatchResult.setEos(false);
scanBatchResult.setRows(outputStream.toByteArray());
String schemaStr = "{\"properties\":[{\"type\":\"DECIMALV2\",\"scale\": 0,"
+ "\"precision\": 9, \"name\":\"k7\",\"comment\":\"\"}], "
+ "\"status\":200}";
Schema schema = RestService.parseSchema(schemaStr, logger);
RowBatch rowBatch = new RowBatch(scanBatchResult, schema).readArrow();
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow0 = rowBatch.next();
Assert.assertEquals(DecimalData.fromBigDecimal(new BigDecimal(12.340000000), 11, 9), actualRow0.get(0));
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow1 = rowBatch.next();
Assert.assertEquals(DecimalData.fromBigDecimal(new BigDecimal(88.880000000), 11, 9), actualRow1.get(0));
Assert.assertTrue(rowBatch.hasNext());
List<Object> actualRow2 = rowBatch.next();
Assert.assertEquals(DecimalData.fromBigDecimal(new BigDecimal(10.000000000),11, 9), actualRow2.get(0));
Assert.assertFalse(rowBatch.hasNext());
thrown.expect(NoSuchElementException.class);
thrown.expectMessage(startsWith("Get row offset:"));
rowBatch.next();
}
}