blob: fe53597aef81f6297721ddb270512c748a345667 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.paimon.python;
import org.apache.paimon.arrow.ArrowUtils;
import org.apache.paimon.arrow.reader.ArrowBatchReader;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.table.sink.TableWrite;
import org.apache.paimon.types.RowType;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowStreamReader;
import org.apache.arrow.vector.types.pojo.Field;
import java.io.ByteArrayInputStream;
import java.util.List;
import java.util.stream.Collectors;
/** Write Arrow bytes to Paimon. */
public class BytesWriter {
private final TableWrite tableWrite;
private final ArrowBatchReader arrowBatchReader;
private final BufferAllocator allocator;
private final List<Field> arrowFields;
public BytesWriter(TableWrite tableWrite, RowType rowType) {
this.tableWrite = tableWrite;
this.arrowBatchReader = new ArrowBatchReader(rowType, true);
this.allocator = new RootAllocator();
arrowFields =
rowType.getFields().stream()
.map(f -> ArrowUtils.toArrowField(f.name(), f.id(), f.type(), 0))
.collect(Collectors.toList());
}
public void write(byte[] bytes) throws Exception {
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
ArrowStreamReader arrowStreamReader = new ArrowStreamReader(bais, allocator);
VectorSchemaRoot vsr = arrowStreamReader.getVectorSchemaRoot();
if (!checkTypesIgnoreNullability(arrowFields, vsr.getSchema().getFields())) {
throw new RuntimeException(
String.format(
"Input schema isn't consistent with table schema.\n"
+ "\tTable schema is: %s\n"
+ "\tInput schema is: %s",
arrowFields, vsr.getSchema().getFields()));
}
while (arrowStreamReader.loadNextBatch()) {
Iterable<InternalRow> rows = arrowBatchReader.readBatch(vsr);
for (InternalRow row : rows) {
tableWrite.write(row);
}
}
arrowStreamReader.close();
}
public void close() {
allocator.close();
}
private boolean checkTypesIgnoreNullability(
List<Field> expectedFields, List<Field> actualFields) {
if (expectedFields.size() != actualFields.size()) {
return false;
}
for (int i = 0; i < expectedFields.size(); i++) {
Field expectedField = expectedFields.get(i);
Field actualField = actualFields.get(i);
// ArrowType doesn't have nullability (similar to DataTypeRoot)
if (!actualField.getType().equals(expectedField.getType())
|| !checkTypesIgnoreNullability(
expectedField.getChildren(), actualField.getChildren())) {
return false;
}
}
return true;
}
}