| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.fury.format.row.binary; |
| |
| import static org.apache.fury.util.Preconditions.checkArgument; |
| |
| import java.math.BigDecimal; |
| import java.util.HashMap; |
| import java.util.Map; |
| import org.apache.arrow.vector.types.pojo.ArrowType; |
| import org.apache.arrow.vector.types.pojo.Field; |
| import org.apache.arrow.vector.types.pojo.Schema; |
| import org.apache.fury.format.row.Row; |
| import org.apache.fury.format.type.DataTypes; |
| import org.apache.fury.memory.BitUtils; |
| import org.apache.fury.memory.MemoryBuffer; |
| import org.apache.fury.memory.MemoryUtils; |
| import org.apache.fury.util.Preconditions; |
| |
| /** |
| * A binary implementation of {@link Row} backed by binary buffer instead of java objects. |
| * |
| * <ul> |
| * <li>Validity Bit Set Bitmap Region (1 bit/field) for tracking null values. Primitive type is |
| * always considered to be not null. Set bit to 1 indicate the value is not null, Set bit to 0 |
| * indicate null |
| * <li>Fixed-Length 8-byte Values Region. if field isn't aligned, read any length-gt-1 value may |
| * need read multi times cache. |
| * <li>Variable-Length Data Section |
| * </ul> |
| * |
| * <p>Equality comparison and hashing of rows can be performed on raw bytes since if two rows are |
| * identical so should be their bit-wise representation. |
| * |
| * <ul> |
| * BinaryRow is inspired by Apache Spark tungsten UnsafeRow, the differences are |
| * <li>Use arrow schema to describe meta. |
| * <li>String support latin/utf16/utf8 encoding. |
| * <li>Decimal use arrow decimal format. |
| * <li>Variable-size field can be inline in fixed-size region if small enough. |
| * <li>Allow skip padding by generate Row using aot to put offsets in generated code. |
| * <li>The implementation support java/C++/python/etc.. |
| * <li>Support adding fields without breaking compatibility |
| * </ul> |
| */ |
| public class BinaryRow extends UnsafeTrait implements Row { |
| private final Schema schema; |
| private final int numFields; |
| private final int bitmapWidthInBytes; |
| private MemoryBuffer buffer; |
| private int baseOffset; |
| private int sizeInBytes; |
| |
| public BinaryRow(Schema schema) { |
| this.schema = schema; |
| this.numFields = schema.getFields().size(); |
| Preconditions.checkArgument(numFields > 0); |
| this.bitmapWidthInBytes = BitUtils.calculateBitmapWidthInBytes(numFields); |
| } |
| |
| public void pointTo(MemoryBuffer buffer, int offset, int sizeInBytes) { |
| this.buffer = buffer; |
| this.baseOffset = offset; |
| this.sizeInBytes = sizeInBytes; |
| } |
| |
| @Override |
| public Schema getSchema() { |
| return schema; |
| } |
| |
| @Override |
| public int numFields() { |
| return numFields; |
| } |
| |
| public int getSizeInBytes() { |
| return sizeInBytes; |
| } |
| |
| @Override |
| public int getBaseOffset() { |
| return baseOffset; |
| } |
| |
| @Override |
| public MemoryBuffer getBuffer() { |
| return buffer; |
| } |
| |
| @Override |
| public int getOffset(int ordinal) { |
| return baseOffset + bitmapWidthInBytes + (ordinal << 3); // ordinal * 8 = (ordinal << 3) |
| } |
| |
| @Override |
| public void assertIndexIsValid(int index) { |
| assert index >= 0 : "index (" + index + ") should >= 0"; |
| checkArgument(index < numFields, "index (%d) should < %d", index, numFields); |
| } |
| |
| @Override |
| public boolean isNullAt(int ordinal) { |
| return BitUtils.isSet(buffer, baseOffset, ordinal); |
| } |
| |
| @Override |
| public boolean anyNull() { |
| return BitUtils.anySet(buffer, baseOffset, bitmapWidthInBytes); |
| } |
| |
| @Override |
| public void setNullAt(int ordinal) { |
| assertIndexIsValid(ordinal); |
| BitUtils.set(buffer, baseOffset, ordinal); |
| assert DataTypes.getTypeWidth(schema.getFields().get(ordinal).getType()) > 0 |
| : "field[ " |
| + ordinal |
| + " " |
| + schema.getFields().get(ordinal).getType() |
| + " ] " |
| + "must be fixed-width"; |
| // To preserve row equality, zero out the value when setting the column to null. |
| // Since this row does not currently support updates to variable-length values, we don't |
| // have to worry about zeroing out that data. |
| buffer.putInt64(getOffset(ordinal), 0); |
| } |
| |
| public void setNotNullAt(int ordinal) { |
| assertIndexIsValid(ordinal); |
| BitUtils.unset(buffer, baseOffset, ordinal); |
| } |
| |
| @Override |
| public BigDecimal getDecimal(int ordinal) { |
| return getDecimal(ordinal, (ArrowType.Decimal) schema.getFields().get(ordinal).getType()); |
| } |
| |
| @Override |
| public BinaryRow getStruct(int ordinal) { |
| return getStruct(ordinal, schema.getFields().get(ordinal)); |
| } |
| |
| @Override |
| public BinaryArray getArray(int ordinal) { |
| return getArray(ordinal, schema.getFields().get(ordinal)); |
| } |
| |
| @Override |
| public BinaryMap getMap(int ordinal) { |
| return getMap(ordinal, schema.getFields().get(ordinal)); |
| } |
| |
| @Override |
| public Row copy() { |
| MemoryBuffer copyBuf = MemoryUtils.buffer(sizeInBytes); |
| buffer.copyTo(baseOffset, copyBuf, 0, sizeInBytes); |
| BinaryRow copyRow = new BinaryRow(schema); |
| copyRow.pointTo(copyBuf, 0, sizeInBytes); |
| return copyRow; |
| } |
| |
| @Override |
| public String toString() { |
| if (buffer == null) { |
| return "null"; |
| } else { |
| StringBuilder build = new StringBuilder("{"); |
| for (int i = 0; i < numFields; i++) { |
| if (i != 0) { |
| build.append(", "); |
| } |
| Field field = schema.getFields().get(i); |
| build.append(field.getName()).append("="); |
| if (isNullAt(i)) { |
| build.append("null"); |
| } else { |
| build.append(get(i, field)); |
| } |
| } |
| |
| build.append("}"); |
| return build.toString(); |
| } |
| } |
| |
| public String toDebugString() { |
| if (buffer == null) { |
| return "null"; |
| } else { |
| StringBuilder build = new StringBuilder(); |
| for (int i = 0; i < bitmapWidthInBytes + 8 * numFields; i += 8) { |
| if (i != 0) { |
| build.append(','); |
| } |
| build.append(Long.toHexString(buffer.getInt64(baseOffset + i))); |
| } |
| return build.toString(); |
| } |
| } |
| |
| public Map<String, Object> toMap() { |
| Map<String, Object> map = new HashMap<>(); |
| for (int i = 0; i < numFields; i++) { |
| Field field = schema.getFields().get(i); |
| map.put(field.getName(), get(i, field)); |
| } |
| return map; |
| } |
| |
| public byte[] toBytes() { |
| return buffer.getBytes(baseOffset, sizeInBytes); |
| } |
| |
| /** |
| * If it is a fixed-length field, we can call this BinaryRow's setXX method for in-place updates. |
| * If it is variable-length field, can't use this method, because the underlying data is stored |
| * continuously. |
| */ |
| public static boolean isFixedLength(ArrowType type) { |
| return DataTypes.getTypeWidth(type) > 0; |
| } |
| } |