blob: c8474b1d2146f7e667fe4c3e3ced34ba6addf5c9 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tajo.storage;
import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.datum.IntervalDatum;
import org.apache.tajo.exception.TajoRuntimeException;
import org.apache.tajo.exception.UnsupportedException;
import org.apache.tajo.exception.ValueTooLongForTypeCharactersException;
import org.apache.tajo.util.BitArray;
import java.nio.ByteBuffer;
public class RowStoreUtil {
/**
 * Resolves the root columns of {@code outSchema} against {@code inSchema}.
 *
 * @param inSchema  schema used to look up column ids by qualified name
 * @param outSchema schema whose root columns are looked up, in iteration order
 * @return an array of length {@code outSchema.size()} where slot {@code k} holds
 *         the value {@code inSchema.getColumnId(...)} returned for the qualified
 *         name of the {@code k}-th root column of {@code outSchema}
 */
public static int[] getTargetIds(Schema inSchema, Schema outSchema) {
  final int[] ids = new int[outSchema.size()];
  int slot = 0;
  for (Column outColumn : outSchema.getRootColumns()) {
    ids[slot++] = inSchema.getColumnId(outColumn.getQualifiedName());
  }
  return ids;
}
/**
 * Copies selected fields of {@code in} into {@code out}.
 *
 * <p>{@code out.clearOffset()} is invoked first; afterwards field {@code k} of
 * {@code out} is set to the datum found at position {@code targetIds[k]} of
 * {@code in}.
 *
 * @param in        tuple the datums are read from
 * @param out       tuple that receives the datums
 * @param targetIds positions in {@code in}, one per output field
 * @return the same {@code out} instance that was passed in
 */
public static Tuple project(Tuple in, Tuple out, int[] targetIds) {
  out.clearOffset();
  int outField = 0;
  for (int sourceField : targetIds) {
    out.put(outField++, in.asDatum(sourceField));
  }
  return out;
}
/**
 * Factory for {@link RowStoreEncoder}, whose constructor is private.
 *
 * @param schema schema handed to the new encoder
 * @return a newly constructed encoder for {@code schema}
 */
public static RowStoreEncoder createEncoder(Schema schema) {
  final RowStoreEncoder encoder = new RowStoreEncoder(schema);
  return encoder;
}
/**
 * Factory for {@link RowStoreDecoder}, whose constructor is private.
 *
 * @param schema schema handed to the new decoder
 * @return a newly constructed decoder for {@code schema}
 */
public static RowStoreDecoder createDecoder(Schema schema) {
  final RowStoreDecoder decoder = new RowStoreDecoder(schema);
  return decoder;
}
/**
 * Turns a byte array back into a {@link Tuple} according to a fixed schema.
 *
 * <p>The input starts with a header of {@code headerSize} bytes that is loaded
 * into the null-flag bit array; the values of the columns whose flag is not set
 * follow in schema order. A single bit array is reused across calls.
 */
public static class RowStoreDecoder {
  private Schema schema;
  private BitArray nullFlags;
  private int headerSize;

  private RowStoreDecoder(Schema schema) {
    this.schema = schema;
    this.nullFlags = new BitArray(schema.size());
    this.headerSize = this.nullFlags.bytesLength();
  }

  /**
   * Decodes one row.
   *
   * @param bytes header followed by the column values
   * @return a new {@code VTuple} with one field per schema column; columns
   *         flagged as null in the header hold the null datum
   * @throws TajoRuntimeException wrapping an {@code UnsupportedException} when a
   *         non-null column has a type this decoder does not handle
   */
  public Tuple toTuple(byte [] bytes) {
    nullFlags.clear();

    final ByteBuffer buffer = ByteBuffer.wrap(bytes);
    final Tuple result = new VTuple(schema.size());

    // Restrict the view to the header while the null flags are loaded,
    // then open it up again to the full input.
    buffer.limit(headerSize);
    nullFlags.fromByteBuffer(buffer);
    buffer.limit(bytes.length);

    for (int columnIdx = 0; columnIdx < schema.size(); columnIdx++) {
      if (nullFlags.get(columnIdx)) {
        result.put(columnIdx, DatumFactory.createNullDatum());
      } else {
        decodeColumn(buffer, result, columnIdx);
      }
    }
    return result;
  }

  /** Reads the next value from {@code buffer} and stores it in field {@code columnIdx} of {@code target}. */
  private void decodeColumn(ByteBuffer buffer, Tuple target, int columnIdx) {
    final TajoDataTypes.DataType dataType = schema.getColumn(columnIdx).getDataType();

    switch (dataType.getType()) {
      case BOOLEAN:
        target.put(columnIdx, DatumFactory.createBool(buffer.get()));
        break;

      case BIT:
        target.put(columnIdx, DatumFactory.createBit(buffer.get()));
        break;

      case CHAR: {
        // Fixed width: exactly the declared length is consumed.
        final byte[] fixedWidth = new byte[dataType.getLength()];
        buffer.get(fixedWidth);
        target.put(columnIdx, DatumFactory.createChar(fixedWidth));
        break;
      }

      case INT2:
        target.put(columnIdx, DatumFactory.createInt2(buffer.getShort()));
        break;

      case INT4:
      case DATE:
        target.put(columnIdx, DatumFactory.createFromInt4(dataType, buffer.getInt()));
        break;

      case INT8:
      case TIME:
      case TIMESTAMP:
        target.put(columnIdx, DatumFactory.createFromInt8(dataType, buffer.getLong()));
        break;

      case INTERVAL:
        // Arguments are evaluated left to right: the int is read before the long.
        target.put(columnIdx, new IntervalDatum(buffer.getInt(), buffer.getLong()));
        break;

      case FLOAT4:
        target.put(columnIdx, DatumFactory.createFloat4(buffer.getFloat()));
        break;

      case FLOAT8:
        target.put(columnIdx, DatumFactory.createFloat8(buffer.getDouble()));
        break;

      case TEXT: {
        // Variable width: an int length precedes the payload.
        final byte[] text = new byte[buffer.getInt()];
        buffer.get(text);
        target.put(columnIdx, DatumFactory.createText(text));
        break;
      }

      case BLOB: {
        final byte[] blob = new byte[buffer.getInt()];
        buffer.get(blob);
        target.put(columnIdx, DatumFactory.createBlob(blob));
        break;
      }

      default:
        throw new TajoRuntimeException(
            new UnsupportedException("data type '" + dataType.getType().name() + "'"));
    }
  }

  /** @return the schema this decoder was created with */
  public Schema getSchema() {
    return schema;
  }
}
/**
 * Serializes a {@link Tuple} into a byte array according to a fixed schema.
 *
 * <p>Layout of the result: a header of {@code headerSize} bytes holding the
 * null-flag bit array, followed by the values of the columns that are not
 * flagged as null, in schema order. {@code CHAR} values are written with
 * exactly the declared length (shorter values are zero-padded); {@code TEXT}
 * and {@code BLOB} values are written as an int length followed by the bytes.
 *
 * <p>The null-flag bit array is a field that is cleared and refilled on every
 * call to {@link #toBytes(Tuple)}, so one instance must not be used by several
 * threads at the same time.
 */
public static class RowStoreEncoder {
  /** Slack added to the size estimate; unused bytes are trimmed before returning. */
  private static final int RESERVED_BYTES = 100;

  private final Schema schema;
  private final BitArray nullFlags;
  private final int headerSize;

  private RowStoreEncoder(Schema schema) {
    this.schema = schema;
    nullFlags = new BitArray(schema.size());
    headerSize = nullFlags.bytesLength();
  }

  /**
   * Encodes one row.
   *
   * @param tuple row to encode; field {@code i} is interpreted using the type
   *              of schema column {@code i}
   * @return header plus encoded values, sized to exactly the bytes written
   * @throws ValueTooLongForTypeCharactersException if a {@code CHAR} value is
   *         longer than the declared length of its column
   * @throws TajoRuntimeException wrapping an {@code UnsupportedException} when a
   *         non-null column has a type this encoder does not handle
   */
  public byte[] toBytes(Tuple tuple) {
    nullFlags.clear();
    int size = estimateTupleDataSize(tuple);
    ByteBuffer bb = ByteBuffer.allocate(size + headerSize);
    // Leave room for the header; it is filled in once all flags are known.
    bb.position(headerSize);

    Column col;
    for (int i = 0; i < schema.size(); i++) {
      if (tuple.isBlankOrNull(i)) {
        nullFlags.set(i);
        continue;
      }

      col = schema.getColumn(i);
      switch (col.getDataType().getType()) {
        case NULL_TYPE:
          // A NULL_TYPE column never carries a payload; it is only flagged.
          nullFlags.set(i);
          break;

        case BOOLEAN:
        case BIT:
          bb.put(tuple.getByte(i));
          break;

        case CHAR: {
          int charSize = col.getDataType().getLength();
          byte[] padded = new byte[charSize];
          byte[] src = tuple.getBytes(i);
          if (charSize < src.length) {
            throw new ValueTooLongForTypeCharactersException(charSize);
          }
          // The tail of 'padded' stays zero, giving a fixed-width field.
          System.arraycopy(src, 0, padded, 0, src.length);
          bb.put(padded);
          break;
        }

        case INT2:
          bb.putShort(tuple.getInt2(i));
          break;

        case INT4:
        case DATE:
          bb.putInt(tuple.getInt4(i));
          break;

        case INT8:
        case TIME:
        case TIMESTAMP:
          bb.putLong(tuple.getInt8(i));
          break;

        case FLOAT4:
          bb.putFloat(tuple.getFloat4(i));
          break;

        case FLOAT8:
          bb.putDouble(tuple.getFloat8(i));
          break;

        case INTERVAL: {
          IntervalDatum interval = (IntervalDatum) tuple.getInterval(i);
          bb.putInt(interval.getMonths());
          bb.putLong(interval.getMilliSeconds());
          break;
        }

        case TEXT:
        case BLOB: {
          // Variable width: length prefix, then the raw bytes.
          byte[] payload = tuple.getBytes(i);
          bb.putInt(payload.length);
          bb.put(payload);
          break;
        }

        default:
          throw new TajoRuntimeException(
              new UnsupportedException("data type '" + col.getDataType().getType().name() + "'"));
      }
    }

    // Remember where the payload ends, then go back and write the header.
    byte[] flags = nullFlags.toArray();
    int finalPosition = bb.position();
    bb.position(0);
    bb.put(flags);

    // Copy out only what was written, dropping the unused reservation.
    bb.position(finalPosition);
    bb.flip();
    byte[] buf = new byte[bb.limit()];
    bb.get(buf);
    return buf;
  }

  /**
   * Computes how many payload bytes {@link #toBytes(Tuple)} needs for
   * {@code tuple}, plus {@link #RESERVED_BYTES} of slack. The header is not
   * included, and blank or null fields contribute nothing because they are
   * represented only by their null flag.
   */
  private int estimateTupleDataSize(Tuple tuple) {
    int size = 0;
    Column col;

    for (int i = 0; i < schema.size(); i++) {
      if (tuple.isBlankOrNull(i)) {
        continue;
      }

      col = schema.getColumn(i);
      switch (col.getDataType().getType()) {
        case NULL_TYPE:
          // toBytes() only sets the null flag for this type and writes no
          // payload, so it must be accepted here with a size of zero.
          break;
        case BOOLEAN:
        case BIT:
          size += 1;
          break;
        case CHAR:
          size += col.getDataType().getLength();
          break;
        case INT2:
          size += 2;
          break;
        case DATE:
        case INT4:
        case FLOAT4:
          size += 4;
          break;
        case TIME:
        case TIMESTAMP:
        case INT8:
        case FLOAT8:
          size += 8;
          break;
        case INTERVAL:
          // int months + long milliseconds
          size += 12;
          break;
        case TEXT:
        case BLOB:
          size += (4 + tuple.getBytes(i).length);
          break;
        default:
          throw new TajoRuntimeException(
              new UnsupportedException("data type '" + col.getDataType().getType().name() + "'"));
      }
    }

    return size + RESERVED_BYTES;
  }

  /** @return the schema this encoder was created with */
  public Schema getSchema() {
    return schema;
  }
}
}