blob: 47e95e45c84255a3954b5897e125492578f774dd [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.uci.ics.hivesterix.serde.lazy;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
/**
* LazyStruct is serialized as follows: start A B A B A B end bytes[] ->
* |-----|---------|--- ... ---|-----|---------|
* Section A is one null-byte, corresponding to eight struct fields in Section
* B. Each bit indicates whether the corresponding field is null (0) or not null
* (1). Each field is a LazyObject.
* Following B, there is another section A and B. This pattern repeats until
* all struct fields are serialized.
*/
public class LazyStruct extends LazyNonPrimitive<LazyStructObjectInspector> {

    private static final Log LOG = LogFactory.getLog(LazyStruct.class.getName());

    /**
     * Number of consecutive struct fields whose null flags share one null
     * byte.
     */
    private static final int FIELDS_PER_NULL_BYTE = 8;

    /**
     * Whether the current byte range has already been parsed. Reset by
     * {@link #init(byte[], int, int)}.
     */
    boolean parsed;

    /**
     * One lazy object per struct field, created on the first parse and reused
     * afterwards. An entry is null when the field has no object inspector.
     */
    @SuppressWarnings("rawtypes")
    LazyObject[] fields;

    /**
     * Whether the lazy object of a field has been bound to its bytes since
     * the last parse.
     */
    boolean[] fieldInited;

    /**
     * Whether a field is null. A length of 0 does not mean that the field is
     * null; in particular, a 0-length string is not null.
     */
    boolean[] fieldIsNull;

    /**
     * The start positions and lengths of the struct fields. Only valid once
     * the data is parsed, and only for fields that are not null.
     */
    int[] fieldStart;
    int[] fieldLength;

    /**
     * Scratch object that receives the offset and size of each field during
     * parsing.
     */
    RecordInfo recordInfo = new LazyUtils.RecordInfo();

    /**
     * Set once the corresponding warning has been logged, so that each
     * problem is reported at most once per LazyStruct instance.
     */
    boolean missingFieldWarned = false;
    boolean extraFieldWarned = false;

    /**
     * List instance reused by {@link #getFieldsAsList()}.
     */
    ArrayList<Object> cachedList;

    /**
     * Construct a LazyStruct object with an ObjectInspector.
     *
     * @param oi
     *            The object inspector describing the struct fields.
     */
    protected LazyStruct(LazyStructObjectInspector oi) {
        super(oi);
    }

    /**
     * Point this struct at a new byte range and mark it as not parsed, so
     * that the next field access parses the new bytes.
     */
    @Override
    public void init(byte[] bytes, int start, int length) {
        super.init(bytes, start, length);
        parsed = false;
    }

    /**
     * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and
     * fieldIsNull.
     *
     * The layout is one null byte followed by up to eight fields, then
     * another null byte and up to eight fields, and so on. Logs a warning
     * (once per instance each) when bytes remain after the last expected
     * field or when the bytes end before all expected fields were seen.
     */
    private void parse() {
        List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
        if (fields == null) {
            allocateFieldState(fieldRefs);
        }

        int fieldId = 0;
        int structByteEnd = start + length;

        // A struct without fields has no null byte to read: do not touch
        // bytes[start] (it may lie outside the buffer) and do not step past
        // it, otherwise an empty struct is reported as missing fields.
        // NOTE(review): when the struct has fields, bytes[start] is still read
        // even if length == 0 - confirm callers never pass an empty range.
        byte nullByte = 0;
        int lastFieldByteEnd = start;
        if (fields.length > 0) {
            nullByte = bytes[start];
            lastFieldByteEnd = start + 1;
        }

        for (int i = 0; i < fields.length; i++) {
            int bitInNullByte = i % FIELDS_PER_NULL_BYTE;
            boolean present = (nullByte & (1 << bitInNullByte)) != 0;
            fieldIsNull[i] = !present;
            if (present) {
                LazyUtils.checkObjectByteInfo(fieldRefs.get(i).getFieldObjectInspector(), bytes, lastFieldByteEnd,
                        recordInfo);
                fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
                fieldLength[i] = recordInfo.elementSize;
                lastFieldByteEnd = fieldStart[i] + fieldLength[i];
            }

            // count how many fields fit inside the struct bytes
            if (lastFieldByteEnd <= structByteEnd) {
                fieldId++;
            }

            // A null byte only follows a full group of eight when more fields
            // are expected. Stepping over one after the very last field would
            // push lastFieldByteEnd past the end and trigger a bogus
            // "Missing fields" warning for structs with 8, 16, ... fields.
            boolean groupComplete = bitInNullByte == FIELDS_PER_NULL_BYTE - 1;
            boolean moreFieldsExpected = i + 1 < fields.length;
            if (groupComplete && moreFieldsExpected) {
                if (lastFieldByteEnd < structByteEnd) {
                    nullByte = bytes[lastFieldByteEnd];
                } else {
                    // no bytes left: all remaining fields are null
                    nullByte = 0;
                }
                // advance even when no byte was read, so that the remaining
                // fields are counted as missing
                lastFieldByteEnd++;
            }
        }

        // Extra bytes at the end?
        if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
            extraFieldWarned = true;
            LOG.warn("Extra bytes detected at the end of the row! Ignoring similar problems.");
        }

        // Missing fields?
        if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
            missingFieldWarned = true;
            LOG.warn("Missing fields! Expected " + fields.length + " fields but only got " + fieldId
                    + "! Ignoring similar problems.");
        }

        // the lazy field objects must be re-bound to the newly parsed ranges
        Arrays.fill(fieldInited, false);
        parsed = true;
    }

    /**
     * Create the per-field lazy objects and bookkeeping arrays, one slot per
     * field reference.
     */
    private void allocateFieldState(List<? extends StructField> fieldRefs) {
        fields = new LazyObject[fieldRefs.size()];
        for (int i = 0; i < fields.length; i++) {
            ObjectInspector insp = fieldRefs.get(i).getFieldObjectInspector();
            fields[i] = insp == null ? null : LazyFactory.createLazyObject(insp);
        }
        fieldInited = new boolean[fields.length];
        fieldIsNull = new boolean[fields.length];
        fieldStart = new int[fields.length];
        fieldLength = new int[fields.length];
    }

    /**
     * Get one field out of the struct, parsing the bytes first if that has
     * not happened since the last {@link #init(byte[], int, int)}.
     *
     * @param fieldID
     *            The id of the field starting from 0.
     * @return null if the field is null, otherwise the result of calling
     *         getObject() on the field's LazyObject (for a nested LazyStruct
     *         that is the LazyStruct itself)
     */
    public Object getField(int fieldID) {
        if (!parsed) {
            parse();
        }
        return uncheckedGetField(fieldID);
    }

    /**
     * Get the field out of the row without checking parsed. This is called by
     * both getField and getFieldsAsList.
     *
     * @param fieldID
     *            The id of the field starting from 0.
     * @return The value of the field, or null if the field is null
     */
    private Object uncheckedGetField(int fieldID) {
        if (fieldIsNull[fieldID]) {
            return null;
        }
        // Bind the lazy object to its byte range on first access after a parse.
        if (!fieldInited[fieldID]) {
            fieldInited[fieldID] = true;
            fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
        }
        return fields[fieldID].getObject();
    }

    /**
     * Get the values of the fields as an ArrayList.
     *
     * The same list instance is cleared and refilled on every call, so a
     * caller that needs to keep the values must copy them.
     *
     * @return The values of the fields as an ArrayList.
     */
    public ArrayList<Object> getFieldsAsList() {
        if (!parsed) {
            parse();
        }
        if (cachedList == null) {
            cachedList = new ArrayList<Object>(fields.length);
        } else {
            cachedList.clear();
        }
        for (int i = 0; i < fields.length; i++) {
            cachedList.add(uncheckedGetField(i));
        }
        return cachedList;
    }

    /**
     * A struct is its own object representation.
     *
     * @return this LazyStruct
     */
    @Override
    public Object getObject() {
        return this;
    }
}