| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package edu.uci.ics.hivesterix.serde.lazy; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.List; |
| |
| import org.apache.commons.logging.Log; |
| import org.apache.commons.logging.LogFactory; |
| import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; |
| import org.apache.hadoop.hive.serde2.objectinspector.StructField; |
| import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; |
| |
| import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo; |
| import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector; |
| |
| /** |
| * LazyStruct is serialized as follows: start A B A B A B end bytes[] -> |
| * |-----|---------|--- ... ---|-----|---------| |
| * Section A is one null-byte, corresponding to eight struct fields in Section |
| * B. Each bit indicates whether the corresponding field is null (0) or not null |
| * (1). Each field is a LazyObject. |
| * Following B, there is another section A and B. This pattern repeats until the |
| * all struct fields are serialized. |
| */ |
| public class LazyStruct extends LazyNonPrimitive<LazyStructObjectInspector> { |
| |
| private static Log LOG = LogFactory.getLog(LazyStruct.class.getName()); |
| |
| /** |
| * Whether the data is already parsed or not. |
| */ |
| boolean parsed; |
| |
| /** |
| * The fields of the struct. |
| */ |
| @SuppressWarnings("rawtypes") |
| LazyObject[] fields; |
| |
| /** |
| * Whether a field is initialized or not. |
| */ |
| boolean[] fieldInited; |
| |
| /** |
| * Whether a field is null or not. Because length is 0 does not means the |
| * field is null. In particular, a 0-length string is not null. |
| */ |
| boolean[] fieldIsNull; |
| |
| /** |
| * The start positions and lengths of struct fields. Only valid when the |
| * data is parsed. |
| */ |
| int[] fieldStart; |
| int[] fieldLength; |
| |
| /** |
| * Construct a LazyStruct object with an ObjectInspector. |
| */ |
| protected LazyStruct(LazyStructObjectInspector oi) { |
| super(oi); |
| } |
| |
| @Override |
| public void init(byte[] bytes, int start, int length) { |
| super.init(bytes, start, length); |
| parsed = false; |
| } |
| |
| RecordInfo recordInfo = new LazyUtils.RecordInfo(); |
| boolean missingFieldWarned = false; |
| boolean extraFieldWarned = false; |
| |
| /** |
| * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and |
| * fieldIsNull. |
| */ |
| private void parse() { |
| |
| List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs(); |
| |
| if (fields == null) { |
| fields = new LazyObject[fieldRefs.size()]; |
| for (int i = 0; i < fields.length; i++) { |
| ObjectInspector insp = fieldRefs.get(i).getFieldObjectInspector(); |
| fields[i] = insp == null ? null : LazyFactory.createLazyObject(insp); |
| } |
| fieldInited = new boolean[fields.length]; |
| fieldIsNull = new boolean[fields.length]; |
| fieldStart = new int[fields.length]; |
| fieldLength = new int[fields.length]; |
| } |
| |
| /** |
| * Please note that one null byte is followed by eight fields, then more |
| * null byte and fields. |
| */ |
| |
| int fieldId = 0; |
| int structByteEnd = start + length; |
| |
| byte nullByte = bytes[start]; |
| int lastFieldByteEnd = start + 1; |
| // Go through all bytes in the byte[] |
| for (int i = 0; i < fields.length; i++) { |
| fieldIsNull[i] = true; |
| if ((nullByte & (1 << (i % 8))) != 0) { |
| fieldIsNull[i] = false; |
| LazyUtils.checkObjectByteInfo(fieldRefs.get(i).getFieldObjectInspector(), bytes, lastFieldByteEnd, |
| recordInfo); |
| fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset; |
| fieldLength[i] = recordInfo.elementSize; |
| lastFieldByteEnd = fieldStart[i] + fieldLength[i]; |
| } |
| |
| // count how many fields are there |
| if (lastFieldByteEnd <= structByteEnd) { |
| fieldId++; |
| } |
| // next byte is a null byte if there are more bytes to go |
| if (7 == (i % 8)) { |
| if (lastFieldByteEnd < structByteEnd) { |
| nullByte = bytes[lastFieldByteEnd]; |
| lastFieldByteEnd++; |
| } else { |
| // otherwise all null afterwards |
| nullByte = 0; |
| lastFieldByteEnd++; |
| } |
| } |
| } |
| |
| // Extra bytes at the end? |
| if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) { |
| extraFieldWarned = true; |
| LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " + "problems."); |
| } |
| |
| // Missing fields? |
| if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) { |
| missingFieldWarned = true; |
| LOG.warn("Missing fields! Expected " + fields.length + " fields but " + "only got " + fieldId |
| + "! Ignoring similar problems."); |
| } |
| |
| Arrays.fill(fieldInited, false); |
| parsed = true; |
| } |
| |
| /** |
| * Get one field out of the struct. |
| * If the field is a primitive field, return the actual object. Otherwise |
| * return the LazyObject. This is because PrimitiveObjectInspector does not |
| * have control over the object used by the user - the user simply directly |
| * use the Object instead of going through Object |
| * PrimitiveObjectInspector.get(Object). |
| * |
| * @param fieldID |
| * The field ID |
| * @return The field as a LazyObject |
| */ |
| public Object getField(int fieldID) { |
| if (!parsed) { |
| parse(); |
| } |
| return uncheckedGetField(fieldID); |
| } |
| |
| /** |
| * Get the field out of the row without checking parsed. This is called by |
| * both getField and getFieldsAsList. |
| * |
| * @param fieldID |
| * The id of the field starting from 0. |
| * @return The value of the field |
| */ |
| private Object uncheckedGetField(int fieldID) { |
| // Test the length first so in most cases we avoid doing a byte[] |
| // comparison. |
| if (fieldIsNull[fieldID]) { |
| return null; |
| } |
| if (!fieldInited[fieldID]) { |
| fieldInited[fieldID] = true; |
| fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]); |
| } |
| return fields[fieldID].getObject(); |
| } |
| |
| ArrayList<Object> cachedList; |
| |
| /** |
| * Get the values of the fields as an ArrayList. |
| * |
| * @return The values of the fields as an ArrayList. |
| */ |
| public ArrayList<Object> getFieldsAsList() { |
| if (!parsed) { |
| parse(); |
| } |
| if (cachedList == null) { |
| cachedList = new ArrayList<Object>(); |
| } else { |
| cachedList.clear(); |
| } |
| for (int i = 0; i < fields.length; i++) { |
| cachedList.add(uncheckedGetField(i)); |
| } |
| return cachedList; |
| } |
| |
| @Override |
| public Object getObject() { |
| return this; |
| } |
| } |