blob: 4b0530df4a5aaa2e34f3ea7576631292d77bab80 [file]
/**
* Copyright 2013 Lukas Nalezenec
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package parquet.proto;
import com.google.protobuf.Descriptors;
import com.google.protobuf.Message;
import com.twitter.elephantbird.util.Protobufs;
import parquet.Log;
import parquet.schema.ConversionPatterns;
import parquet.schema.GroupType;
import parquet.schema.MessageType;
import parquet.schema.OriginalType;
import parquet.schema.PrimitiveType;
import parquet.schema.Type;
import java.util.ArrayList;
import java.util.List;
import static com.google.protobuf.Descriptors.FieldDescriptor.JavaType;
import static parquet.schema.PrimitiveType.PrimitiveTypeName;
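/**
* Converts a Protocol Buffer Descriptor into a Parquet schema.
* <p>
* Nested messages become Parquet groups. Strings and enums are stored as
* binary values annotated with the UTF8 and ENUM original types respectively.
*
* @author Lukas Nalezenec
*/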
public class ProtoSchemaConverter {

  private static final Log LOG = Log.getLog(ProtoSchemaConverter.class);

  /**
   * Converts the descriptor of the given protocol buffer class into a Parquet message type.
   * The resulting schema is named after the descriptor's full name and contains one Parquet
   * field per protocol buffer field, in declaration order.
   *
   * @param protobufClass generated protocol buffer message class to convert
   * @return Parquet schema equivalent of the message
   * @throws UnsupportedOperationException if a field has an unknown Java type, or if the message
   *         contains itself (directly or through other messages); such a definition has no
   *         finite expansion into nested groups
   */
  public MessageType convert(Class<? extends Message> protobufClass) {
    LOG.debug("Converting protocol buffer class \"" + protobufClass + "\" to parquet schema.");
    Descriptors.Descriptor descriptor = Protobufs.getMessageDescriptor(protobufClass);

    // Full names of the messages currently being expanded, outermost first.
    // Seeded with the root so that a field of the root's own type is detected as a cycle.
    List<String> enclosingMessages = new ArrayList<String>();
    enclosingMessages.add(descriptor.getFullName());

    MessageType messageType = new MessageType(descriptor.getFullName(),
        convertFields(descriptor.getFields(), enclosingMessages));
    LOG.debug("Converter info:\n " + descriptor.toProto() + " was converted to \n" + messageType);
    return messageType;
  }

  /**
   * Converts every field of one message, preserving order.
   *
   * @param fieldDescriptors fields of the message being converted
   * @param enclosingMessages full names of the messages on the current nesting path
   * @return converted Parquet types, one per field
   */
  private List<Type> convertFields(List<Descriptors.FieldDescriptor> fieldDescriptors,
                                   List<String> enclosingMessages) {
    List<Type> types = new ArrayList<Type>();
    for (Descriptors.FieldDescriptor fieldDescriptor : fieldDescriptors) {
      String fieldName = fieldDescriptor.getName();
      Type.Repetition repetition = getRepetition(fieldDescriptor);
      types.add(convertField(fieldName, fieldDescriptor, repetition, enclosingMessages));
    }
    return types;
  }

  /**
   * Maps the protocol buffer field label to a Parquet repetition. Required is checked first,
   * then repeated; anything else is treated as optional.
   */
  private Type.Repetition getRepetition(Descriptors.FieldDescriptor descriptor) {
    if (descriptor.isRequired()) {
      return Type.Repetition.REQUIRED;
    }
    if (descriptor.isRepeated()) {
      return Type.Repetition.REPEATED;
    }
    return Type.Repetition.OPTIONAL;
  }

  /**
   * Converts a single field. Scalar fields become primitives; message fields become groups
   * whose children are converted recursively.
   *
   * @param fieldName name given to the resulting Parquet type
   * @param descriptor protocol buffer field being converted
   * @param repetition repetition already derived for the field
   * @param enclosingMessages full names of the messages on the current nesting path
   * @return Parquet type for the field
   * @throws UnsupportedOperationException for an unknown Java type or a recursive message type
   */
  private Type convertField(String fieldName, Descriptors.FieldDescriptor descriptor,
                            Type.Repetition repetition, List<String> enclosingMessages) {
    JavaType javaType = descriptor.getJavaType();
    switch (javaType) {
      case BOOLEAN: return primitive(fieldName, PrimitiveTypeName.BOOLEAN, repetition);
      case INT: return primitive(fieldName, PrimitiveTypeName.INT32, repetition);
      case LONG: return primitive(fieldName, PrimitiveTypeName.INT64, repetition);
      case FLOAT: return primitive(fieldName, PrimitiveTypeName.FLOAT, repetition);
      case DOUBLE: return primitive(fieldName, PrimitiveTypeName.DOUBLE, repetition);
      case BYTE_STRING: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition);
      case STRING: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition, OriginalType.UTF8);
      case MESSAGE: {
        Descriptors.Descriptor messageDescriptor = descriptor.getMessageType();
        String messageName = messageDescriptor.getFullName();
        // A message already on the nesting path would expand forever; fail with a clear
        // message instead of overflowing the stack.
        if (enclosingMessages.contains(messageName)) {
          throw new UnsupportedOperationException("Cannot convert Protocol Buffer: recursive message type "
              + messageName + " fieldName " + fieldName);
        }
        enclosingMessages.add(messageName);
        try {
          List<Type> fields = convertFields(messageDescriptor.getFields(), enclosingMessages);
          return new GroupType(repetition, fieldName, fields);
        } finally {
          // Pop so the same message type may still appear in sibling fields.
          enclosingMessages.remove(enclosingMessages.size() - 1);
        }
      }
      case ENUM: return primitive(fieldName, PrimitiveTypeName.BINARY, repetition, OriginalType.ENUM);
    }
    throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType + " fieldName " + fieldName);
  }

  /**
   * Makes a primitive type carrying an original-type annotation. Used for String and Enum fields.
   */
  private Type primitive(String name, PrimitiveTypeName primitive,
                         Type.Repetition repetition, OriginalType originalType) {
    return new PrimitiveType(repetition, primitive, name, originalType);
  }

  /**
   * Makes a primitive type without an original-type annotation.
   */
  private PrimitiveType primitive(String name, PrimitiveTypeName primitive,
                                  Type.Repetition repetition) {
    return new PrimitiveType(repetition, primitive, name, null);
  }
}