| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.parquet.cli.csv; |
| |
| import org.apache.parquet.cli.util.RecordException; |
| import org.apache.parquet.cli.util.Schemas; |
| import org.apache.avro.AvroRuntimeException; |
| import org.apache.avro.Schema; |
| import org.apache.avro.generic.GenericData; |
| import org.apache.avro.generic.IndexedRecord; |
| import org.apache.avro.reflect.ReflectData; |
| import java.util.List; |
| |
| class RecordBuilder<E> { |
| private final Schema schema; |
| private final Class<E> recordClass; |
| private final Schema.Field[] fields; |
| private final int[] indexes; // Record position to CSV field position |
| |
| public RecordBuilder(Schema schema, Class<E> recordClass, List<String> header) { |
| this.schema = schema; |
| this.recordClass = recordClass; |
| |
| // initialize the index and field arrays |
| fields = schema.getFields().toArray(new Schema.Field[0]); |
| indexes = new int[fields.length]; |
| |
| if (header != null) { |
| for (int i = 0; i < fields.length; i += 1) { |
| fields[i] = schema.getFields().get(i); |
| indexes[i] = Integer.MAX_VALUE; // never present in the row |
| } |
| |
| // there's a header in next |
| for (int i = 0; i < header.size(); i += 1) { |
| Schema.Field field = schema.getField(header.get(i)); |
| if (field != null) { |
| indexes[field.pos()] = i; |
| } |
| } |
| |
| } else { |
| // without a header, map to fields by position |
| for (int i = 0; i < fields.length; i += 1) { |
| fields[i] = schema.getFields().get(i); |
| indexes[i] = i; |
| } |
| } |
| } |
| |
| public E makeRecord(String[] fields, E reuse) { |
| E record = reuse; |
| if (record == null) { |
| record = newRecordInstance(); |
| } |
| |
| if (record instanceof IndexedRecord) { |
| fillIndexed((IndexedRecord) record, fields); |
| } else { |
| fillReflect(record, fields); |
| } |
| |
| return record; |
| } |
| |
| @SuppressWarnings("unchecked") |
| private E newRecordInstance() { |
| if (recordClass != GenericData.Record.class && !recordClass.isInterface()) { |
| E record = (E) ReflectData.newInstance(recordClass, schema); |
| if (record != null) { |
| return record; |
| } |
| } |
| return (E) new GenericData.Record(schema); |
| } |
| |
| private void fillIndexed(IndexedRecord record, String[] data) { |
| for (int i = 0; i < indexes.length; i += 1) { |
| int index = indexes[i]; |
| record.put(i, |
| makeValue(index < data.length ? data[index] : null, fields[i])); |
| } |
| } |
| |
| private void fillReflect(Object record, String[] data) { |
| for (int i = 0; i < indexes.length; i += 1) { |
| Schema.Field field = fields[i]; |
| int index = indexes[i]; |
| Object value = makeValue(index < data.length ? data[index] : null, field); |
| ReflectData.get().setField(record, field.name(), i, value); |
| } |
| } |
| |
| private static Object makeValue(String string, Schema.Field field) { |
| try { |
| Object value = makeValue(string, field.schema()); |
| if (value != null || Schemas.nullOk(field.schema())) { |
| return value; |
| } else { |
| // this will fail if there is no default value |
| return ReflectData.get().getDefaultValue(field); |
| } |
| } catch (RecordException e) { |
| // add the field name to the error message |
| throw new RecordException(String.format( |
| "Cannot convert field %s", field.name()), e); |
| } catch (NumberFormatException e) { |
| throw new RecordException(String.format( |
| "Field %s: value not a %s: '%s'", |
| field.name(), field.schema(), string), e); |
| } catch (AvroRuntimeException e) { |
| throw new RecordException(String.format( |
| "Field %s: cannot make %s value: '%s'", |
| field.name(), field.schema(), string), e); |
| } |
| } |
| |
| /** |
| * Returns a the value as the first matching schema type or null. |
| * |
| * Note that if the value may be null even if the schema does not allow the |
| * value to be null. |
| * |
| * @param string a String representation of the value |
| * @param schema a Schema |
| * @return the string coerced to the correct type from the schema or null |
| */ |
| private static Object makeValue(String string, Schema schema) { |
| if (string == null) { |
| return null; |
| } |
| |
| try { |
| switch (schema.getType()) { |
| case BOOLEAN: |
| return Boolean.valueOf(string); |
| case STRING: |
| return string; |
| case FLOAT: |
| return Float.valueOf(string); |
| case DOUBLE: |
| return Double.valueOf(string); |
| case INT: |
| return Integer.valueOf(string); |
| case LONG: |
| return Long.valueOf(string); |
| case ENUM: |
| // TODO: translate to enum class |
| if (schema.hasEnumSymbol(string)) { |
| return string; |
| } else { |
| try { |
| return schema.getEnumSymbols().get(Integer.parseInt(string)); |
| } catch (IndexOutOfBoundsException ex) { |
| return null; |
| } |
| } |
| case UNION: |
| Object value = null; |
| for (Schema possible : schema.getTypes()) { |
| value = makeValue(string, possible); |
| if (value != null) { |
| return value; |
| } |
| } |
| return null; |
| case NULL: |
| return null; |
| default: |
| // FIXED, BYTES, MAP, ARRAY, RECORD are not supported |
| throw new RecordException( |
| "Unsupported field type:" + schema.getType()); |
| } |
| } catch (NumberFormatException e) { |
| // empty string is considered null for numeric types |
| if (string.isEmpty()) { |
| return null; |
| } else { |
| throw e; |
| } |
| } |
| } |
| } |