blob: 57469edcc42dc65ba16f4d1df026e86b9714a1d5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.store.easy.text.reader;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.exec.physical.resultSet.RowSetLoader;
import org.apache.drill.exec.store.easy.text.TextFormatPlugin;
import org.apache.drill.exec.vector.accessor.ArrayWriter;
import org.apache.drill.exec.vector.accessor.ScalarWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Class is responsible for generating record batches for text file inputs. We generate
* a record batch with a single vector of type repeated varchar vector. Each record is a single
* value within the vector containing all the fields in the record as individual array elements.
*/
public class RepeatedVarCharOutput extends BaseFieldOutput {
private static final Logger logger = LoggerFactory.getLogger(RepeatedVarCharOutput.class);
private final ScalarWriter columnWriter;
/**
* Provide the row set loader (which must have just one repeated Varchar
* column) and an optional array projection mask.
*
* @param loader row set loader
* @param projectionMask array projection mask
*/
public RepeatedVarCharOutput(RowSetLoader loader, boolean[] projectionMask) {
super(loader,
maxField(loader, projectionMask),
projectionMask);
ArrayWriter arrayWriter = writer.array(0);
columnWriter = arrayWriter.scalar();
}
private static int maxField(RowSetLoader loader, boolean[] projectionMask) {
// If the one and only field (`columns`) is not selected, then this
// is a COUNT(*) or similar query. Select nothing.
if (! loader.column(0).isProjected()) {
return -1;
}
// If this is SELECT * or SELECT `columns` query, project all
// possible fields.
if (projectionMask == null) {
return TextFormatPlugin.MAXIMUM_NUMBER_COLUMNS;
}
// Else, this is a SELECT columns[x], columns[y], ... query.
// Project only the requested element members (fields).
int end = projectionMask.length - 1;
while (end >= 0 && ! projectionMask[end]) {
end--;
}
return end;
}
/**
* Write the value into an array position. Rules:
* <ul>
* <li>If there is no projection mask, collect all columns.</li>
* <li>If a selection mask is present, we previously found the index
* of the last projection column (<tt>maxField</tt>). If the current
* column is beyond that number, ignore the data and stop accepting
* columns.</li>
* <li>If the column is projected, add the data to the array.</li>
* <li>If the column is not projected, add a blank value to the
* array.</li>
* </ul>
* The above ensures that we leave no holes in the portion of the
* array that is projected (by adding blank columns where needed),
* and we just ignore columns past the end of the projected part
* of the array. (No need to fill holes at the end.)
*/
@Override
public boolean endField() {
// Skip the field if past the set of projected fields.
if (currentFieldIndex > maxField) {
return false;
}
// If the field is projected, save it.
if (fieldProjected) {
// Repeated var char will create as many entries as there are columns.
// If this would exceed the maximum, issue an error. Note that we do
// this only if all fields are selected; the same query will succeed if
// the user does a COUNT(*) or SELECT columns[x], columns[y], ...
if (currentFieldIndex > TextFormatPlugin.MAXIMUM_NUMBER_COLUMNS) {
throw UserException
.unsupportedError()
.message("Text file contains too many fields")
.addContext("Limit", TextFormatPlugin.MAXIMUM_NUMBER_COLUMNS)
.build(logger);
}
// Save the field.
writeToVector();
} else {
// The field is not projected.
// Must write a value into this array position, but
// the value should be empty.
columnWriter.setBytes(fieldBytes, 0);
}
// Return whether the rest of the fields should be read.
return super.endField();
}
@Override
protected ScalarWriter columnWriter() {
return columnWriter;
}
}