blob: 0c64e5b80f3945f75b9439ae61fe1f10924224d6 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ORC_COLUMN_READER_HH
#define ORC_COLUMN_READER_HH
#include <unordered_map>
#include "orc/Vector.hh"
#include "ByteRLE.hh"
#include "Compression.hh"
#include "Timezone.hh"
#include "wrap/orc-proto-wrapper.hh"
namespace orc {
/**
* Abstract interface through which the column readers obtain the
* per-stripe information they need: the selected columns, the column
* encodings, the data streams, the memory pool, the writer's timezone,
* the error stream, and the Hive 0.11 decimal settings.
* Every accessor is pure virtual, so a concrete subclass has to
* implement all of them.
*/
class StripeStreams {
public:
// Virtual so that implementations can be destroyed through a
// StripeStreams pointer or reference.
virtual ~StripeStreams();
/**
* Get the array of booleans for which columns are selected.
* @return a vector of booleans, returned by value, that contains true
* at the index of each columnId that is selected.
*/
virtual const std::vector<bool> getSelectedColumns() const = 0;
/**
* Get the encoding for the given column for this stripe.
* @param columnId the id of the column
* @return the encoding of that column
*/
virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const = 0;
/**
* Get the stream for the given column/kind in this stripe.
* @param columnId the id of the column
* @param kind the kind of the stream
* @param shouldStream whether the reader should page the stream in
* @return the new stream, owned by the caller.
* NOTE(review): whether a null pointer is returned when the
* requested stream does not exist is not visible here; confirm
* against the implementations.
*/
virtual std::unique_ptr<SeekableInputStream>
getStream(uint64_t columnId,
proto::Stream_Kind kind,
bool shouldStream) const = 0;
/**
* Get the memory pool for this reader.
* @return a reference to the memory pool
*/
virtual MemoryPool& getMemoryPool() const = 0;
/**
* Get the writer's timezone, so that we can convert their dates correctly.
* @return a reference to the writer's timezone
*/
virtual const Timezone& getWriterTimezone() const = 0;
/**
* Get the error stream.
* @return a pointer to the stream that should get error messages
*/
virtual std::ostream* getErrorStream() const = 0;
/**
* Should the reader throw when the scale overflows when reading Hive 0.11
* decimals.
* @return true if it should throw
*/
virtual bool getThrowOnHive11DecimalOverflow() const = 0;
/**
* What is the scale forced on the Hive 0.11 decimals?
* @return the number of scale digits
*/
virtual int32_t getForcedScaleOnHive11Decimal() const = 0;
};
/**
 * The interface for reading ORC data types.
 *
 * Base class of the column readers created by buildReader(). It holds
 * the state declared below (not-null decoder, column id, memory pool)
 * and declares the virtual operations that readers can override.
 */
class ColumnReader {
 protected:
  // Decoder for the not-null mask of this column.
  // NOTE(review): presumably left empty when the column has no not-null
  // stream; confirm against the constructor.
  std::unique_ptr<ByteRleDecoder> notNullDecoder;
  // Id of the column handled by this reader.
  uint64_t columnId;
  // Memory pool used by this reader; held by reference, so it has to
  // outlive the reader.
  MemoryPool& memoryPool;

 public:
  /**
   * Create a reader for one column of a stripe.
   * @param type the type of the column to read
   * @param stripe the stripe to read from
   */
  ColumnReader(const Type& type, StripeStreams& stripe);

  virtual ~ColumnReader();

  /**
   * Skip number of specified rows.
   * @param numValues the number of values to skip
   * @return the number of non-null values skipped
   */
  virtual uint64_t skip(uint64_t numValues);

  /**
   * Read the next group of values into this rowBatch.
   * @param rowBatch the memory to read into.
   * @param numValues the number of values to read
   * @param notNull if null, all values are not null. Otherwise, it is
   *    a mask (with at least numValues bytes) for which values to
   *    set.
   */
  virtual void next(ColumnVectorBatch& rowBatch,
                    uint64_t numValues,
                    char* notNull);

  /**
   * Read the next group of values without decoding.
   *
   * This default implementation has no encoded form to offer: it marks
   * the batch as not encoded and falls back to next(), so the batch
   * receives decoded values. Readers that can return encoded data
   * override this method.
   * @param rowBatch the memory to read into.
   * @param numValues the number of values to read
   * @param notNull if null, all values are not null. Otherwise, it is
   *    a mask (with at least numValues bytes) for which values to
   *    set.
   */
  virtual void nextEncoded(ColumnVectorBatch& rowBatch,
                           uint64_t numValues,
                           char* notNull) {
    // Tell the consumer that the batch holds decoded values before
    // delegating to the regular read path.
    rowBatch.isEncoded = false;
    next(rowBatch, numValues, notNull);
  }

  /**
   * Seek to beginning of a row group in the current stripe.
   * @param positions a map of PositionProviders storing the positions,
   *    keyed by a uint64_t (presumably the column id; confirm against
   *    the callers)
   */
  virtual void seekToRowGroup(
      std::unordered_map<uint64_t, PositionProvider>& positions);
};
/**
* Create a reader for the given stripe.
* @param type the type to build the reader for
* @param stripe the stripe that the reader will read from
* @return the new reader, owned by the caller
*/
std::unique_ptr<ColumnReader> buildReader(const Type& type,
StripeStreams& stripe);
}
#endif