blob: 5c0e973de5a96abc4256b45457fb887641ee1118 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "orc/Vector.hh"
#include "Adaptor.hh"
#include "orc/Exceptions.hh"
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <sstream>
namespace orc {
// Builds a batch sized for `cap` rows using `pool`.
// The batch starts with numElements == 0, hasNulls and isEncoded both false,
// and every byte of the notNull buffer set to 1.
ColumnVectorBatch::ColumnVectorBatch(uint64_t cap,
                                     MemoryPool& pool
                                     ): capacity(cap),
                                        numElements(0),
                                        notNull(pool, cap),
                                        hasNulls(false),
                                        isEncoded(false),
                                        memoryPool(pool) {
  // Skip the fill for an empty batch: handing memset a null pointer is
  // undefined even for a zero length, and whether notNull.data() can be null
  // for a zero-sized buffer depends on DataBuffer (not visible in this file).
  if (capacity != 0) {
    std::memset(notNull.data(), 1, capacity);
  }
}
// Destructor; the body is intentionally empty.
ColumnVectorBatch::~ColumnVectorBatch()
{
  // no explicit cleanup
}
// Grows the batch to `cap` rows. A request that does not exceed the current
// capacity is ignored, so this method never shrinks the batch.
void ColumnVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  capacity = cap;
  notNull.resize(cap);
}
void ColumnVectorBatch::clear() {
numElements = 0;
}
// Returns the size in bytes of the notNull buffer, computed from its
// capacity() at one char per entry.
uint64_t ColumnVectorBatch::getMemoryUsage() {
  const uint64_t notNullBytes =
      static_cast<uint64_t>(notNull.capacity() * sizeof(char));
  return notNullBytes;
}
// Base answer: always false. The struct, list, map and union batches in
// this file supply their own versions.
bool ColumnVectorBatch::hasVariableLength()
{
  return false;
}
// Creates a batch of `_capacity` rows; the `data` buffer is constructed
// with the same pool and capacity as the base batch.
LongVectorBatch::LongVectorBatch(uint64_t _capacity, MemoryPool& pool)
    : ColumnVectorBatch(_capacity, pool),
      data(pool, _capacity) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
LongVectorBatch::~LongVectorBatch()
{
  // no explicit cleanup
}
// Renders a short description of the form
// "Long vector <numElements of capacity>".
std::string LongVectorBatch::toString() const {
  std::ostringstream out;
  out << "Long vector <";
  out << numElements;
  out << " of ";
  out << capacity;
  out << ">";
  return out.str();
}
// Grows the base batch and the `data` buffer to `cap` rows; does nothing
// when `cap` does not exceed the current capacity.
void LongVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  data.resize(cap);
}
void LongVectorBatch::clear() {
numElements = 0;
}
// Returns the base batch usage plus the bytes covered by the capacity of
// the `data` buffer (int64_t per entry).
uint64_t LongVectorBatch::getMemoryUsage() {
  const uint64_t baseBytes = ColumnVectorBatch::getMemoryUsage();
  const uint64_t dataBytes =
      static_cast<uint64_t>(data.capacity() * sizeof(int64_t));
  return baseBytes + dataBytes;
}
// Creates a batch of `_capacity` rows; the `data` buffer is constructed
// with the same pool and capacity as the base batch.
DoubleVectorBatch::DoubleVectorBatch(uint64_t _capacity, MemoryPool& pool)
    : ColumnVectorBatch(_capacity, pool),
      data(pool, _capacity) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
DoubleVectorBatch::~DoubleVectorBatch()
{
  // no explicit cleanup
}
// Renders a short description of the form
// "Double vector <numElements of capacity>".
std::string DoubleVectorBatch::toString() const {
  std::ostringstream out;
  out << "Double vector <";
  out << numElements;
  out << " of ";
  out << capacity;
  out << ">";
  return out.str();
}
// Grows the base batch and the `data` buffer to `cap` rows; does nothing
// when `cap` does not exceed the current capacity.
void DoubleVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  data.resize(cap);
}
void DoubleVectorBatch::clear() {
numElements = 0;
}
// Returns the base batch usage plus the bytes covered by the capacity of
// the `data` buffer (double per entry).
uint64_t DoubleVectorBatch::getMemoryUsage() {
  const uint64_t baseBytes = ColumnVectorBatch::getMemoryUsage();
  const uint64_t dataBytes =
      static_cast<uint64_t>(data.capacity() * sizeof(double));
  return baseBytes + dataBytes;
}
// Constructs the dictionary's blob and offset buffers with `pool`; no size
// is passed to either buffer here.
StringDictionary::StringDictionary(MemoryPool& pool)
    : dictionaryBlob(pool), dictionaryOffset(pool) {
  // nothing beyond member initialization
}
// Creates an encoded string batch of `_capacity` rows. `dictionary` is
// value-initialized and `index` is constructed with the pool and capacity.
EncodedStringVectorBatch::EncodedStringVectorBatch(uint64_t _capacity,
                                                   MemoryPool& pool)
    : StringVectorBatch(_capacity, pool),
      dictionary(),
      index(pool, _capacity) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
EncodedStringVectorBatch::~EncodedStringVectorBatch()
{
  // no explicit cleanup
}
// Renders a short description of the form
// "Encoded string vector <numElements of capacity>".
std::string EncodedStringVectorBatch::toString() const {
  std::ostringstream out;
  out << "Encoded string vector <";
  out << numElements;
  out << " of ";
  out << capacity;
  out << ">";
  return out.str();
}
// Creates a string batch of `_capacity` rows. `data` and `length` are
// constructed with the pool and capacity; `blob` receives only the pool.
StringVectorBatch::StringVectorBatch(uint64_t _capacity, MemoryPool& pool)
    : ColumnVectorBatch(_capacity, pool),
      data(pool, _capacity),
      length(pool, _capacity),
      blob(pool) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
StringVectorBatch::~StringVectorBatch()
{
  // no explicit cleanup
}
// Renders a short description of the form
// "Byte vector <numElements of capacity>".
std::string StringVectorBatch::toString() const {
  std::ostringstream out;
  out << "Byte vector <";
  out << numElements;
  out << " of ";
  out << capacity;
  out << ">";
  return out.str();
}
// Grows the base batch plus the `data` and `length` buffers to `cap` rows;
// does nothing when `cap` does not exceed the current capacity. `blob` is
// not resized here.
void StringVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  data.resize(cap);
  length.resize(cap);
}
void StringVectorBatch::clear() {
numElements = 0;
}
// Returns the base batch usage plus the bytes covered by the capacities of
// the `data` (char* per entry) and `length` (int64_t per entry) buffers.
// `blob` is not included in the total.
uint64_t StringVectorBatch::getMemoryUsage() {
  const uint64_t baseBytes = ColumnVectorBatch::getMemoryUsage();
  const uint64_t ownBytes =
      static_cast<uint64_t>(data.capacity() * sizeof(char*)
                            + length.capacity() * sizeof(int64_t));
  return baseBytes + ownBytes;
}
// Creates a struct batch of `cap` rows; only the base batch is initialized
// explicitly here.
StructVectorBatch::StructVectorBatch(uint64_t cap, MemoryPool& pool)
    : ColumnVectorBatch(cap, pool) {
  // nothing beyond base initialization
}
// Deletes every child batch stored in `fields`, in order.
StructVectorBatch::~StructVectorBatch() {
  for (std::vector<ColumnVectorBatch*>::iterator child = this->fields.begin();
       child != this->fields.end(); ++child) {
    delete *child;
  }
}
// Renders "Struct vector <numElements of capacity; " followed by each
// field's toString() terminated with "; ", and a closing ">".
std::string StructVectorBatch::toString() const {
  std::ostringstream out;
  out << "Struct vector <" << numElements << " of " << capacity;
  out << "; ";
  for (size_t i = 0; i < fields.size(); ++i) {
    out << fields[i]->toString();
    out << "; ";
  }
  out << ">";
  return out.str();
}
// Forwards the request to the base batch. The entries of `fields` are not
// resized by this method.
void StructVectorBatch::resize(uint64_t cap)
{
  ColumnVectorBatch::resize(cap);
}
void StructVectorBatch::clear() {
for(size_t i=0; i < fields.size(); i++) {
fields[i]->clear();
}
numElements = 0;
}
// Returns the base batch usage plus the usage reported by every child batch
// in `fields`.
uint64_t StructVectorBatch::getMemoryUsage() {
  uint64_t memory = ColumnVectorBatch::getMemoryUsage();
  // size_t matches the type of fields.size(); an `unsigned int` counter could
  // wrap before reaching a size larger than its range.
  for (size_t i = 0; i < fields.size(); ++i) {
    memory += fields[i]->getMemoryUsage();
  }
  return memory;
}
// Returns true as soon as any child batch in `fields` reports variable
// length; false when none does (including when there are no fields).
bool StructVectorBatch::hasVariableLength() {
  // size_t matches the type of fields.size(); an `unsigned int` counter could
  // wrap before reaching a size larger than its range.
  for (size_t i = 0; i < fields.size(); ++i) {
    if (fields[i]->hasVariableLength()) {
      return true;
    }
  }
  return false;
}
// Creates a list batch of `cap` rows. The `offsets` buffer is constructed
// with cap + 1 entries.
ListVectorBatch::ListVectorBatch(uint64_t cap, MemoryPool& pool)
    : ColumnVectorBatch(cap, pool),
      offsets(pool, cap+1) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
ListVectorBatch::~ListVectorBatch()
{
  // no explicit cleanup
}
// Renders "List vector <" + elements->toString() +
// " with numElements of capacity>". `elements` is dereferenced without a
// null check.
std::string ListVectorBatch::toString() const {
  std::ostringstream out;
  out << "List vector <" << elements->toString();
  out << " with ";
  out << numElements << " of " << capacity;
  out << ">";
  return out.str();
}
// Grows the base batch to `cap` rows and the `offsets` buffer to cap + 1
// entries; does nothing when `cap` does not exceed the current capacity.
// `elements` is not resized here.
void ListVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  offsets.resize(cap + 1);
}
void ListVectorBatch::clear() {
numElements = 0;
elements->clear();
}
// Returns the base batch usage, plus the bytes covered by the capacity of
// `offsets` (int64_t per entry), plus the usage reported by `elements`.
uint64_t ListVectorBatch::getMemoryUsage() {
  uint64_t total = ColumnVectorBatch::getMemoryUsage();
  total += static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t));
  total += elements->getMemoryUsage();
  return total;
}
// List batches always report variable length.
bool ListVectorBatch::hasVariableLength()
{
  return true;
}
// Creates a map batch of `cap` rows. The `offsets` buffer is constructed
// with cap + 1 entries.
MapVectorBatch::MapVectorBatch(uint64_t cap, MemoryPool& pool)
    : ColumnVectorBatch(cap, pool),
      offsets(pool, cap+1) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
MapVectorBatch::~MapVectorBatch()
{
  // no explicit cleanup
}
// Renders "Map vector <" + keys->toString() + ", " + elements->toString() +
// " with numElements of capacity>". `keys` and `elements` are dereferenced
// without a null check.
std::string MapVectorBatch::toString() const {
  std::ostringstream out;
  out << "Map vector <" << keys->toString();
  out << ", " << elements->toString();
  out << " with ";
  out << numElements << " of " << capacity;
  out << ">";
  return out.str();
}
// Grows the base batch to `cap` rows and the `offsets` buffer to cap + 1
// entries; does nothing when `cap` does not exceed the current capacity.
// `keys` and `elements` are not resized here.
void MapVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  offsets.resize(cap + 1);
}
void MapVectorBatch::clear() {
keys->clear();
elements->clear();
numElements = 0;
}
// Returns the base batch usage, plus the bytes covered by the capacity of
// `offsets` (int64_t per entry), plus the usage reported by `keys` and by
// `elements`.
uint64_t MapVectorBatch::getMemoryUsage() {
  uint64_t total = ColumnVectorBatch::getMemoryUsage();
  total += static_cast<uint64_t>(offsets.capacity() * sizeof(int64_t));
  total += keys->getMemoryUsage();
  total += elements->getMemoryUsage();
  return total;
}
// Map batches always report variable length.
bool MapVectorBatch::hasVariableLength()
{
  return true;
}
// Creates a union batch of `cap` rows; the `tags` and `offsets` buffers are
// both constructed with the pool and `cap` entries.
UnionVectorBatch::UnionVectorBatch(uint64_t cap, MemoryPool& pool)
    : ColumnVectorBatch(cap, pool),
      tags(pool, cap),
      offsets(pool, cap) {
  // nothing beyond member initialization
}
// Deletes every child batch stored in `children`, in index order.
UnionVectorBatch::~UnionVectorBatch() {
  uint64_t idx = 0;
  while (idx < children.size()) {
    delete children[idx];
    ++idx;
  }
}
// Renders "Union vector <" followed by the children's toString() results
// joined with ", ", then "; with numElements of capacity>".
std::string UnionVectorBatch::toString() const {
  std::ostringstream out;
  out << "Union vector <";
  // The separator is empty before the first child and ", " afterwards.
  const char* separator = "";
  for (size_t i = 0; i < children.size(); ++i) {
    out << separator << children[i]->toString();
    separator = ", ";
  }
  out << "; with " << numElements;
  out << " of " << capacity << ">";
  return out.str();
}
// Grows the base batch plus the `tags` and `offsets` buffers to `cap` rows;
// does nothing when `cap` does not exceed the current capacity. The entries
// of `children` are not resized here.
void UnionVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  tags.resize(cap);
  offsets.resize(cap);
}
// Clears every child batch in `children`, then resets this batch's element
// count to zero.
void UnionVectorBatch::clear() {
  size_t idx = 0;
  while (idx < children.size()) {
    children[idx]->clear();
    ++idx;
  }
  numElements = 0;
}
// Returns the base batch usage, plus the bytes covered by the capacities of
// `tags` (unsigned char per entry) and `offsets` (uint64_t per entry), plus
// the usage reported by every child batch.
uint64_t UnionVectorBatch::getMemoryUsage() {
  const uint64_t ownBytes =
      static_cast<uint64_t>(tags.capacity() * sizeof(unsigned char)
                            + offsets.capacity() * sizeof(uint64_t));
  uint64_t total = ColumnVectorBatch::getMemoryUsage() + ownBytes;
  size_t idx = 0;
  while (idx < children.size()) {
    total += children[idx]->getMemoryUsage();
    ++idx;
  }
  return total;
}
// Returns true as soon as any child batch reports variable length; false
// when none does (including when there are no children).
bool UnionVectorBatch::hasVariableLength() {
  bool anyVariable = false;
  for (size_t i = 0; !anyVariable && i < children.size(); ++i) {
    anyVariable = children[i]->hasVariableLength();
  }
  return anyVariable;
}
// Creates a decimal batch of `cap` rows with precision and scale both 0;
// `values` and `readScales` are constructed with the pool and `cap` entries.
Decimal64VectorBatch::Decimal64VectorBatch(uint64_t cap, MemoryPool& pool)
    : ColumnVectorBatch(cap, pool),
      precision(0),
      scale(0),
      values(pool, cap),
      readScales(pool, cap) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
Decimal64VectorBatch::~Decimal64VectorBatch()
{
  // no explicit cleanup
}
// Renders a short description of the form
// "Decimal64 vector <numElements of capacity>", using the same balanced
// "<... of ...>" layout as the Long, Double and Timestamp batches.
std::string Decimal64VectorBatch::toString() const {
  std::ostringstream buffer;
  buffer << "Decimal64 vector <" << numElements << " of " << capacity << ">";
  return buffer.str();
}
// Grows the base batch plus the `values` and `readScales` buffers to `cap`
// rows; does nothing when `cap` does not exceed the current capacity.
void Decimal64VectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  values.resize(cap);
  readScales.resize(cap);
}
void Decimal64VectorBatch::clear() {
numElements = 0;
}
// Returns the base batch usage plus the bytes covered by the combined
// capacities of `values` and `readScales` (int64_t per entry).
uint64_t Decimal64VectorBatch::getMemoryUsage() {
  const uint64_t baseBytes = ColumnVectorBatch::getMemoryUsage();
  const uint64_t ownBytes = static_cast<uint64_t>(
      (values.capacity() + readScales.capacity()) * sizeof(int64_t));
  return baseBytes + ownBytes;
}
// Creates a decimal batch of `cap` rows with precision and scale both 0;
// `values` and `readScales` are constructed with the pool and `cap` entries.
Decimal128VectorBatch::Decimal128VectorBatch(uint64_t cap, MemoryPool& pool)
    : ColumnVectorBatch(cap, pool),
      precision(0),
      scale(0),
      values(pool, cap),
      readScales(pool, cap) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
Decimal128VectorBatch::~Decimal128VectorBatch()
{
  // no explicit cleanup
}
// Renders a short description of the form
// "Decimal128 vector <numElements of capacity>", using the same balanced
// "<... of ...>" layout as the Long, Double and Timestamp batches.
std::string Decimal128VectorBatch::toString() const {
  std::ostringstream buffer;
  buffer << "Decimal128 vector <" << numElements << " of " << capacity << ">";
  return buffer.str();
}
// Grows the base batch plus the `values` and `readScales` buffers to `cap`
// rows; does nothing when `cap` does not exceed the current capacity.
void Decimal128VectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  values.resize(cap);
  readScales.resize(cap);
}
void Decimal128VectorBatch::clear() {
numElements = 0;
}
// Returns the base batch usage plus the bytes covered by the capacities of
// `values` (Int128 per entry) and `readScales` (int64_t per entry).
uint64_t Decimal128VectorBatch::getMemoryUsage() {
  const uint64_t baseBytes = ColumnVectorBatch::getMemoryUsage();
  const uint64_t ownBytes =
      static_cast<uint64_t>(values.capacity() * sizeof(Int128)
                            + readScales.capacity() * sizeof(int64_t));
  return baseBytes + ownBytes;
}
// Builds a decimal directly from an unscaled value and its scale.
Decimal::Decimal(const Int128& _value, int32_t _scale)
    : value(_value),
      scale(_scale) {
  // nothing beyond member initialization
}
// Builds a decimal from text. When `str` contains no '.', the whole string
// is passed to Int128 and the scale is 0. Otherwise the scale is the number
// of characters after the first '.', and the value is built from a copy of
// the string with that '.' removed.
Decimal::Decimal(const std::string& str) {
  const std::size_t pointPos = str.find('.');
  if (pointPos != std::string::npos) {
    // everything after the point counts toward the scale
    scale = static_cast<int32_t>(str.length() - pointPos - 1);
    std::string digits(str);
    digits.erase(pointPos, 1);
    value = Int128(digits);
  } else {
    // no decimal point: treat the text as an integer
    value = Int128(str);
    scale = 0;
  }
}
// Default decimal: value 0 with scale 0.
Decimal::Decimal()
    : value(0),
      scale(0) {
  // nothing beyond member initialization
}
// Formats the decimal by delegating to Int128::toDecimalString with this
// decimal's scale; `trimTrailingZeros` is passed through unchanged.
std::string Decimal::toString(bool trimTrailingZeros) const
{
  return value.toDecimalString(scale, trimTrailingZeros);
}
// Creates a timestamp batch of `_capacity` rows; the `data` and
// `nanoseconds` buffers are constructed with the pool and capacity.
TimestampVectorBatch::TimestampVectorBatch(uint64_t _capacity,
                                           MemoryPool& pool)
    : ColumnVectorBatch(_capacity, pool),
      data(pool, _capacity),
      nanoseconds(pool, _capacity) {
  // nothing beyond member initialization
}
// Destructor; the body is intentionally empty.
TimestampVectorBatch::~TimestampVectorBatch()
{
  // no explicit cleanup
}
// Renders a short description of the form
// "Timestamp vector <numElements of capacity>".
std::string TimestampVectorBatch::toString() const {
  std::ostringstream out;
  out << "Timestamp vector <";
  out << numElements;
  out << " of ";
  out << capacity;
  out << ">";
  return out.str();
}
// Grows the base batch plus the `data` and `nanoseconds` buffers to `cap`
// rows; does nothing when `cap` does not exceed the current capacity.
void TimestampVectorBatch::resize(uint64_t cap) {
  if (cap <= capacity) {
    return;
  }
  ColumnVectorBatch::resize(cap);
  data.resize(cap);
  nanoseconds.resize(cap);
}
void TimestampVectorBatch::clear() {
numElements = 0;
}
// Returns the base batch usage plus the bytes covered by the combined
// capacities of `data` and `nanoseconds` (int64_t per entry).
uint64_t TimestampVectorBatch::getMemoryUsage() {
  const uint64_t baseBytes = ColumnVectorBatch::getMemoryUsage();
  const uint64_t ownBytes = static_cast<uint64_t>(
      (data.capacity() + nanoseconds.capacity()) * sizeof(int64_t));
  return baseBytes + ownBytes;
}
}