blob: f654b776bf81f22e6a994b9f03134f014e07a4fe [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "kudu/cfile/cfile_util.h"
#include <algorithm>
#include <cstdint>
#include <optional>
#include <string>
#include <vector>
#include <glog/logging.h>
#include "kudu/cfile/cfile_reader.h"
#include "kudu/common/column_materialization_context.h"
#include "kudu/common/columnblock.h"
#include "kudu/common/rowblock.h"
#include "kudu/common/rowblock_memory.h"
#include "kudu/common/types.h"
#include "kudu/gutil/port.h"
#include "kudu/util/bitmap.h"
#include "kudu/util/mem_tracker.h"
namespace kudu {
namespace cfile {
using std::string;
// Size, in bytes, of the scratch buffer that DumpIterator() materializes
// column values into (1 MiB).
static constexpr int kBufSize = 1 << 20;
// Builds a WriterOptions populated with its default settings:
//   - index_block_size:        32 * 1024
//   - block_restart_interval:  16
//   - write_posidx / write_validx: both off
//   - optimize_index_keys:     on
//   - validx_key_encoder:      unset (std::nullopt)
WriterOptions::WriterOptions()
    : index_block_size(32 * 1024),
      block_restart_interval(16),
      write_posidx(false),
      write_validx(false),
      optimize_index_keys(true),
      validx_key_encoder(std::nullopt) {}
// Dumps the values produced by 'it' to 'out', one value per line, with each
// line prefixed by 'indent' spaces. For a nullable column (as reported by
// 'reader'), cells without a value are printed as "NULL".
//
// 'reader' supplies the column's type information and nullability.
// If 'num_rows' is 0, values are dumped until the iterator is exhausted;
// otherwise dumping stops once 'num_rows' values have been requested.
//
// A failure from CFileIterator::CopyNextValues() is propagated to the caller
// through RETURN_NOT_OK; otherwise Status::OK() is returned.
Status DumpIterator(const CFileReader& reader,
                    CFileIterator* it,
                    std::ostream* out,
                    int num_rows,
                    int indent) {
  RowBlockMemory mem(8192);
  const TypeInfo* type = reader.type_info();
  const bool nullable = reader.is_nullable();

  // Number of fixed-size cells of this type that fit in one kBufSize batch.
  const size_t max_rows = kBufSize / type->size();

  // Both scratch buffers live on the heap: a 1 MiB array plus a
  // variable-length array (a non-standard extension) on the stack risks
  // overflowing the stack of threads with small stacks.
  std::vector<uint8_t> buf(kBufSize);
  std::vector<uint8_t> nulls(BitmapSize(max_rows));

  ColumnBlock cb(type, nullable ? nulls.data() : nullptr, buf.data(), max_rows, &mem);
  SelectionVector sel(max_rows);
  ColumnMaterializationContext ctx(0, nullptr, &cb, &sel);

  std::string strbuf;
  size_t count = 0;
  while (it->HasNext()) {
    // Ask for a full batch, or for whatever is left of the caller's limit.
    // NOTE(review): a negative 'num_rows' wraps to a huge unsigned value here
    // and therefore behaves like "no limit" -- confirm no caller relies on it.
    size_t n = num_rows == 0
        ? max_rows
        : std::min(max_rows, static_cast<size_t>(num_rows) - count);
    if (n == 0) break;

    // 'n' is passed by pointer; presumably it is updated to the number of
    // values actually copied into 'cb'.
    RETURN_NOT_OK(it->CopyNextValues(&n, &ctx));

    if (nullable) {
      for (size_t i = 0; i < n; i++) {
        strbuf.append(indent, ' ');
        const void* ptr = cb.nullable_cell_ptr(i);
        if (ptr != nullptr) {
          type->AppendDebugStringForValue(ptr, &strbuf);
        } else {
          strbuf.append("NULL");
        }
        strbuf.push_back('\n');
      }
    } else {
      for (size_t i = 0; i < n; i++) {
        strbuf.append(indent, ' ');
        type->AppendDebugStringForValue(cb.cell_ptr(i), &strbuf);
        strbuf.push_back('\n');
      }
    }

    // Flush this batch and recycle the per-batch state before the next one.
    *out << strbuf;
    strbuf.clear();
    mem.Reset();
    count += n;
  }

  VLOG(1) << "Dumped " << count << " rows";
  return Status::OK();
}
// Builds a ReaderOptions whose parent memory tracker defaults to the tracker
// returned by MemTracker::GetRootTracker().
ReaderOptions::ReaderOptions()
    : parent_mem_tracker(MemTracker::GetRootTracker()) {}
// Returns the number of leading bytes that 'slice_a' and 'slice_b' have in
// common. The result never exceeds the size of the shorter slice.
//
// This implementation is modeled after strings::fastmemcmp_inlined().
size_t CommonPrefixLength(const Slice& slice_a, const Slice& slice_b) {
  // Keep the length as size_t: narrowing it to int would truncate (or turn
  // negative) for very large slices and corrupt 'a_limit' below.
  const size_t len = std::min<size_t>(slice_a.size(), slice_b.size());
  const uint8_t* a = slice_a.data();
  const uint8_t* b = slice_b.data();
  const uint8_t* const a_limit = a + len;

  // The word-sized loops test the number of bytes remaining instead of
  // computing 'a + word_size', which could form a pointer beyond
  // one-past-the-end of the buffer when fewer than a word's bytes remain.

  // Move forward 8 bytes at a time until finding an unequal portion.
  while (static_cast<size_t>(a_limit - a) >= sizeof(uint64_t) &&
         UNALIGNED_LOAD64(a) == UNALIGNED_LOAD64(b)) {
    a += sizeof(uint64_t);
    b += sizeof(uint64_t);
  }

  // Same, 4 bytes at a time.
  while (static_cast<size_t>(a_limit - a) >= sizeof(uint32_t) &&
         UNALIGNED_LOAD32(a) == UNALIGNED_LOAD32(b)) {
    a += sizeof(uint32_t);
    b += sizeof(uint32_t);
  }

  // Now one byte at a time. We could do a 2-bytes-at-a-time loop,
  // but we're following the example of fastmemcmp_inlined(). The benefit of
  // 2-at-a-time likely doesn't outweigh the cost of added code size.
  while (a < a_limit && *a == *b) {
    a++;
    b++;
  }

  // 'a' has advanced past exactly the bytes that matched.
  return static_cast<size_t>(a - slice_a.data());
}
// Shortens '*right' in place to a prefix that still separates it from 'left':
// the bytes the two keys share plus one more byte of '*right'. When the whole
// of '*right' is shared with 'left', its length is left as is.
//
// Debug builds check that 'left' <= '*right' on entry.
void GetSeparatingKey(const Slice& left, Slice* right) {
  DCHECK_LE(left, *right);
  const size_t shared = CommonPrefixLength(left, *right);
  // 'shared' never exceeds right->size(), so clamping 'shared + 1' to the
  // current size keeps one byte past the common prefix whenever one exists.
  const size_t keep = std::min<size_t>(shared + 1, right->size());
  right->truncate(keep);
}
} // namespace cfile
} // namespace kudu