// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
//
// Simplistic block encoding for strings.
//
// The block consists of:
// Header:
//   ordinal_pos (32-bit fixed)
//   num_elems (32-bit fixed)
//   offsets_pos (32-bit fixed): position of the first offset, relative to block start
// Strings:
//   raw strings that were written
// Offsets:  [pointed to by offsets_pos]
//   gvint-encoded offsets pointing to the beginning of each string
#ifndef KUDU_CFILE_BINARY_PLAIN_BLOCK_H
#define KUDU_CFILE_BINARY_PLAIN_BLOCK_H

#include <sys/types.h>

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

#include <glog/logging.h>

#include "kudu/cfile/block_encodings.h"
#include "kudu/common/rowid.h"
#include "kudu/gutil/port.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/util/faststring.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"

namespace kudu {

// Forward declarations: these types appear in this header only as pointer
// parameters of the decoder's methods, so their full definitions are not
// required here.
class ColumnDataView;
class ColumnMaterializationContext;
class SelectionVectorView;

namespace cfile {

// Forward declarations: in this header BlockHandle is only referenced through
// scoped_refptr<> and WriterOptions only through a pointer.
class BlockHandle;
struct WriterOptions;

class BinaryPlainBlockBuilder final : public BlockBuilder {
 public:
  explicit BinaryPlainBlockBuilder(const WriterOptions *options);
  virtual ~BinaryPlainBlockBuilder();

  bool IsBlockFull() const override;

  int Add(const uint8_t *vals, size_t count) OVERRIDE;

  void Finish(rowid_t ordinal_pos, std::vector<Slice>* slices) override;

  void Reset() OVERRIDE;

  size_t Count() const OVERRIDE;

  // Return the key at index idx.
  // key should be a Slice*
  Status GetKeyAtIdx(void* key_void, int idx) const;

  // Return the first added key.
  // key should be a Slice*
  Status GetFirstKey(void* key) const OVERRIDE;

  // Return the last added key.
  // key should be a Slice*
  Status GetLastKey(void* key) const OVERRIDE;

  // Length of a header.
  static constexpr size_t kHeaderSize = sizeof(uint32_t) * 3;

 private:
  faststring buffer_;

  size_t end_of_data_offset_;
  size_t size_estimate_;

  // Offsets of each entry, relative to the start of the block
  std::vector<uint32_t> offsets_;

  bool finished_;

  const WriterOptions *options_;

};

class BinaryPlainBlockDecoder final : public BlockDecoder {
 public:
  explicit BinaryPlainBlockDecoder(scoped_refptr<BlockHandle> block);
  virtual ~BinaryPlainBlockDecoder();

  virtual Status ParseHeader() OVERRIDE;
  virtual void SeekToPositionInBlock(uint pos) OVERRIDE;
  virtual Status SeekAtOrAfterValue(const void *value,
                                    bool *exact_match) OVERRIDE;
  Status CopyNextValues(size_t *n, ColumnDataView *dst) OVERRIDE;
  Status CopyNextAndEval(size_t* n,
                         ColumnMaterializationContext* ctx,
                         SelectionVectorView* sel,
                         ColumnDataView* dst) override;

  virtual bool HasNext() const OVERRIDE {
    DCHECK(parsed_);
    return cur_idx_ < num_elems_;
  }

  virtual size_t Count() const OVERRIDE {
    DCHECK(parsed_);
    return num_elems_;
  }

  virtual size_t GetCurrentIndex() const OVERRIDE {
    DCHECK(parsed_);
    return cur_idx_;
  }

  virtual rowid_t GetFirstRowId() const OVERRIDE {
    return ordinal_pos_base_;
  }

  Slice string_at_index(size_t idx) const {
    const uint32_t str_offset = offset(idx);
    uint32_t len = offset(idx + 1) - str_offset;
    return Slice(&data_[str_offset], len);
  }

  const scoped_refptr<BlockHandle>& block_handle() {
    return block_;
  }

  // Minimum length of a header.
  static const size_t kMinHeaderSize = sizeof(uint32_t) * 3;

 private:
  // Helper template for handling batches of rows. CellHandler is a lambda that
  // gets called on every cell. When decoder evaluation is enabled, it
  // evaluates whether or not the string should be copied and sets a
  // SelectionVectorView bit at the appropriate location. When decoder
  // evaluation is disabled, it copies the cell's string to dst.
  template <typename CellHandler>
  Status HandleBatch(size_t* n, ColumnDataView* dst, CellHandler c);

  // Return the offset within 'data_' where the string value with index 'idx'
  // can be found.
  uint32_t offset(int idx) const {
    const uint8_t* p = &offsets_buf_[idx * sizeof(uint32_t)];
    uint32_t ret;
    memcpy(&ret, p, sizeof(uint32_t));
    return ret;
  }

  scoped_refptr<BlockHandle> block_;
  Slice data_;
  bool parsed_;

  // A buffer for an array of 32-bit integers for the offsets of the underlying
  // strings in 'data_'.
  //
  // This array also contains one extra offset at the end, pointing
  // _after_ the last entry. This makes the code much simpler.
  //
  // The array is stored inside a 'faststring' instead of a vector<uint32_t> to
  // avoid the overhead of calling vector::push_back -- one would think it would
  // be fully inlined away, but it's actually a perf win to do this.
  faststring offsets_buf_;

  uint32_t num_elems_;
  rowid_t ordinal_pos_base_;

  // Index of the currently seeked element in the block.
  uint32_t cur_idx_;
};

} // namespace cfile
} // namespace kudu

#endif // KUDU_CFILE_BINARY_PLAIN_BLOCK_H
