// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "kudu/fs/log_block_manager-test-util.h"

#include <algorithm>
#include <cstring>
#include <memory>
#include <ostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <gflags/gflags_declare.h>
#include <glog/logging.h>

#include "kudu/fs/block_id.h"
#include "kudu/fs/fs.pb.h"
#include "kudu/fs/log_block_manager.h"
#include "kudu/gutil/integral_types.h"
#include "kudu/gutil/strings/strcat.h"
#include "kudu/gutil/strings/strip.h"
#include "kudu/util/env.h"
#include "kudu/util/path_util.h"
#include "kudu/util/pb_util.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"

DECLARE_uint64(log_container_max_size);

namespace kudu {
namespace fs {

using pb_util::WritablePBContainerFile;
using std::shared_ptr;
using std::string;
using std::vector;
using std::unique_ptr;
using std::unordered_map;

// Constructs a corruptor that performs its file operations through 'env' on
// the given data directories. 'rand_seed' is used to construct the random
// number generator that drives every choice the corruptor makes.
//
// It is a fatal error to pass an empty list of data directories.
LBMCorruptor::LBMCorruptor(Env* env,
                           vector<string> data_dirs,
                           uint32_t rand_seed)
    : env_(env),
      data_dirs_(std::move(data_dirs)),
      rand_(rand_seed) {
  // GetRandomDataDir() indexes into data_dirs_, so it must never be empty.
  CHECK_GT(data_dirs_.size(), 0);
}

// Scans every data directory for log block containers and rebuilds the
// corruptor's lists of known containers.
//
// A container is tracked only if both its data file and its metadata file are
// present in the same directory. Containers whose data file is at least
// --log_container_max_size bytes long are additionally tracked as "full".
//
// The member lists are replaced only after every directory has been scanned
// successfully; if any filesystem call fails, that error is returned and the
// existing lists are left untouched.
Status LBMCorruptor::Init() {
  vector<Container> discovered;
  vector<Container> discovered_full;

  for (const auto& dir : data_dirs_) {
    vector<string> children;
    RETURN_NOT_OK(env_->GetChildren(dir, &children));

    // Group this directory's files by container name. An entry ends up with
    // both filenames populated only if both of the container's files exist.
    unordered_map<string, Container> by_name;
    for (const auto& child : children) {
      string base;
      if (TryStripSuffixString(
          child, LogBlockManager::kContainerDataFileSuffix, &base)) {
        Container& entry = by_name[base];
        entry.name = base;
        entry.data_filename = JoinPathSegments(dir, child);
      } else if (TryStripSuffixString(
          child, LogBlockManager::kContainerMetadataFileSuffix, &base)) {
        Container& entry = by_name[base];
        entry.name = base;
        entry.metadata_filename = JoinPathSegments(dir, child);
      }
    }

    for (const auto& kv : by_name) {
      const Container& candidate = kv.second;
      if (candidate.data_filename.empty() ||
          candidate.metadata_filename.empty()) {
        // One of the two files is missing; this isn't a usable container.
        continue;
      }
      discovered.push_back(candidate);

      // The data file's size is only a rough stand-in for "is this container
      // full?", but it is close enough for the corruptor's purposes.
      uint64_t data_size;
      RETURN_NOT_OK(env_->GetFileSize(candidate.data_filename, &data_size));
      if (data_size >= FLAGS_log_container_max_size) {
        discovered_full.push_back(candidate);
      }
    }
  }

  all_containers_ = std::move(discovered);
  full_containers_ = std::move(discovered_full);
  return Status::OK();
}

// Preallocates 16 KiB of extra space at the end of a randomly chosen full
// container's data file.
//
// Returns the error from GetRandomContainer() if there are no full
// containers, or the first error from any of the file operations.
Status LBMCorruptor::PreallocateFullContainer() {
  const int kPreallocateBytes = 16 * 1024;

  const Container* target = nullptr;
  RETURN_NOT_OK(GetRandomContainer(FULL, &target));

  // Flip a coin to choose between the two preallocation modes; either one
  // results in an inconsistency that is recoverable.
  int r = rand_.Uniform(2);
  if (r != 0) {
    CHECK_EQ(r, 1);
  }
  const RWFile::PreAllocateMode mode =
      (r == 0) ? RWFile::CHANGE_FILE_SIZE : RWFile::DONT_CHANGE_FILE_SIZE;

  RWFileOptions file_opts;
  file_opts.is_sensitive = true;
  file_opts.mode = Env::MUST_EXIST;
  unique_ptr<RWFile> file;
  RETURN_NOT_OK(env_->NewRWFile(file_opts, target->data_filename, &file));

  int64_t size_before;
  RETURN_NOT_OK(PreallocateForBlock(file.get(), mode,
                                    kPreallocateBytes, &size_before));

  // Certain older ext4 versions (e.g. on el6) refuse to truncate unwritten
  // preallocated space lying beyond the end of the file. Writing one byte
  // into that space works around the problem.
  if (mode == RWFile::DONT_CHANGE_FILE_SIZE) {
    RETURN_NOT_OK(file->Write(size_before, "a"));
  }

  RETURN_NOT_OK(file->Close());

  LOG(INFO) << "Preallocated full container " << target->name;
  return Status::OK();
}

// Adds a block to a randomly chosen full container whose metadata says it was
// created and then deleted, but whose space in the data file is still
// allocated.
//
// Returns the error from GetRandomContainer() if there are no full
// containers, or the first error from any of the file operations.
Status LBMCorruptor::AddUnpunchedBlockToFullContainer() {
  const Container* target = nullptr;
  RETURN_NOT_OK(GetRandomContainer(FULL, &target));

  uint64_t fs_block_size;
  RETURN_NOT_OK(env_->GetBlockSize(target->data_filename, &fs_block_size));

  RWFileOptions file_opts;
  file_opts.is_sensitive = true;
  file_opts.mode = Env::MUST_EXIST;
  unique_ptr<RWFile> file;
  RETURN_NOT_OK(env_->NewRWFile(file_opts, target->data_filename, &file));

  // Simulate writing the block by extending the data file by a random number
  // of filesystem blocks. The "+ 1" keeps the length non-zero, since a
  // zero-length preallocation would fail.
  const auto num_fs_blocks = rand_.Uniform(16) + 1;
  int64_t block_length = num_fs_blocks * fs_block_size;
  int64_t block_offset;
  RETURN_NOT_OK(PreallocateForBlock(file.get(), RWFile::CHANGE_FILE_SIZE,
                                    block_length, &block_offset));
  RETURN_NOT_OK(file->Close());

  // Record the block's creation immediately followed by its deletion. The
  // space grown above is deliberately left in place.
  unique_ptr<WritablePBContainerFile> writer;
  RETURN_NOT_OK(OpenMetadataWriter(*target, &writer));
  BlockId block_id(rand_.Next64());
  RETURN_NOT_OK(AppendCreateRecord(writer.get(), block_id,
                                   block_offset, block_length));
  RETURN_NOT_OK(AppendDeleteRecord(writer.get(), block_id));

  LOG(INFO) << "Added unpunched block to full container " << target->name;
  return writer->Close();
}

// Creates a new, incomplete container in a randomly chosen data directory.
//
// The container's name comes from oid_generator_. One of three kinds of
// incomplete container is created at random:
//
// 0. Empty data file but no metadata file.
// 1. No data file but metadata file exists (and is up to a certain size).
// 2. Empty data file and metadata file exists (and is up to a certain size).
//
// Whenever a metadata file is created, it is truncated to a random length
// drawn from rand_.Uniform(pb_util::kPBContainerMinimumValidLength).
//
// Returns the first error from any of the file operations.
Status LBMCorruptor::CreateIncompleteContainer() {
  unique_ptr<RWFile> data_file;
  unique_ptr<RWFile> metadata_file;
  string unsuffixed_path = JoinPathSegments(GetRandomDataDir(),
                                            oid_generator_.Next());
  string data_fname = StrCat(
      unsuffixed_path, LogBlockManager::kContainerDataFileSuffix);
  string metadata_fname = StrCat(
      unsuffixed_path, LogBlockManager::kContainerMetadataFileSuffix);

  // Each file must be opened into its own handle: the metadata file has to
  // land in 'metadata_file' so that the random-length truncation below is
  // applied to it, and so that the data file handle is not replaced before
  // it has been closed.
  int r = rand_.Uniform(3);
  RWFileOptions opts;
  opts.is_sensitive = true;
  if (r == 0) {
    RETURN_NOT_OK(env_->NewRWFile(opts, data_fname, &data_file));
  } else if (r == 1) {
    RETURN_NOT_OK(env_->NewRWFile(opts, metadata_fname, &metadata_file));
  } else {
    CHECK_EQ(r, 2);
    RETURN_NOT_OK(env_->NewRWFile(opts, data_fname, &data_file));
    RETURN_NOT_OK(env_->NewRWFile(opts, metadata_fname, &metadata_file));
  }

  // The data file, if any, is left empty.
  if (data_file) {
    RETURN_NOT_OK(data_file->Close());
  }

  // The metadata file, if any, is resized to a random length.
  if (metadata_file) {
    int md_length = rand_.Uniform(pb_util::kPBContainerMinimumValidLength);
    RETURN_NOT_OK(metadata_file->Truncate(md_length));
    RETURN_NOT_OK(metadata_file->Close());
  }

  LOG(INFO) << "Created incomplete container " << unsuffixed_path;
  return Status::OK();
}

// Appends a malformed metadata record to a randomly chosen container.
//
// The data file is first grown by 16 KiB so that a well-formed CREATE record
// for that region could exist. That record is then corrupted in one of eight
// randomly chosen ways before being appended to the container's metadata.
//
// Returns the error from GetRandomContainer() if there are no containers, or
// the first error from any of the file operations.
Status LBMCorruptor::AddMalformedRecordToContainer() {
  const int kBlockSize = 16 * 1024;

  const Container* target;
  RETURN_NOT_OK(GetRandomContainer(ANY, &target));

  // Make room in the data file for the new block. The space is never filled
  // in; growing the file just guarantees that the record is not deemed
  // malformed merely for extending past the end of the data file.
  int64_t block_offset;
  {
    RWFileOptions file_opts;
    file_opts.is_sensitive = true;
    file_opts.mode = Env::MUST_EXIST;
    unique_ptr<RWFile> file;
    RETURN_NOT_OK(env_->NewRWFile(file_opts, target->data_filename, &file));
    RETURN_NOT_OK(PreallocateForBlock(file.get(), RWFile::CHANGE_FILE_SIZE,
                                      kBlockSize, &block_offset));
    RETURN_NOT_OK(file->Close());
  }

  // Start from a well-formed CREATE record.
  BlockId block_id(rand_.Next64());
  BlockRecordPB record;
  block_id.CopyToPB(record.mutable_block_id());
  record.set_op_type(CREATE);
  record.set_offset(block_offset);
  record.set_length(kBlockSize);
  record.set_timestamp_us(0);

  unique_ptr<WritablePBContainerFile> writer;
  RETURN_NOT_OK(OpenMetadataWriter(*target, &writer));

  // Now break the record. The cases mirror the malformed record checks in
  // log_block_manager.cc.
  int r = rand_.Uniform(8);
  switch (r) {
    case 0:
      // No block offset.
      record.clear_offset();
      break;
    case 1:
      // No block length.
      record.clear_length();
      break;
    case 2:
      // Negative block offset.
      record.set_offset(-1);
      break;
    case 3:
      // Negative block length.
      record.set_length(-1);
      break;
    case 4:
      // Offset + length > data file size.
      record.set_offset(kint64max / 2);
      break;
    case 5: {
      // Two CREATEs for same block ID: this is the first one, and the
      // unmodified record is appended a second time below.
      RETURN_NOT_OK(writer->Append(record));
      break;
    }
    case 6:
      // DELETE without first matching CREATE.
      record.clear_offset();
      record.clear_length();
      record.set_op_type(DELETE);
      break;
    default:
      // Unrecognized op type.
      CHECK_EQ(r, 7);
      record.set_op_type(UNKNOWN);
      break;
  }

  LOG(INFO) << "Added malformed record to container " << target->name;
  return writer->Append(record);
}

// Adds a block to a randomly chosen container at an offset that is not a
// multiple of the filesystem block size.
//
// The block's contents are its 64-bit id repeated back to back, and a
// matching CREATE record is appended to the container's metadata.
//
// Returns the error from GetRandomContainer() if there are no containers, or
// the first error from any of the file operations.
Status LBMCorruptor::AddMisalignedBlockToContainer() {
  const Container* c = nullptr;
  RETURN_NOT_OK(GetRandomContainer(ANY, &c));

  uint64_t fs_block_size;
  RETURN_NOT_OK(env_->GetBlockSize(c->data_filename, &fs_block_size));

  unique_ptr<RWFile> data_file;
  RWFileOptions opts;
  opts.mode = Env::MUST_EXIST;
  opts.is_sensitive = true;
  RETURN_NOT_OK(env_->NewRWFile(opts, c->data_filename, &data_file));
  uint64_t initial_data_size;
  RETURN_NOT_OK(data_file->Size(&initial_data_size));

  // Pick a random offset beyond the end of the file to place the new block,
  // ensuring that the offset isn't aligned with the filesystem block size.
  //
  // In accordance with KUDU-1793 (which sparked the entire concept of
  // misaligned blocks in the first place), misaligned blocks may not intrude
  // on the aligned space of the blocks that came before them. To avoid having
  // to read the container's records just to corrupt it, we'll arbitrarily add
  // a fs_block_size gap before this misaligned block, to ensure that it
  // doesn't violate the previous block's alignment.
  uint64_t block_offset =
      initial_data_size + fs_block_size + rand_.Uniform(fs_block_size);
  if (block_offset % fs_block_size == 0) {
    block_offset++;
  }

  // Ensure the file is preallocated at least up to the offset, in case we
  // decide to write a zero-length block to the end of it.
  uint64_t length_beyond_eof = block_offset - initial_data_size;
  if (length_beyond_eof > 0) {
    RETURN_NOT_OK(data_file->PreAllocate(initial_data_size, length_beyond_eof,
                                         RWFile::CHANGE_FILE_SIZE));
  }

  // Populate the block with repeated sequences of its id so that readers who
  // wish to verify its contents can do so easily. To avoid a truncated
  // sequence at the end of the block, we also ensure that the block's length
  // is a multiple of the size of the id's type.
  BlockId block_id(rand_.Next64());
  const uint64_t raw_block_id = block_id.id();
  uint64_t block_length = rand_.Uniform(fs_block_size * 4);
  block_length -= block_length % sizeof(raw_block_id);

  // The buffer's size is only known at runtime (and may be zero), so it lives
  // in a vector rather than in a variable-length array, which is not standard
  // C++.
  vector<uint8_t> data(block_length);
  for (size_t i = 0; i < data.size(); i += sizeof(raw_block_id)) {
    memcpy(data.data() + i, &raw_block_id, sizeof(raw_block_id));
  }
  RETURN_NOT_OK(data_file->Write(block_offset,
                                 Slice(data.data(), data.size())));
  RETURN_NOT_OK(data_file->Close());

  // Having written out the block, write a corresponding metadata record.
  unique_ptr<WritablePBContainerFile> metadata_writer;
  RETURN_NOT_OK(OpenMetadataWriter(*c, &metadata_writer));
  RETURN_NOT_OK(AppendCreateRecord(metadata_writer.get(), block_id,
                                   block_offset, block_length));

  LOG(INFO) << "Added misaligned block to container " << c->name;
  return metadata_writer->Close();
}

// Leaves a randomly chosen container with a partial record at the end of its
// metadata file, then stops tracking that container.
//
// A well-formed CREATE record is appended first, after which the metadata
// file is shortened by one byte.
//
// Returns the error from GetRandomContainer() if there are no containers, or
// the first error from any of the file operations.
Status LBMCorruptor::AddPartialRecordToContainer() {
  const Container* victim;
  RETURN_NOT_OK(GetRandomContainer(ANY, &victim));

  unique_ptr<WritablePBContainerFile> writer;
  RETURN_NOT_OK(OpenMetadataWriter(*victim, &writer));

  // Begin with a new, well-formed record.
  RETURN_NOT_OK(AppendCreateRecord(writer.get(),
                                   BlockId(rand_.Next64()),
                                   0, 0));

  // Lop the final byte off the metadata file, which cuts the record that was
  // just appended short.
  {
    RWFileOptions file_opts;
    file_opts.is_sensitive = true;
    file_opts.mode = Env::MUST_EXIST;
    unique_ptr<RWFile> md_file;
    RETURN_NOT_OK(env_->NewRWFile(file_opts, victim->metadata_filename,
                                  &md_file));
    uint64_t md_size;
    RETURN_NOT_OK(md_file->Size(&md_size));
    RETURN_NOT_OK(md_file->Truncate(md_size - 1));
  }

  // A container with a partial record cannot be corrupted any further by the
  // corruptor, so forget about it.
  //
  // 'victim' points into one of the lists being modified below, so take a
  // copy of the name before any element is erased.
  string container_name = victim->name;

  const auto forget_container = [&](vector<Container>* containers) {
    containers->erase(
        std::remove_if(containers->begin(), containers->end(),
                       [&](const Container& e) {
                         return container_name == e.name;
                       }),
        containers->end());
  };
  forget_container(&all_containers_);
  forget_container(&full_containers_);

  LOG(INFO) << "Added partial record to container " << container_name;
  return Status::OK();
}

// Picks one of the corruptor's five non-fatal inconsistencies at random and
// injects it, returning that operation's status.
//
// The two operations that need a full container are skipped when there are
// none; in that case another operation is drawn.
Status LBMCorruptor::InjectRandomNonFatalInconsistency() {
  for (;;) {
    const int r = rand_.Uniform(5);
    if (r == 0) {
      return AddMisalignedBlockToContainer();
    }
    if (r == 1) {
      return CreateIncompleteContainer();
    }
    if (r == 2 || r == 3) {
      if (full_containers_.empty()) {
        // Nothing to operate on; go around again and draw a different
        // operation.
        continue;
      }
      return r == 2 ? PreallocateFullContainer()
                    : AddUnpunchedBlockToFullContainer();
    }
    if (r == 4) {
      return AddPartialRecordToContainer();
    }
    LOG(FATAL) << "Unexpected value " << r;
  }
}

// Opens the existing metadata file of 'container' and wraps it in a
// WritablePBContainerFile on which OpenExisting() has been called.
//
// On success the new writer is stored in '*writer'. On failure the error is
// returned and '*writer' is left untouched.
Status LBMCorruptor::OpenMetadataWriter(
    const Container& container,
    unique_ptr<WritablePBContainerFile>* writer) {
  RWFileOptions file_opts;
  file_opts.is_sensitive = true;
  file_opts.mode = Env::MUST_EXIST;

  unique_ptr<RWFile> file;
  RETURN_NOT_OK(env_->NewRWFile(file_opts,
                                container.metadata_filename,
                                &file));

  // The writer takes the file as a shared_ptr, so hand ownership over.
  shared_ptr<RWFile> shared_file(std::move(file));
  unique_ptr<WritablePBContainerFile> opened(
      new WritablePBContainerFile(std::move(shared_file)));
  RETURN_NOT_OK(opened->OpenExisting());

  *writer = std::move(opened);
  return Status::OK();
}

// Appends a CREATE record for 'block_id' to 'writer', describing a block of
// 'block_length' bytes located at 'block_offset'.
//
// Returns the status of the append.
Status LBMCorruptor::AppendCreateRecord(WritablePBContainerFile* writer,
                                        BlockId block_id,
                                        int64_t block_offset,
                                        int64_t block_length) {
  BlockRecordPB create_record;
  create_record.set_op_type(CREATE);
  block_id.CopyToPB(create_record.mutable_block_id());
  create_record.set_offset(block_offset);
  create_record.set_length(block_length);
  // The timestamp has no effect here, so any value will do.
  create_record.set_timestamp_us(0);

  return writer->Append(create_record);
}

// Appends a DELETE record for 'block_id' to 'writer'.
//
// Returns the status of the append.
Status LBMCorruptor::AppendDeleteRecord(WritablePBContainerFile* writer,
                                        BlockId block_id) {
  BlockRecordPB delete_record;
  delete_record.set_op_type(DELETE);
  block_id.CopyToPB(delete_record.mutable_block_id());
  // The timestamp has no effect here, so any value will do.
  delete_record.set_timestamp_us(0);

  return writer->Append(delete_record);
}

// Preallocates 'block_length' bytes in 'data_file', starting at the file's
// current size and using the given preallocation 'mode'.
//
// On success, the size the file had before the preallocation is stored in
// '*old_data_file_size'. On failure the error is returned and
// '*old_data_file_size' is not written.
Status LBMCorruptor::PreallocateForBlock(RWFile* data_file,
                                         RWFile::PreAllocateMode mode,
                                         int64_t block_length,
                                         int64_t* old_data_file_size) {
  uint64_t size_before;
  RETURN_NOT_OK(data_file->Size(&size_before));

  // The new space begins exactly where the file currently ends.
  RETURN_NOT_OK(data_file->PreAllocate(size_before, block_length, mode));

  *old_data_file_size = size_before;
  return Status::OK();
}

// Selects a container at random and stores a pointer to it in '*container'.
//
// With mode FULL the choice is made among the full containers only; with
// mode ANY it is made among all known containers. Any other mode is a fatal
// error.
//
// Returns IllegalState if there is no container to choose from, in which
// case '*container' is not written.
Status LBMCorruptor::GetRandomContainer(FindContainerMode mode,
                                        const Container** container) const {
  const bool full_only = (mode == FULL);
  if (!full_only) {
    CHECK_EQ(mode, ANY);
  }

  const vector<Container>& pool = full_only ? full_containers_
                                            : all_containers_;
  if (pool.empty()) {
    return Status::IllegalState(full_only ? "no full containers"
                                          : "no containers");
  }

  // The returned pointer refers to an element of the member vector.
  *container = &pool[rand_.Uniform(pool.size())];
  return Status::OK();
}

// Returns a reference to one of the corruptor's data directories, chosen at
// random.
const string& LBMCorruptor::GetRandomDataDir() const {
  const auto dir_idx = rand_.Uniform(data_dirs_.size());
  return data_dirs_[dir_idx];
}

} // namespace fs
} // namespace kudu
