blob: 9ff31c55b857719db3a5661ebdc061555617e50b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "util/parse-util.h"
#include <sstream>
#include <zstd.h>
#include <boost/algorithm/string/classification.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include "util/mem-info.h"
#include "util/string-parser.h"
#include "common/names.h"
using boost::algorithm::is_any_of;
using boost::algorithm::split;
using boost::algorithm::token_compress_on;
using boost::algorithm::trim;
namespace impala {
// Parses a memory specification string and returns the number of bytes it denotes.
//
// Accepted forms (suffixes are case-insensitive, a trailing "B" is optional):
//   - "<int>" or "<int>B"      : plain bytes
//   - "<float>K|M|G|T[B]"      : kilo-, mega-, giga- or terabytes (powers of 1024)
//   - "<int>%"                 : percentage of 'relative_reference' ("%B" is rejected)
//
// '*is_percent' is reset to false on entry and set to true once a "%" suffix has been
// recognized (this happens before the numeric part is validated).
//
// Returns:
//   - 0 if 'mem_spec_str' is empty or the resulting value is exactly -1 (the caller's
//     indicator for "no limit"),
//   - -1 if the string cannot be parsed or the result does not fit in an int64_t,
//   - the computed number of bytes otherwise (other negative inputs are passed through).
int64_t ParseUtil::ParseMemSpec(const string& mem_spec_str, bool* is_percent,
    int64_t relative_reference) {
  *is_percent = false;
  if (mem_spec_str.empty()) return 0;

  int64_t multiplier = -1;
  int32_t number_str_len = mem_spec_str.size();

  // Look for an accepted suffix such as "MB", "M", or "%".
  string::const_reverse_iterator suffix_char = mem_spec_str.rbegin();
  if (*suffix_char == 'b' || *suffix_char == 'B') {
    // Skip "B", the default is bytes anyways.
    suffix_char++;
    number_str_len--;
    // A lone "B" leaves nothing in front of the suffix. The check has to happen after
    // advancing, otherwise the switch below would dereference rend().
    if (suffix_char == mem_spec_str.rend()) return -1;
  }
  switch (*suffix_char) {
    case 't':
    case 'T':
      // Terabytes.
      number_str_len--;
      multiplier = 1024L * 1024L * 1024L * 1024L;
      break;
    case 'g':
    case 'G':
      // Gigabytes.
      number_str_len--;
      multiplier = 1024L * 1024L * 1024L;
      break;
    case 'm':
    case 'M':
      // Megabytes.
      number_str_len--;
      multiplier = 1024L * 1024L;
      break;
    case 'k':
    case 'K':
      // Kilobytes.
      number_str_len--;
      multiplier = 1024L;
      break;
    case '%':
      // Don't allow a suffix of "%B".
      if (suffix_char != mem_spec_str.rbegin()) return -1;
      number_str_len--;
      *is_percent = true;
      break;
    // The default is bytes. If there was a trailing "B" it was handled above.
  }

  // Converting a double that is NaN or outside the int64_t range to an integer is
  // undefined behaviour, so computed values are range-checked before the cast.
  // 2^63 is exactly representable as a double; NaN fails both comparisons.
  const auto fits_in_int64 = [](double value) {
    const double kTwoPow63 = 9223372036854775808.0;
    return value >= -kTwoPow63 && value < kTwoPow63;
  };

  StringParser::ParseResult result;
  int64_t bytes;
  if (multiplier != -1) {
    // A unit suffix (KB, MB, GB or TB) was given: the number may be fractional.
    double limit_val = StringParser::StringToFloat<double>(mem_spec_str.data(),
        number_str_len, &result);
    if (result != StringParser::PARSE_SUCCESS) return -1;
    const double scaled = multiplier * limit_val;
    if (!fits_in_int64(scaled)) return -1;
    bytes = static_cast<int64_t>(scaled);
  } else {
    // Plain bytes or a percentage: the number must be an integer.
    int64_t limit_val = StringParser::StringToInt<int64_t>(mem_spec_str.data(),
        number_str_len, &result);
    if (result != StringParser::PARSE_SUCCESS) return -1;
    if (*is_percent) {
      const double scaled =
          (static_cast<double>(limit_val) / 100.0) * relative_reference;
      if (!fits_in_int64(scaled)) return -1;
      bytes = static_cast<int64_t>(scaled);
    } else {
      bytes = limit_val;
    }
  }

  // Accept -1 as indicator for infinite memory that we report by a 0 return value.
  if (bytes == -1) {
    return 0;
  }
  return bytes;
}
// Parses a compression codec specification into a codec enum and a compression level.
//
// Accepted forms are "codec" and "zstd:compression_level". Whitespace around the codec
// name and around the level is trimmed, and consecutive ':' characters are treated as
// a single separator.
//
// On success '*type' receives the codec and '*level' receives either the explicitly
// given level or ZSTD_CLEVEL_DEFAULT when no level was given. Neither output is
// written when an error is returned.
Status ParseUtil::ParseCompressionCodec(
    const string& compression_codec, THdfsCompression::type* type, int* level) {
  vector<string> parts;
  split(parts, compression_codec, is_any_of(":"), token_compress_on);
  if (parts.size() > 2) return Status("Invalid compression codec value");
  const bool has_level = parts.size() == 2;

  // Resolve the codec name first so that an unknown codec is reported before any
  // complaint about the level.
  trim(parts[0]);
  THdfsCompression::type parsed_codec;
  RETURN_IF_ERROR(GetThriftEnum(
      parts[0], "compression codec", _THdfsCompression_VALUES_TO_NAMES, &parsed_codec));

  if (!has_level) {
    *type = parsed_codec;
    *level = ZSTD_CLEVEL_DEFAULT;
    return Status::OK();
  }

  // An explicit level is only meaningful for ZSTD.
  if (parsed_codec != THdfsCompression::ZSTD) {
    return Status("Compression level only supported for ZSTD");
  }

  string& level_str = parts[1];
  trim(level_str);
  StringParser::ParseResult parse_result;
  const int parsed_level = StringParser::StringToInt<int>(
      level_str.c_str(), static_cast<int>(level_str.size()), &parse_result);
  const bool level_is_valid = parse_result == StringParser::PARSE_SUCCESS
      && parsed_level >= 1 && parsed_level <= ZSTD_maxCLevel();
  if (!level_is_valid) {
    return Status(Substitute("Invalid ZSTD compression level '$0'."
                             " Valid values are in [1,$1]",
        level_str, ZSTD_maxCLevel()));
  }

  *type = parsed_codec;
  *level = parsed_level;
  return Status::OK();
}
// Renders every entry of 'enum_values_to_names' as "NAME(value)" and joins the entries
// with ", " in the map's iteration order, e.g. "FOO(1), BAR(2), BAZ(3)". An empty map
// yields an empty string.
string GetThriftEnumValues(const map<int, const char*>& enum_values_to_names) {
  stringstream out;
  // Empty before the first entry, ", " before every later one.
  const char* separator = "";
  for (auto it = enum_values_to_names.begin(); it != enum_values_to_names.end(); ++it) {
    out << separator << it->second << "(" << it->first << ")";
    separator = ", ";
  }
  return out.str();
}
}