// blob: 09a16b5e8caa7cee1af41bf463b1a25b0bd57ee2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_EXPRS_STRING_FUNCTIONS_H
#define IMPALA_EXPRS_STRING_FUNCTIONS_H
#include <re2/re2.h>
#include <bitset>
#include "runtime/string-value.h"
#include "runtime/string-search.h"
using namespace impala_udf;
namespace impala {
// Introduce the impala_udf context and value types into namespace impala. The
// declarations in this header name several of them (FunctionContext, StringVal,
// IntVal, BigIntVal, DoubleVal) without qualification.
using impala_udf::FunctionContext;
using impala_udf::AnyVal;
using impala_udf::BooleanVal;
using impala_udf::TinyIntVal;
using impala_udf::SmallIntVal;
using impala_udf::IntVal;
using impala_udf::BigIntVal;
using impala_udf::FloatVal;
using impala_udf::DoubleVal;
using impala_udf::TimestampVal;
using impala_udf::StringVal;
using impala_udf::DecimalVal;
// Forward declarations.
// NOTE(review): none of these three classes is referenced by the declarations shown in
// this header -- confirm whether including files depend on them before removing.
class Expr;
class OpcodeRegistry;
class TupleRow;
/// Groups the string-related functions of this header as static members operating on
/// impala_udf value types (StringVal, IntVal, BigIntVal, DoubleVal). Every member is
/// static and the class declares no data members. Only declarations appear here; the
/// definitions are provided elsewhere.
///
/// Most functions take a FunctionContext* as their first argument. Where that parameter
/// is unnamed in the declaration it is left unnamed here as well.
class StringFunctions {
 public:
  // String trimming position or direction
  enum TrimPosition {
    LEADING, // Trim from the beginning, or leading end
    TRAILING, // Trim from the right, or trailing end
    BOTH // Trim from both ends of string
  };

  /// Two overloads: with an explicit 'len' and without one.
  static StringVal Substring(FunctionContext*, const StringVal& str, const BigIntVal& pos,
      const BigIntVal& len);
  static StringVal Substring(FunctionContext*, const StringVal& str,
      const BigIntVal& pos);

  /// Takes a string, a delimiter string and a field number; returns a StringVal.
  static StringVal SplitPart(FunctionContext* context, const StringVal& str,
      const StringVal& delim, const BigIntVal& field);

  static StringVal Left(FunctionContext*, const StringVal& str, const BigIntVal& len);
  static StringVal Right(FunctionContext*, const StringVal& str, const BigIntVal& len);
  static StringVal Space(FunctionContext*, const BigIntVal& len);
  static StringVal Repeat(FunctionContext*, const StringVal& str, const BigIntVal& n);

  /// Lpad and Rpad share one parameter list: (str, len, pad).
  static StringVal Lpad(FunctionContext*, const StringVal& str, const BigIntVal& len,
      const StringVal& pad);
  static StringVal Rpad(FunctionContext*, const StringVal& str, const BigIntVal& len,
      const StringVal& pad);

  /// Both return an IntVal for a single string argument.
  /// NOTE(review): the difference between the two (presumably bytes vs. characters) is
  /// not visible in this header -- confirm against the definitions.
  static IntVal Length(FunctionContext*, const StringVal& str);
  static IntVal CharLength(FunctionContext*, const StringVal& str);

  static StringVal Lower(FunctionContext*, const StringVal& str);
  static StringVal Upper(FunctionContext*, const StringVal& str);
  static StringVal InitCap(FunctionContext*, const StringVal& str);

  /// ReplacePrepare/ReplaceClose take a FunctionStateScope.
  /// NOTE(review): presumably the prepare/close hooks registered alongside Replace --
  /// confirm where the functions are registered.
  static void ReplacePrepare(FunctionContext*, FunctionContext::FunctionStateScope);
  static void ReplaceClose(FunctionContext*, FunctionContext::FunctionStateScope);
  static StringVal Replace(FunctionContext*, const StringVal& str,
      const StringVal& pattern, const StringVal& replace);

  static StringVal Reverse(FunctionContext*, const StringVal& str);
  static StringVal Translate(FunctionContext*, const StringVal& str, const StringVal& src,
      const StringVal& dst);

  /// One-argument trim forms; no explicit set of characters is passed.
  static StringVal Trim(FunctionContext*, const StringVal& str);
  static StringVal Ltrim(FunctionContext*, const StringVal& str);
  static StringVal Rtrim(FunctionContext*, const StringVal& str);

  /// Sets up arguments and function context for the *TrimString functions below.
  static void TrimPrepare(FunctionContext*, FunctionContext::FunctionStateScope);

  /// Cleans up the work done by TrimPrepare above.
  static void TrimClose(FunctionContext*, FunctionContext::FunctionStateScope);

  /// Trims occurrences of the characters in 'chars_to_trim' string from
  /// the beginning of string 'str'.
  static StringVal LTrimString(FunctionContext* ctx, const StringVal& str,
      const StringVal& chars_to_trim);

  /// Trims occurrences of the characters in 'chars_to_trim' string from
  /// the end of string 'str'.
  static StringVal RTrimString(FunctionContext* ctx, const StringVal& str,
      const StringVal& chars_to_trim);

  /// Trims occurrences of the characters in 'chars_to_trim' string from
  /// both ends of string 'str'.
  static StringVal BTrimString(FunctionContext* ctx, const StringVal& str,
      const StringVal& chars_to_trim);

  static IntVal Ascii(FunctionContext*, const StringVal& str);

  /// Three overloads: (str, substr), plus optional trailing 'start_position' and
  /// 'occurrence' arguments. Note the argument order is (str, substr), the reverse of
  /// Locate/LocatePos below.
  static IntVal Instr(FunctionContext*, const StringVal& str, const StringVal& substr,
      const BigIntVal& start_position, const BigIntVal& occurrence);
  static IntVal Instr(FunctionContext*, const StringVal& str, const StringVal& substr,
      const BigIntVal& start_position);
  static IntVal Instr(FunctionContext*, const StringVal& str, const StringVal& substr);

  /// Argument order is (substr, str); LocatePos adds a 'start_pos' argument.
  static IntVal Locate(FunctionContext*, const StringVal& substr, const StringVal& str);
  static IntVal LocatePos(FunctionContext*, const StringVal& substr, const StringVal& str,
      const BigIntVal& start_pos);

  /// Takes 'match_parameter' plus two pointer parameters, 'error_str' and 'opts', and
  /// returns a bool. Unlike most members it takes no FunctionContext.
  /// NOTE(review): presumably fills '*opts' from 'match_parameter' and reports failure
  /// through the return value and '*error_str' -- confirm against the definition.
  static bool SetRE2Options(const StringVal& match_parameter, std::string* error_str,
      re2::RE2::Options* opts);

  /// RegexpPrepare/RegexpClose take a FunctionStateScope.
  /// NOTE(review): which of the Regexp* functions below use these hooks is not visible
  /// here -- confirm where the functions are registered.
  static void RegexpPrepare(FunctionContext*, FunctionContext::FunctionStateScope);
  static void RegexpClose(FunctionContext*, FunctionContext::FunctionStateScope);
  static StringVal RegexpEscape(FunctionContext*, const StringVal& str);
  static StringVal RegexpExtract(FunctionContext*, const StringVal& str,
      const StringVal& pattern, const BigIntVal& index);
  static StringVal RegexpReplace(FunctionContext*, const StringVal& str,
      const StringVal& pattern, const StringVal& replace);

  /// Separate prepare function for the RegexpMatchCount* functions; no matching
  /// RegexpMatchCountClose is declared in this class.
  static void RegexpMatchCountPrepare(FunctionContext* context,
      FunctionContext::FunctionStateScope scope);

  /// The 4-argument form adds 'start_pos' (an IntVal, not BigIntVal) and
  /// 'match_parameter' to the 2-argument form.
  static IntVal RegexpMatchCount2Args(FunctionContext* context, const StringVal& str,
      const StringVal& pattern);
  static IntVal RegexpMatchCount4Args(FunctionContext* context, const StringVal& str,
      const StringVal& pattern, const IntVal& start_pos,
      const StringVal& match_parameter);

  /// Both take a count 'num_children' and a pointer 'strs'.
  /// NOTE(review): presumably 'strs' points to 'num_children' StringVals -- confirm.
  static StringVal Concat(FunctionContext*, int num_children, const StringVal* strs);
  static StringVal ConcatWs(FunctionContext*, const StringVal& sep, int num_children,
      const StringVal* strs);

  static IntVal FindInSet(FunctionContext*, const StringVal& str,
      const StringVal& str_set);

  /// ParseUrlPrepare/ParseUrlClose take a FunctionStateScope. ParseUrlKey takes a 'key'
  /// argument in addition to the 'url' and 'part' arguments of ParseUrl.
  static void ParseUrlPrepare(FunctionContext*, FunctionContext::FunctionStateScope);
  static StringVal ParseUrl(FunctionContext*, const StringVal& url,
      const StringVal& part);
  static StringVal ParseUrlKey(FunctionContext*, const StringVal& url,
      const StringVal& key, const StringVal& part);
  static void ParseUrlClose(FunctionContext*, FunctionContext::FunctionStateScope);

  /// Converts ASCII 'val' to corresponding character.
  static StringVal Chr(FunctionContext* context, const IntVal& val);

  static StringVal Base64Encode(FunctionContext* ctx, const StringVal& str);
  static StringVal Base64Decode(FunctionContext* ctx, const StringVal& str);

  /// Same parameter list as GetJsonObjectImpl below.
  static StringVal GetJsonObject(FunctionContext* ctx, const StringVal& json_str,
      const StringVal& path_str);

  /// Implementation of GetJsonObject, not cross-compiled since there is no significant
  /// benefit to be gained from doing so.
  static StringVal GetJsonObjectImpl(FunctionContext* ctx, const StringVal& json_str,
      const StringVal& path_str);

  /// Two-string functions returning an IntVal.
  static IntVal Levenshtein(
      FunctionContext* context, const StringVal& s1, const StringVal& s2);
  static IntVal DamerauLevenshtein(
      FunctionContext* context, const StringVal& s1, const StringVal& s2);

  /// Two-string functions returning a DoubleVal.
  static DoubleVal JaroDistance(
      FunctionContext* ctx, const StringVal& s1, const StringVal& s2);
  static DoubleVal JaroSimilarity(
      FunctionContext* ctx, const StringVal& s1, const StringVal& s2);

  /// Three overloads each for the distance and similarity forms: (s1, s2), plus
  /// optional trailing 'scaling_factor' and 'boost_threshold' arguments.
  static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
      const StringVal& s2);
  static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
      const StringVal& s2, const DoubleVal& scaling_factor);
  static DoubleVal JaroWinklerDistance(FunctionContext* ctx, const StringVal& s1,
      const StringVal& s2, const DoubleVal& scaling_factor,
      const DoubleVal& boost_threshold);
  static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
      const StringVal& s2);
  static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
      const StringVal& s2, const DoubleVal& scaling_factor);
  static DoubleVal JaroWinklerSimilarity(FunctionContext* ctx, const StringVal& s1,
      const StringVal& s2, const DoubleVal& scaling_factor,
      const DoubleVal& boost_threshold);

 private:
  /// Templatized implementation of the actual string trimming function.
  /// The first parameter, 'D', is one of StringFunctions::TrimPosition values.
  /// The second parameter, 'IS_IMPLICIT_WHITESPACE', is true when the set of characters
  /// to trim is implicitly set to ' ', as a result of calling the one-arg
  /// forms of trim/ltrim/rtrim.
  template <TrimPosition D, bool IS_IMPLICIT_WHITESPACE>
  static StringVal DoTrimString(FunctionContext* ctx, const StringVal& str,
      const StringVal& chars_to_trim);
};
}
#endif