// pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java - pinot - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.pinot.common.function.scalar;

 import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet;
 import it.unimi.dsi.fastutil.objects.ObjectSet;
 import java.io.UnsupportedEncodingException;
 import java.net.URLDecoder;
 import java.net.URLEncoder;
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.text.Normalizer;
 import java.util.Base64;
 import java.util.Locale;
 import java.util.UUID;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import javax.annotation.Nullable;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.pinot.common.utils.RegexpPatternConverterUtils;
 import org.apache.pinot.spi.annotations.ScalarFunction;
 import org.apache.pinot.spi.utils.JsonUtils;


 /**
  * Inbuilt String Transformation Functions
  * The functions can be used as UDFs in Query when added in the FunctionRegistry.
  * The {@link ScalarFunction} annotation is used with each method for the registration.
  *
  * Example usage:
  * <code> SELECT UPPER(playerName) FROM baseballStats LIMIT 10 </code>
  */
 public class StringFunctions {
   private StringFunctions() {
   }

   // Leading / trailing runs of regex whitespace, used by ltrim / rtrim.
   private static final Pattern LTRIM = Pattern.compile("^\\s+");
   private static final Pattern RTRIM = Pattern.compile("\\s+$");

   /**
    * @see StringUtils#reverse(String)
    * @param input string to reverse
    * @return the input with its characters in reverse order
    */
   @ScalarFunction
   public static String reverse(String input) {
     return StringUtils.reverse(input);
   }

   /**
    * Lower-cases the input using {@link Locale#ROOT} so that the result does not depend on the default locale of
    * the JVM evaluating the query.
    *
    * @see String#toLowerCase(Locale)
    * @param input string to convert
    * @return string in lower case format
    */
   @ScalarFunction
   public static String lower(String input) {
     return input.toLowerCase(Locale.ROOT);
   }

   /**
    * Upper-cases the input using {@link Locale#ROOT} so that the result does not depend on the default locale of
    * the JVM evaluating the query.
    *
    * @see String#toUpperCase(Locale)
    * @param input string to convert
    * @return string in upper case format
    */
   @ScalarFunction
   public static String upper(String input) {
     return input.toUpperCase(Locale.ROOT);
   }

   /**
    * @see StringUtils#substring(String, int)
    * @param input Parent string
    * @param beginIndex 0 based index from which substring should be created
    * @return substring from beginIndex to end of the parent string
    */
   @ScalarFunction
   public static String substr(String input, int beginIndex) {
     return StringUtils.substring(input, beginIndex);
   }

   /**
    * Returns the substring of the main string from beginIndex to endIndex.
    * If endIndex is -1 returns the substring from beginIndex to end of the string.
    *
    * @see StringUtils#substring(String, int, int)
    * @param input Parent string
    * @param beginIndex 0 based index from which substring should be created
    * @param endIndex index at which substring should be terminated (exclusive), or -1 for "until the end"
    * @return substring from beginIndex to endIndex
    */
   @ScalarFunction
   public static String substr(String input, int beginIndex, int endIndex) {
     if (endIndex == -1) {
       return substr(input, beginIndex);
     }
     return StringUtils.substring(input, beginIndex, endIndex);
   }

   /**
    * @param input Parent string
    * @param beginIndex 1 based index from which substring should be created
    * @return substring from beginIndex to end of the parent string
    */
   @ScalarFunction
   public static String substring(String input, int beginIndex) {
     // Convert the 1 based SQL index to the 0 based index expected by StringUtils.
     return StringUtils.substring(input, beginIndex - 1);
   }

   /**
    * Returns the substring of the main string from beginIndex of length.
    *
    * @param input Parent string
    * @param beginIndex 1 based index from which substring should be created
    * @param length length of substring to be created
    * @return a substring of input string from beginIndex of length 'length'
    */
   @ScalarFunction
   public static String substring(String input, int beginIndex, int length) {
     // index is always 1 based
     int zeroBasedBegin = beginIndex - 1;
     return StringUtils.substring(input, zeroBasedBegin, zeroBasedBegin + length);
   }

   /**
    * Join two input strings with separator in between. Note that, unlike
    * {@link #concat(String, String, String)}, the separator is the first argument.
    *
    * @param seperator separator placed between the two inputs
    * @param input1 first string
    * @param input2 second string
    * @return The two input strings joined by the separator
    */
   @ScalarFunction(names = "concat_ws")
   public static String concatws(String seperator, String input1, String input2) {
     return concat(input1, input2, seperator);
   }

   /**
    * Join two input strings with separator in between
    *
    * @param input1 first string
    * @param input2 second string
    * @param seperator separator placed between the two inputs
    * @return The two input strings joined by the separator
    */
   @ScalarFunction
   public static String concat(String input1, String input2, String seperator) {
     return input1 + seperator + input2;
   }

   /**
    * Join two input strings with no separator in between
    *
    * @param input1 first string
    * @param input2 second string
    * @return The two input strings joined
    */
   @ScalarFunction
   public static String concat(String input1, String input2) {
     return concat(input1, input2, "");
   }

   /**
    * @see String#trim()
    * @param input string to trim
    * @return the input with whitespace removed from both ends, as defined by {@link String#trim()}
    */
   @ScalarFunction
   public static String trim(String input) {
     return input.trim();
   }

   /**
    * Standard SQL trim function.
    *
    * @param end BOTH|LEADING|TRAILING (compared case-sensitively; any other value trims nothing)
    * @param characters characters to be trimmed off
    * @param value value to trim
    * @return trim the characters from both/leading/trailing end of the string
    */
   @ScalarFunction
   public static String trim(String end, String characters, String value) {
     int length = value.length();
     int startIndex = 0;
     int endIndex = length;
     if (end.equals("BOTH") || end.equals("LEADING")) {
       // Advance past every leading char that is in the trim set.
       while (startIndex < endIndex && characters.indexOf(value.charAt(startIndex)) >= 0) {
         startIndex++;
       }
     }
     if (end.equals("BOTH") || end.equals("TRAILING")) {
       // Retreat past every trailing char that is in the trim set.
       while (startIndex < endIndex && characters.indexOf(value.charAt(endIndex - 1)) >= 0) {
         endIndex--;
       }
     }
     // Avoid creating a new String when nothing was trimmed.
     if (startIndex > 0 || endIndex < length) {
       return value.substring(startIndex, endIndex);
     }
     return value;
   }

   /**
    * @param input string to trim
    * @return trim spaces from left side of the string
    */
   @ScalarFunction
   public static String ltrim(String input) {
     return LTRIM.matcher(input).replaceAll("");
   }

   /**
    * @param input string to trim
    * @return trim spaces from right side of the string
    */
   @ScalarFunction
   public static String rtrim(String input) {
     return RTRIM.matcher(input).replaceAll("");
   }

   /**
    * @see StringUtils#left(String, int)
    * @param input Parent string
    * @param length number of characters to keep
    * @return get substring starting from the first index and extending up to specified length.
    */
   @ScalarFunction(names = {"leftSubStr", "left"})
   public static String leftSubStr(String input, int length) {
     return StringUtils.left(input, length);
   }

   /**
    * @see StringUtils#right(String, int)
    * @param input Parent string
    * @param length number of characters to keep
    * @return get substring ending at the last index with specified length
    */
   @ScalarFunction(names = {"rightSubStr", "right"})
   public static String rightSubStr(String input, int length) {
     return StringUtils.right(input, length);
   }

   /**
    * Extracts the first match of the whole regular expression (group 0), or the empty string if there is none.
    *
    * @see #regexpExtract(String, String, int, String)
    * @param value input value
    * @param regexp regular expression
    * @return the matched result.
    */
   @ScalarFunction(names = {"regexp_extract", "regexpExtract"})
   public static String regexpExtract(String value, String regexp) {
     return regexpExtract(value, regexp, 0, "");
   }

   /**
    * Extracts the given group of the first match, or the empty string if there is none.
    *
    * @see #regexpExtract(String, String, int, String)
    * @param value input value
    * @param regexp regular expression
    * @param group the group number within the regular expression to extract.
    * @return the matched result.
    */
   @ScalarFunction(names = {"regexp_extract", "regexpExtract"})
   public static String regexpExtract(String value, String regexp, int group) {
     return regexpExtract(value, regexp, group, "");
   }

   /**
    * Regular expression that extract first matched substring.
    *
    * @param value input value
    * @param regexp regular expression
    * @param group the group number within the regular expression to extract.
    * @param defaultValue the value returned if the pattern does not match, the group number does not exist in the
    *                     pattern, or the group did not participate in the match
    * @return the matched result
    */
   @ScalarFunction(names = {"regexp_extract", "regexpExtract"})
   public static String regexpExtract(String value, String regexp, int group, String defaultValue) {
     Matcher matcher = Pattern.compile(regexp).matcher(value);
     // Guard the group number on both sides: Matcher#group throws for indexes outside [0, groupCount()].
     if (group < 0 || !matcher.find() || group > matcher.groupCount()) {
       return defaultValue;
     }
     // An optional/alternative group that did not take part in the match yields null.
     String extracted = matcher.group(group);
     return extracted != null ? extracted : defaultValue;
   }

   /**
    * @see String#length()
    * @param input string to measure
    * @return length of the string
    */
   @ScalarFunction
   public static int length(String input) {
     return input.length();
   }

   /**
    * Return the Nth occurrence of a substring within the String
    *
    * @see StringUtils#ordinalIndexOf(CharSequence, CharSequence, int)
    * @param input Parent string
    * @param find substring to find
    * @param instance Integer denoting the instance no.
    * @return start index of the Nth instance of substring in main string
    */
   @ScalarFunction
   public static int strpos(String input, String find, int instance) {
     return StringUtils.ordinalIndexOf(input, find, instance);
   }

   /**
    * Return the 1st occurrence of a substring within the String
    *
    * @see StringUtils#indexOf(CharSequence, CharSequence)
    * @param input Parent string
    * @param find substring to find
    * @return start index of the 1st instance of substring in main string
    */
   @ScalarFunction
   public static int strpos(String input, String find) {
     return StringUtils.indexOf(input, find);
   }

   /**
    * Return the last occurrence of a substring within the String
    *
    * @see StringUtils#lastIndexOf(CharSequence, CharSequence)
    * @param input Parent string
    * @param find substring to find
    * @return start index of the last instance of substring in main string
    */
   @ScalarFunction
   public static int strrpos(String input, String find) {
     return StringUtils.lastIndexOf(input, find);
   }

   /**
    * Return the Nth occurrence of a substring in string starting from the end of the string.
    *
    * @see StringUtils#lastOrdinalIndexOf(CharSequence, CharSequence, int)
    * @param input Parent string
    * @param find substring to find
    * @param instance Integer denoting the instance no., counted from the end (mirrors
    *                 {@link #strpos(String, String, int)}, which counts from the start)
    * @return start index of the Nth instance of substring in main string starting from the end of the string.
    */
   @ScalarFunction
   public static int strrpos(String input, String find, int instance) {
     // StringUtils.lastIndexOf(str, search, int) takes a start position, not an occurrence number; the ordinal
     // variant is the one that matches the documented "Nth instance from the end" contract.
     return StringUtils.lastOrdinalIndexOf(input, find, instance);
   }

   /**
    * @see StringUtils#startsWith(CharSequence, CharSequence)
    * @param input Parent string
    * @param prefix substring to check if it is the prefix
    * @return true if string starts with prefix, false o.w.
    */
   @ScalarFunction(names = {"startsWith", "starts_with"})
   public static boolean startsWith(String input, String prefix) {
     return StringUtils.startsWith(input, prefix);
   }

   /**
    * @see StringUtils#endsWith(CharSequence, CharSequence)
    * @param input Parent string
    * @param suffix substring to check if it is the suffix
    * @return true if string ends with suffix, false o.w.
    */
   @ScalarFunction(names = {"endsWith", "ends_with"})
   public static boolean endsWith(String input, String suffix) {
     return StringUtils.endsWith(input, suffix);
   }

   /**
    * Replaces every occurrence of a substring. The target is matched literally, not as a regular expression; see
    * {@link #regexpReplace(String, String, String)} for the regex variant.
    *
    * @see StringUtils#replace(String, String, String)
    * @param input Parent string
    * @param find target substring to replace
    * @param substitute new substring to be replaced with target
    * @return the input with all occurrences of find replaced by substitute
    */
   @ScalarFunction
   public static String replace(String input, String find, String substitute) {
     return StringUtils.replace(input, find, substitute);
   }

   /**
    * @see StringUtils#rightPad(String, int, String)
    * @param input string to pad
    * @param size final size of the string
    * @param pad pad string to be used
    * @return string padded from the right side with pad to reach final size
    */
   @ScalarFunction
   public static String rpad(String input, int size, String pad) {
     return StringUtils.rightPad(input, size, pad);
   }

   /**
    * @see StringUtils#leftPad(String, int, String)
    * @param input string to pad
    * @param size final size of the string
    * @param pad pad string to be used
    * @return string padded from the left side with pad to reach final size
    */
   @ScalarFunction
   public static String lpad(String input, int size, String pad) {
     return StringUtils.leftPad(input, size, pad);
   }

   /**
    * @see String#codePointAt(int)
    * @param input non-empty string
    * @return the Unicode codepoint of the first character of the string
    */
   @ScalarFunction
   public static int codepoint(String input) {
     return input.codePointAt(0);
   }

   /**
    * @see Character#toChars(int)
    * @param codepoint Unicode codepoint
    * @return the character corresponding to the Unicode codepoint
    */
   @ScalarFunction
   public static String chr(int codepoint) {
     return new String(Character.toChars(codepoint));
   }

   /**
    * @param bytes encoded bytes
    * @param charsetName encoding
    * @return the bytes decoded to a string, or null if the charset name is not supported
    */
   @ScalarFunction
   public static String fromBytes(byte[] bytes, String charsetName) {
     try {
       return new String(bytes, charsetName);
     } catch (UnsupportedEncodingException e) {
       // Documented contract: an unknown charset yields null rather than failing the query.
       return null;
     }
   }

   /**
    * @param input string to encode
    * @param charsetName encoding
    * @return the string encoded to bytes, or null if the charset name is not supported
    */
   @ScalarFunction
   public static byte[] toBytes(String input, String charsetName) {
     try {
       return input.getBytes(charsetName);
     } catch (UnsupportedEncodingException e) {
       // Documented contract: an unknown charset yields null rather than failing the query.
       return null;
     }
   }

   /**
    * @see StandardCharsets#UTF_8
    * @param input string to encode
    * @return UTF-8 encoded bytes
    */
   @ScalarFunction
   public static byte[] toUtf8(String input) {
     return input.getBytes(StandardCharsets.UTF_8);
   }

   /**
    * @param input UTF-8 encoded bytes
    * @return the decoded string
    */
   @ScalarFunction
   public static String fromUtf8(byte[] input) {
     return new String(input, StandardCharsets.UTF_8);
   }

   /**
    * @see StandardCharsets#US_ASCII
    * @param input string to encode
    * @return US-ASCII encoded bytes
    */
   @ScalarFunction
   public static byte[] toAscii(String input) {
     return input.getBytes(StandardCharsets.US_ASCII);
   }

   /**
    * @param input US-ASCII encoded bytes
    * @return the decoded string
    */
   @ScalarFunction
   public static String fromAscii(byte[] input) {
     return new String(input, StandardCharsets.US_ASCII);
   }

   /**
    * @param input UUID as string
    * @return 16 bytes (most significant long followed by least significant long), or null if the input is not a
    *         valid UUID string
    */
   @ScalarFunction
   public static byte[] toUUIDBytes(String input) {
     try {
       UUID uuid = UUID.fromString(input);
       ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
       bb.putLong(uuid.getMostSignificantBits());
       bb.putLong(uuid.getLeastSignificantBits());
       return bb.array();
     } catch (IllegalArgumentException e) {
       return null;
     }
   }

   /**
    * @param input UUID serialized to bytes (most significant long followed by least significant long)
    * @return String representation of UUID
    * @throws java.nio.BufferUnderflowException if the input holds fewer than 16 bytes
    */
   @ScalarFunction
   public static String fromUUIDBytes(byte[] input) {
     ByteBuffer bb = ByteBuffer.wrap(input);
     long firstLong = bb.getLong();
     long secondLong = bb.getLong();
     return new UUID(firstLong, secondLong).toString();
   }

   /**
    * @see Normalizer#normalize(CharSequence, Normalizer.Form)
    * @param input string to normalize
    * @return transforms string with NFC normalization form.
    */
   @ScalarFunction
   public static String normalize(String input) {
     return Normalizer.normalize(input, Normalizer.Form.NFC);
   }

   /**
    * @see Normalizer#normalize(CharSequence, Normalizer.Form)
    * @param input string to normalize
    * @param form name of a {@link Normalizer.Form} constant (NFC, NFD, NFKC or NFKD)
    * @return transforms string with the specified normalization form
    * @throws IllegalArgumentException if form is not the name of a {@link Normalizer.Form} constant
    */
   @ScalarFunction
   public static String normalize(String input, String form) {
     Normalizer.Form targetForm = Normalizer.Form.valueOf(form);
     return Normalizer.normalize(input, targetForm);
   }

   /**
    * @see StringUtils#splitByWholeSeparator(String, String)
    * @param input string to split
    * @param delimiter delimiter, matched as a whole string
    * @return splits string on specified delimiter and returns an array.
    */
   @ScalarFunction(names = {"split", "string_to_array"})
   public static String[] split(String input, String delimiter) {
     return StringUtils.splitByWholeSeparator(input, delimiter);
   }

   /**
    * @see StringUtils#splitByWholeSeparator(String, String, int)
    * @param input string to split
    * @param delimiter delimiter, matched as a whole string
    * @param limit maximum number of elements in the returned array
    * @return splits string on specified delimiter limiting the number of results till the specified limit
    */
   @ScalarFunction(names = {"split", "string_to_array"})
   public static String[] split(String input, String delimiter, int limit) {
     return StringUtils.splitByWholeSeparator(input, delimiter, limit);
   }

   /**
    * @param input an input string for prefix strings generations.
    * @param maxlength the max length of the prefix strings for the string.
    * @return the prefixes of the string of length 1 up to min(maxlength, input length), shortest first; an empty
    *         array if maxlength is not positive.
    */
   @ScalarFunction
   public static String[] prefixes(String input, int maxlength) {
     int arrLength = affixCount(input, maxlength);
     String[] prefixArr = new String[arrLength];
     for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) {
       prefixArr[prefixIdx - 1] = input.substring(0, prefixIdx);
     }
     return prefixArr;
   }

   /**
    * @param input an input string for prefix strings generations.
    * @param maxlength the max length of the prefix strings for the string.
    * @param prefix the prefix to be prepended to prefix strings generated. e.g. '^' for regex matching
    * @return the prefixes of the string of length 1 up to min(maxlength, input length), each with prefix
    *         prepended; an empty array if maxlength is not positive.
    */
   @ScalarFunction(nullableParameters = true, names = {"prefixesWithPrefix", "prefixes_with_prefix"})
   public static String[] prefixesWithPrefix(String input, int maxlength, @Nullable String prefix) {
     if (prefix == null) {
       return prefixes(input, maxlength);
     }
     int arrLength = affixCount(input, maxlength);
     String[] prefixArr = new String[arrLength];
     for (int prefixIdx = 1; prefixIdx <= arrLength; prefixIdx++) {
       prefixArr[prefixIdx - 1] = prefix + input.substring(0, prefixIdx);
     }
     return prefixArr;
   }

   /**
    * @param input an input string for suffix strings generations.
    * @param maxlength the max length of the suffix strings for the string.
    * @return the suffixes of the string of length 1 up to min(maxlength, input length), shortest first; an empty
    *         array if maxlength is not positive.
    */
   @ScalarFunction
   public static String[] suffixes(String input, int maxlength) {
     int arrLength = affixCount(input, maxlength);
     String[] suffixArr = new String[arrLength];
     for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) {
       suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx);
     }
     return suffixArr;
   }

   /**
    * @param input an input string for suffix strings generations.
    * @param maxlength the max length of the suffix strings for the string.
    * @param suffix the suffix string to be appended for suffix strings generated. e.g. '$' for regex matching.
    * @return the suffixes of the string of length 1 up to min(maxlength, input length), each with suffix
    *         appended; an empty array if maxlength is not positive.
    */
   @ScalarFunction(nullableParameters = true, names = {"suffixesWithSuffix", "suffixes_with_suffix"})
   public static String[] suffixesWithSuffix(String input, int maxlength, @Nullable String suffix) {
     if (suffix == null) {
       return suffixes(input, maxlength);
     }
     int arrLength = affixCount(input, maxlength);
     String[] suffixArr = new String[arrLength];
     for (int suffixIdx = 1; suffixIdx <= arrLength; suffixIdx++) {
       suffixArr[suffixIdx - 1] = input.substring(input.length() - suffixIdx) + suffix;
     }
     return suffixArr;
   }

   /**
    * Number of prefixes/suffixes to generate: min(maxlength, input length), never negative so that a negative
    * maxlength produces an empty array instead of a NegativeArraySizeException.
    */
   private static int affixCount(String input, int maxlength) {
     return Math.max(0, Math.min(maxlength, input.length()));
   }

   /**
    * @param input an input string for ngram generations.
    * @param length the exact length of the ngrams to generate.
    * @return the unique ngrams of the string whose length is exactly the specified length, in order of first
    *         appearance; an empty array if length is not positive or exceeds the input length.
    */
   @ScalarFunction
   public static String[] uniqueNgrams(String input, int length) {
     if (length <= 0 || length > input.length()) {
       return new String[0];
     }
     // Linked set: de-duplicates while keeping first-appearance order.
     ObjectSet<String> ngramSet = new ObjectLinkedOpenHashSet<>();
     for (int i = 0; i < input.length() - length + 1; i++) {
       ngramSet.add(input.substring(i, i + length));
     }
     return ngramSet.toArray(new String[0]);
   }

   /**
    * @param input an input string for ngram generations.
    * @param minGram the min length of the ngram for the string.
    * @param maxGram the max length of the ngram for the string.
    * @return the unique ngrams of the string whose length is within the specified range [minGram, maxGram],
    *         shorter ngrams first; non-positive lengths in the range are skipped.
    */
   @ScalarFunction
   public static String[] uniqueNgrams(String input, int minGram, int maxGram) {
     // Linked set: de-duplicates while keeping first-appearance order.
     ObjectSet<String> ngramSet = new ObjectLinkedOpenHashSet<>();
     // Start at 1: zero-length and negative-length ngrams are meaningless (and substring would throw).
     for (int n = Math.max(minGram, 1); n <= maxGram && n <= input.length(); n++) {
       for (int i = 0; i < input.length() - n + 1; i++) {
         ngramSet.add(input.substring(i, i + n));
       }
     }
     return ngramSet.toArray(new String[0]);
   }

   /**
    * TODO: Revisit if index should be one-based (both Presto and Postgres use one-based index, which starts with 1)
    * @param input the input String to be split into parts.
    * @param delimiter the specified delimiter to split the input string.
    * @param index 0 based index of the part; we allow negative value for index which indicates the index from
    *              the end.
    * @return splits string on specified delimiter and returns String at specified index from the split, or the
    *         literal string "null" if the index is out of range.
    */
   @ScalarFunction(names = {"splitPart", "split_part"})
   public static String splitPart(String input, String delimiter, int index) {
     return partAt(StringUtils.splitByWholeSeparator(input, delimiter), index);
   }

   /**
    * @param input the input String to be split into parts.
    * @param delimiter the specified delimiter to split the input string.
    * @param limit the max count of parts that the input string can be split into.
    * @param index 0 based index of the part to be returned; a negative value counts from the end.
    * @return splits string on the delimiter with the limit count and returns String at specified index from the
    *         split, or the literal string "null" if the index is out of range.
    */
   @ScalarFunction
   public static String splitPart(String input, String delimiter, int limit, int index) {
     return partAt(StringUtils.splitByWholeSeparator(input, delimiter, limit), index);
   }

   /**
    * Resolves a possibly negative index against the split parts; out-of-range indexes yield the literal "null".
    */
   private static String partAt(String[] parts, int index) {
     int count = parts.length;
     if (index >= 0 && index < count) {
       return parts[index];
     }
     if (index < 0 && index >= -count) {
       return parts[count + index];
     }
     return "null";
   }

   /**
    * @see StringUtils#repeat(String, int)
    * @param input string to repeat
    * @param times number of repetitions
    * @return concatenate the string to itself specified number of times
    */
   @ScalarFunction
   public static String repeat(String input, int times) {
     return StringUtils.repeat(input, times);
   }

   /**
    * @see StringUtils#repeat(String, String, int)
    * @param input string to repeat
    * @param sep separator placed between repetitions
    * @param times number of repetitions
    * @return concatenate the string to itself specified number of times with specified separator
    */
   @ScalarFunction
   public static String repeat(String input, String sep, int times) {
     return StringUtils.repeat(input, sep, times);
   }

   /**
    * @see StringUtils#remove(String, String)
    * @param input Parent string
    * @param search substring to remove
    * @return removes all instances of search from string
    */
   @ScalarFunction
   public static String remove(String input, String search) {
     return StringUtils.remove(input, search);
   }

   /**
    * @param input1 first string
    * @param input2 second string
    * @return returns the Hamming distance of input1 and input2 (number of positions at which the chars differ),
    * or -1 if the two strings do not have the same length.
    */
   @ScalarFunction
   public static int hammingDistance(String input1, String input2) {
     if (input1.length() != input2.length()) {
       return -1;
     }
     int distance = 0;
     for (int i = 0; i < input1.length(); i++) {
       if (input1.charAt(i) != input2.charAt(i)) {
         distance++;
       }
     }
     return distance;
   }

   /**
    * @see String#contains(CharSequence)
    * @param input Parent string
    * @param substring substring to look for
    * @return returns true if substring present in main string else false.
    */
   @ScalarFunction
   public static boolean contains(String input, String substring) {
     return input.contains(substring);
   }

   /**
    * Compare input strings lexicographically.
    *
    * @see String#compareTo(String)
    * @param input1 first string
    * @param input2 second string
    * @return the value 0 if the first string argument is equal to second string; a value less than 0 if first string
    * argument is lexicographically less than the second string argument; and a value greater than 0 if the first string
    * argument is lexicographically greater than the second string argument.
    */
   @ScalarFunction
   public static int strcmp(String input1, String input2) {
     return input1.compareTo(input2);
   }

   /**
    * URL-encodes the input using UTF-8.
    *
    * @param input plaintext string
    * @return url encoded string
    * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
    */
   @ScalarFunction
   public static String encodeUrl(String input)
       throws UnsupportedEncodingException {
     return URLEncoder.encode(input, StandardCharsets.UTF_8.name());
   }

   /**
    * URL-decodes the input using UTF-8.
    *
    * @param input url encoded string
    * @return plaintext string
    * @throws UnsupportedEncodingException declared by {@link URLDecoder#decode(String, String)}
    */
   @ScalarFunction
   public static String decodeUrl(String input)
       throws UnsupportedEncodingException {
     return URLDecoder.decode(input, StandardCharsets.UTF_8.name());
   }

   /**
    * @param input binary data
    * @return Base64 encoded String
    */
   @ScalarFunction
   public static String toBase64(byte[] input) {
     return Base64.getEncoder().encodeToString(input);
   }

   /**
    * @param input Base64 encoded String
    * @return decoded binary data
    * @throws IllegalArgumentException if the input is not valid Base64
    */
   @ScalarFunction
   public static byte[] fromBase64(String input) {
     return Base64.getDecoder().decode(input);
   }

   /**
    * Replace a regular expression pattern. If matchStr is not found, inputStr will be returned. By default, all
    * occurrences of match pattern in the input string will be replaced. Default matching pattern is case sensitive.
    *
    * When a single occurrence is selected (occurence &gt;= 0) replaceStr is inserted literally; when all
    * occurrences are replaced (occurence &lt; 0) replaceStr is processed by
    * {@link Matcher#appendReplacement(StringBuffer, String)}, so group references such as $1 are expanded.
    *
    * @param inputStr Input string to apply the regexpReplace
    * @param matchStr Regexp or string to match against inputStr
    * @param replaceStr Regexp or string to replace if matchStr is found
    * @param matchStartPos Index of inputStr from where matching should start. Default is 0.
    * @param occurence Controls which occurrence of the matched pattern must be replaced. Counting starts at 0.
    *                  Default is -1, which replaces all occurrences
    * @param flag Single character flag that controls how the regex finds matches in inputStr. If an incorrect flag is
    *            specified, the function applies default case sensitive match. Only one flag can be specified. Supported
    *             flags:
    *             i -> Case insensitive
    * @return replaced input string
    * @throws IndexOutOfBoundsException if matchStartPos is negative or greater than the length of inputStr
    */
   @ScalarFunction(names = {"regexpReplace", "regexp_replace"})
   public static String regexpReplace(String inputStr, String matchStr, String replaceStr, int matchStartPos,
       int occurence, String flag) {
     // TODO: Support more flags like MULTILINE, COMMENTS, etc.
     // Anything other than "i" (including null) falls back to the default case sensitive match.
     int patternFlags = "i".equals(flag) ? Pattern.CASE_INSENSITIVE : 0;
     Matcher matcher =
         Pattern.compile(matchStr, patternFlags).matcher(inputStr).region(matchStartPos, inputStr.length());

     if (occurence >= 0) {
       // Skip 'occurence' matches, then splice the replacement over the next one.
       int remaining = occurence;
       while (matcher.find()) {
         if (remaining == 0) {
           return new StringBuilder(inputStr).replace(matcher.start(), matcher.end(), replaceStr).toString();
         }
         remaining--;
       }
       // Fewer matches than requested: nothing to replace.
       return inputStr;
     }

     // StringBuffer (not StringBuilder) because that is what Matcher#appendReplacement accepts on Java 8.
     StringBuffer sb = new StringBuffer();
     while (matcher.find()) {
       matcher.appendReplacement(sb, replaceStr);
     }
     matcher.appendTail(sb);
     return sb.toString();
   }

   /**
    * See {@link #regexpReplace(String, String, String, int, int, String)}. Matches against entire inputStr and
    * replaces all occurrences. Match is performed in case-sensitive mode.
    *
    * @param inputStr Input string to apply the regexpReplace
    * @param matchStr Regexp or string to match against inputStr
    * @param replaceStr Regexp or string to replace if matchStr is found
    * @return replaced input string
    */
   @ScalarFunction(names = {"regexpReplace", "regexp_replace"})
   public static String regexpReplace(String inputStr, String matchStr, String replaceStr) {
     return regexpReplace(inputStr, matchStr, replaceStr, 0, -1, "");
   }

   /**
    * See {@link #regexpReplace(String, String, String, int, int, String)}. Matches from matchStartPos onwards and
    * replaces all occurrences. Match is performed in case-sensitive mode.
    *
    * @param inputStr Input string to apply the regexpReplace
    * @param matchStr Regexp or string to match against inputStr
    * @param replaceStr Regexp or string to replace if matchStr is found
    * @param matchStartPos Index of inputStr from where matching should start. Default is 0.
    * @return replaced input string
    */
   @ScalarFunction(names = {"regexpReplace", "regexp_replace"})
   public static String regexpReplace(String inputStr, String matchStr, String replaceStr, int matchStartPos) {
     return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, -1, "");
   }

   /**
    * See {@link #regexpReplace(String, String, String, int, int, String)}. Match is performed in case-sensitive
    * mode.
    *
    * @param inputStr Input string to apply the regexpReplace
    * @param matchStr Regexp or string to match against inputStr
    * @param replaceStr Regexp or string to replace if matchStr is found
    * @param matchStartPos Index of inputStr from where matching should start. Default is 0.
    * @param occurence Controls which occurrence of the matched pattern must be replaced. Counting starts
    *                    at 0. Default is -1
    * @return replaced input string
    */
   @ScalarFunction(names = {"regexpReplace", "regexp_replace"})
   public static String regexpReplace(String inputStr, String matchStr, String replaceStr, int matchStartPos,
       int occurence) {
     return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, occurence, "");
   }

   /**
    * Checks whether the regular expression matches anywhere in the input. The match is case-insensitive
    * (Unicode-aware) and does not have to cover the whole input.
    *
    * @param inputStr Input string to test
    * @param regexPatternStr Regular expression to look for
    * @return true if the pattern is found in the input else false
    */
   @ScalarFunction(names = {"regexpLike", "regexp_like"})
   public static boolean regexpLike(String inputStr, String regexPatternStr) {
     Pattern pattern = Pattern.compile(regexPatternStr, Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE);
     return pattern.matcher(inputStr).find();
   }

   /**
    * Evaluates a SQL LIKE pattern by converting it to a regular expression and delegating to
    * {@link #regexpLike(String, String)}.
    *
    * @param inputStr Input string to test
    * @param likePatternStr SQL LIKE pattern
    * @return true if the input matches the LIKE pattern else false
    */
   @ScalarFunction
   public static boolean like(String inputStr, String likePatternStr) {
     String regexPatternStr = RegexpPatternConverterUtils.likeToRegexpLike(likePatternStr);
     return regexpLike(inputStr, regexPatternStr);
   }

   /**
    * Checks whether the input string can be parsed into a json node or not. Useful for scenarios where we want
    * to filter out malformed json.
    * Null values are handled by the function invoker here and this function processes the results on non-null values.
    *
    * @param inputStr Input string to test for valid json
    * @return  true in case of valid json parsing else false
    *
    */
   @ScalarFunction(names = {"isJson", "is_json"})
   public static boolean isJson(String inputStr) {
     try {
       JsonUtils.stringToJsonNode(inputStr);
       return true;
     } catch (Exception e) {
       // Any parse failure simply means "not valid json".
       return false;
     }
   }
 }