/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.beam.sdk.extensions.sql.impl.udf; |
| |
| import static java.nio.charset.StandardCharsets.UTF_8; |
| |
| import com.google.auto.service.AutoService; |
| import java.util.Arrays; |
| import org.apache.beam.sdk.schemas.Schema.TypeName; |
| import org.apache.commons.codec.DecoderException; |
| import org.apache.commons.codec.binary.Hex; |
| import org.apache.commons.lang3.ArrayUtils; |
| import org.apache.commons.lang3.StringUtils; |
| |
| /** BuiltinStringFunctions. */ |
| @AutoService(BeamBuiltinFunctionProvider.class) |
| public class BuiltinStringFunctions extends BeamBuiltinFunctionProvider { |
| |
| // return a explicitly null for Boolean has NP_BOOLEAN_RETURN_NULL warning. |
| // return null for boolean is not allowed. |
| // TODO: handle null input. |
| @UDF( |
| funcName = "ENDS_WITH", |
| parameterArray = {TypeName.STRING}, |
| returnType = TypeName.STRING) |
| public Boolean endsWith(String str1, String str2) { |
| return str1.endsWith(str2); |
| } |
| |
| // return a explicitly null for Boolean has NP_BOOLEAN_RETURN_NULL warning. |
| // return null for boolean is not allowed. |
| // TODO: handle null input. |
| @UDF( |
| funcName = "STARTS_WITH", |
| parameterArray = {TypeName.STRING}, |
| returnType = TypeName.STRING) |
| public Boolean startsWith(String str1, String str2) { |
| return str1.startsWith(str2); |
| } |
| |
| @UDF( |
| funcName = "LENGTH", |
| parameterArray = {TypeName.STRING}, |
| returnType = TypeName.INT64) |
| public Long lengthString(String str) { |
| if (str == null) { |
| return null; |
| } |
| return (long) str.length(); |
| } |
| |
| @UDF( |
| funcName = "LENGTH", |
| parameterArray = {TypeName.BYTES}, |
| returnType = TypeName.INT64) |
| public Long lengthBytes(byte[] bytes) { |
| if (bytes == null) { |
| return null; |
| } |
| return (long) bytes.length; |
| } |
| |
| @UDF( |
| funcName = "REVERSE", |
| parameterArray = {TypeName.STRING}, |
| returnType = TypeName.STRING) |
| public String reverseString(String str) { |
| if (str == null) { |
| return null; |
| } |
| return new StringBuilder(str).reverse().toString(); |
| } |
| |
| @UDF( |
| funcName = "REVERSE", |
| parameterArray = {TypeName.BYTES}, |
| returnType = TypeName.BYTES) |
| public byte[] reverseBytes(byte[] bytes) { |
| if (bytes == null) { |
| return null; |
| } |
| byte[] ret = Arrays.copyOf(bytes, bytes.length); |
| ArrayUtils.reverse(ret); |
| return ret; |
| } |
| |
| @UDF( |
| funcName = "FROM_HEX", |
| parameterArray = {TypeName.STRING}, |
| returnType = TypeName.BYTES) |
| public byte[] fromHex(String str) { |
| if (str == null) { |
| return null; |
| } |
| |
| try { |
| return Hex.decodeHex(str.toCharArray()); |
| } catch (DecoderException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| @UDF( |
| funcName = "TO_HEX", |
| parameterArray = {TypeName.BYTES}, |
| returnType = TypeName.STRING) |
| public String toHex(byte[] bytes) { |
| if (bytes == null) { |
| return null; |
| } |
| |
| return Hex.encodeHexString(bytes); |
| } |
| |
| @UDF( |
| funcName = "LPAD", |
| parameterArray = {TypeName.STRING, TypeName.INT64}, |
| returnType = TypeName.STRING) |
| public String lpad(String originalValue, Long returnLength) { |
| return lpad(originalValue, returnLength, " "); |
| } |
| |
| @UDF( |
| funcName = "LPAD", |
| parameterArray = {TypeName.STRING, TypeName.INT64, TypeName.STRING}, |
| returnType = TypeName.STRING) |
| public String lpad(String originalValue, Long returnLength, String pattern) { |
| if (originalValue == null || returnLength == null || pattern == null) { |
| return null; |
| } |
| |
| if (returnLength < -1 || pattern.isEmpty()) { |
| throw new IllegalArgumentException("returnLength cannot be 0 or pattern cannot be empty."); |
| } |
| |
| if (originalValue.length() == returnLength) { |
| return originalValue; |
| } else if (originalValue.length() < returnLength) { // add padding to left |
| return StringUtils.leftPad(originalValue, Math.toIntExact(returnLength), pattern); |
| } else { // truncating string by str.substring |
| // Java String can only hold a string with Integer.MAX_VALUE as longest length. |
| return originalValue.substring(0, Math.toIntExact(returnLength)); |
| } |
| } |
| |
| @UDF( |
| funcName = "LPAD", |
| parameterArray = {TypeName.BYTES, TypeName.INT64}, |
| returnType = TypeName.BYTES) |
| public byte[] lpad(byte[] originalValue, Long returnLength) { |
| return lpad(originalValue, returnLength, " ".getBytes(UTF_8)); |
| } |
| |
| @UDF( |
| funcName = "LPAD", |
| parameterArray = {TypeName.BYTES, TypeName.INT64, TypeName.BYTES}, |
| returnType = TypeName.BYTES) |
| public byte[] lpad(byte[] originalValue, Long returnLength, byte[] pattern) { |
| if (originalValue == null || returnLength == null || pattern == null) { |
| return null; |
| } |
| if (returnLength < -1 || pattern.length == 0) { |
| throw new IllegalArgumentException("returnLength cannot be 0 or pattern cannot be empty."); |
| } |
| |
| int returnLengthInt = Math.toIntExact(returnLength); |
| |
| if (originalValue.length == returnLengthInt) { |
| return originalValue; |
| } else if (originalValue.length < returnLengthInt) { // add padding to left |
| byte[] ret = new byte[returnLengthInt]; |
| // step one: pad #(returnLengthInt - originalValue.length) bytes to left side. |
| int paddingOff = 0; |
| int paddingLeftBytes = returnLengthInt - originalValue.length; |
| byteArrayPadding(ret, pattern, paddingOff, paddingLeftBytes); |
| |
| // step two: copy originalValue. |
| System.arraycopy( |
| originalValue, 0, ret, returnLengthInt - originalValue.length, originalValue.length); |
| return ret; |
| } else { // truncating string by str.substring |
| // Java String can only hold a string with Integer.MAX_VALUE as longest length. |
| byte[] ret = new byte[returnLengthInt]; |
| System.arraycopy(originalValue, 0, ret, 0, returnLengthInt); |
| return ret; |
| } |
| } |
| |
| @UDF( |
| funcName = "RPAD", |
| parameterArray = {TypeName.STRING, TypeName.INT64}, |
| returnType = TypeName.STRING) |
| public String rpad(String originalValue, Long returnLength) { |
| return lpad(originalValue, returnLength, " "); |
| } |
| |
| @UDF( |
| funcName = "RPAD", |
| parameterArray = {TypeName.STRING, TypeName.INT64, TypeName.STRING}, |
| returnType = TypeName.STRING) |
| public String rpad(String originalValue, Long returnLength, String pattern) { |
| if (originalValue == null || returnLength == null || pattern == null) { |
| return null; |
| } |
| |
| if (returnLength < -1 || pattern.isEmpty()) { |
| throw new IllegalArgumentException("returnLength cannot be 0 or pattern cannot be empty."); |
| } |
| |
| if (originalValue.length() == returnLength) { |
| return originalValue; |
| } else if (originalValue.length() < returnLength) { // add padding to right |
| return StringUtils.rightPad(originalValue, Math.toIntExact(returnLength), pattern); |
| } else { // truncating string by str.substring |
| // Java String can only hold a string with Integer.MAX_VALUE as longest length. |
| return originalValue.substring(0, Math.toIntExact(returnLength)); |
| } |
| } |
| |
| @UDF( |
| funcName = "RPAD", |
| parameterArray = {TypeName.BYTES, TypeName.INT64}, |
| returnType = TypeName.BYTES) |
| public byte[] rpad(byte[] originalValue, Long returnLength) { |
| return lpad(originalValue, returnLength, " ".getBytes(UTF_8)); |
| } |
| |
| @UDF( |
| funcName = "RPAD", |
| parameterArray = {TypeName.BYTES, TypeName.INT64, TypeName.BYTES}, |
| returnType = TypeName.BYTES) |
| public byte[] rpad(byte[] originalValue, Long returnLength, byte[] pattern) { |
| if (originalValue == null || returnLength == null || pattern == null) { |
| return null; |
| } |
| if (returnLength < -1 || pattern.length == 0) { |
| throw new IllegalArgumentException("returnLength cannot be 0 or pattern cannot be empty."); |
| } |
| |
| int returnLengthInt = Math.toIntExact(returnLength); |
| |
| if (originalValue.length == returnLengthInt) { |
| return originalValue; |
| } else if (originalValue.length < returnLengthInt) { // add padding to right |
| byte[] ret = new byte[returnLengthInt]; |
| // step one: copy originalValue. |
| System.arraycopy(originalValue, 0, ret, 0, originalValue.length); |
| |
| // step one: pad #(returnLengthInt - originalValue.length) bytes to right side. |
| int paddingOff = originalValue.length; |
| int paddingLeftBytes = returnLengthInt - originalValue.length; |
| byteArrayPadding(ret, pattern, paddingOff, paddingLeftBytes); |
| return ret; |
| } else { // truncating string by str.substring |
| // Java String can only hold a string with Integer.MAX_VALUE as longest length. |
| byte[] ret = new byte[returnLengthInt]; |
| System.arraycopy(originalValue, 0, ret, 0, returnLengthInt); |
| return ret; |
| } |
| } |
| |
| private void byteArrayPadding(byte[] dest, byte[] pattern, int paddingOff, int paddingLeftBytes) { |
| while (paddingLeftBytes > 0) { |
| if (paddingLeftBytes >= pattern.length) { |
| // pad the whole pattern |
| System.arraycopy(pattern, 0, dest, paddingOff, pattern.length); |
| paddingLeftBytes -= pattern.length; |
| paddingOff += pattern.length; |
| } else { |
| System.arraycopy(pattern, 0, dest, paddingOff, paddingLeftBytes); |
| paddingLeftBytes = 0; |
| } |
| } |
| } |
| } |