/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package org.apache.flink.table.codegen.util; |
| |
/**
 * Utilities for converting SQL {@code LIKE} and {@code SIMILAR} operators
 * to regular expressions.
 *
 * <p>This class was copied from Calcite into Flink in order to expose some
 * methods that are private in the original.
 */
public class FlinkLike {
    /**
     * Characters that have a special meaning in a Java regular expression and
     * therefore need a leading backslash to be matched literally. The period is
     * part of this set so that a {@code '.'} in a SQL pattern is never turned
     * into the regex "any character" wildcard.
     */
    private static final String JAVA_REGEX_SPECIALS = "[]()|^-+*?{}$\\.";

    /** Characters that have a special meaning in a SQL {@code SIMILAR} pattern. */
    private static final String SQL_SIMILAR_SPECIALS = "[]()|^-+*_%?{}";

    /**
     * Flattened pairs: every even index holds a SQL regular character set
     * identifier, the following odd index holds the Java regex fragment it is
     * rewritten to.
     */
    private static final String[] REG_CHAR_CLASSES = {
        "[:ALPHA:]", "\\p{Alpha}",
        "[:alpha:]", "\\p{Alpha}",
        "[:UPPER:]", "\\p{Upper}",
        "[:upper:]", "\\p{Upper}",
        "[:LOWER:]", "\\p{Lower}",
        "[:lower:]", "\\p{Lower}",
        "[:DIGIT:]", "\\d",
        "[:digit:]", "\\d",
        "[:SPACE:]", " ",
        "[:space:]", " ",
        "[:WHITESPACE:]", "\\s",
        "[:whitespace:]", "\\s",
        "[:ALNUM:]", "\\p{Alnum}",
        "[:alnum:]", "\\p{Alnum}"
    };

    private FlinkLike() {
    }

    /**
     * Translates a SQL LIKE pattern to Java regex pattern, with optional
     * escape string.
     *
     * @param sqlPattern the SQL LIKE pattern
     * @param escapeStr the escape string; {@code null} means "no escape character",
     *     otherwise it must be exactly one character long
     * @return the equivalent Java regular expression
     * @throws RuntimeException if {@code escapeStr} is not one character long or the
     *     pattern contains an invalid escape sequence
     */
    public static String sqlToRegexLike(
            String sqlPattern,
            CharSequence escapeStr) {
        return sqlToRegexLike(sqlPattern, toEscapeChar(escapeStr));
    }

    /**
     * Translates a SQL LIKE pattern to Java regex pattern.
     *
     * <p>{@code _} becomes {@code .}, {@code %} becomes {@code (?s:.*)}, and every
     * character listed in {@link #JAVA_REGEX_SPECIALS} is backslash-quoted so that it
     * matches literally.
     *
     * @param sqlPattern the SQL LIKE pattern
     * @param escapeChar the escape character, or {@code 0} for none
     * @return the equivalent Java regular expression
     * @throws RuntimeException if the escape character is the last character of the
     *     pattern or is followed by anything other than {@code _}, {@code %} or itself
     */
    static String sqlToRegexLike(
            String sqlPattern,
            char escapeChar) {
        final int len = sqlPattern.length();
        final StringBuilder javaPattern = new StringBuilder(len + len);
        for (int i = 0; i < len; i++) {
            final char c = sqlPattern.charAt(i);
            // The quoting backslash is emitted before the escape check. When the escape
            // character is itself a regex special, the backslash therefore lands in
            // front of the escaped character: needed when that is the escape character
            // itself, harmless for '_' and '%' (Java permits a backslash before any
            // non-alphabetic character).
            if (JAVA_REGEX_SPECIALS.indexOf(c) >= 0) {
                javaPattern.append('\\');
            }
            if (c == escapeChar) {
                if (i == len - 1) {
                    throw invalidEscapeSequence(sqlPattern, i);
                }
                final char nextChar = sqlPattern.charAt(i + 1);
                if (nextChar != '_' && nextChar != '%' && nextChar != escapeChar) {
                    throw invalidEscapeSequence(sqlPattern, i);
                }
                javaPattern.append(nextChar);
                i++; // the escaped character has been consumed as well
            } else if (c == '_') {
                javaPattern.append('.');
            } else if (c == '%') {
                javaPattern.append("(?s:.*)");
            } else {
                javaPattern.append(c);
            }
        }
        return javaPattern.toString();
    }

    /**
     * Creates the exception reported for an escape string that is not exactly one
     * character long.
     *
     * @param s the offending escape string
     * @return the exception, for the caller to throw
     */
    public static RuntimeException invalidEscapeCharacter(String s) {
        return new RuntimeException(
            "Invalid escape character '" + s + "'");
    }

    /**
     * Creates the exception reported for a misplaced escape character.
     *
     * @param s the pattern containing the invalid escape sequence
     * @param i the position that is reported in the message
     * @return the exception, for the caller to throw
     */
    public static RuntimeException invalidEscapeSequence(String s, int i) {
        return new RuntimeException(
            "Invalid escape sequence '" + s + "', " + i);
    }

    /**
     * Converts an optional escape string into the {@code char} form used by the
     * translators.
     *
     * @return the single character of {@code escapeStr}, or {@code 0} if it is null
     * @throws RuntimeException if {@code escapeStr} is not exactly one character long
     */
    private static char toEscapeChar(CharSequence escapeStr) {
        if (escapeStr == null) {
            return 0;
        }
        if (escapeStr.length() != 1) {
            throw invalidEscapeCharacter(escapeStr.toString());
        }
        return escapeStr.charAt(0);
    }

    /** Appends {@code c} so that it matches literally, quoting it if Java requires it. */
    private static void appendLiteral(StringBuilder javaPattern, char c) {
        if (JAVA_REGEX_SPECIALS.indexOf(c) >= 0) {
            javaPattern.append('\\');
        }
        javaPattern.append(c);
    }

    /**
     * Validates the use of the escape character in a SQL SIMILAR pattern.
     *
     * @throws RuntimeException if the escape character is used in a way the checks
     *     below reject
     */
    private static void similarEscapeRuleChecking(
            String sqlPattern,
            char escapeChar) {
        if (escapeChar == 0) {
            return;
        }
        if (SQL_SIMILAR_SPECIALS.indexOf(escapeChar) >= 0) {
            // The escape character is a special character
            // SQL 2003 Part 2 Section 8.6 General Rule 3.b
            final int len = sqlPattern.length();
            for (int i = 0; i < len; i++) {
                if (sqlPattern.charAt(i) != escapeChar) {
                    continue;
                }
                if (i == len - 1) {
                    throw invalidEscapeSequence(sqlPattern, i);
                }
                final char c = sqlPattern.charAt(i + 1);
                if ((SQL_SIMILAR_SPECIALS.indexOf(c) < 0)
                        && (c != escapeChar)) {
                    throw invalidEscapeSequence(sqlPattern, i);
                }
                // Skip the escaped character. Without this a doubled escape
                // character (e.g. "%%" with escape '%') would have its second half
                // re-examined as the start of a new escape sequence and be rejected.
                i++;
            }
        }

        // SQL 2003 Part 2 Section 8.6 General Rule 3.c
        // NOTE(review): as written, a ':' escape character is rejected unless the
        // pattern contains both "[:" and ":]" (and the reported position is then
        // negative). The cited rule may intend the opposite condition; confirm
        // against the standard before changing this.
        if (escapeChar == ':') {
            int position = sqlPattern.indexOf("[:");
            if (position >= 0) {
                position = sqlPattern.indexOf(":]");
            }
            if (position < 0) {
                throw invalidEscapeSequence(sqlPattern, position);
            }
        }
    }

    /**
     * Creates the exception reported for a malformed SIMILAR pattern.
     *
     * @param pattern the offending pattern
     * @param i the position of the problem; currently not part of the message
     * @return the exception, for the caller to throw
     */
    private static RuntimeException invalidRegularExpression(
            String pattern, int i) {
        return new RuntimeException(
            "Invalid regular expression '" + pattern + "'");
    }

    /**
     * Rewrites the inside of a SQL SIMILAR bracket list ({@code [...]}) into
     * {@code javaPattern}.
     *
     * @param sqlPattern the complete SQL pattern
     * @param javaPattern the output buffer; the opening {@code '['} has already been
     *     appended by the caller
     * @param pos the index of the opening {@code '['} in {@code sqlPattern}
     * @param escapeChar the escape character, or {@code 0} for none
     * @return the index of the last character consumed, i.e. the one just before the
     *     closing {@code ']'}, or the last index of the pattern if no {@code ']'}
     *     was found; the caller handles the {@code ']'} itself
     * @throws RuntimeException if the list contains an invalid escape sequence, an
     *     unknown {@code [:name:]} identifier or an unescaped special character
     */
    private static int sqlSimilarRewriteCharEnumeration(
            String sqlPattern,
            StringBuilder javaPattern,
            int pos,
            char escapeChar) {
        final int len = sqlPattern.length();
        int i;
        for (i = pos + 1; i < len; i++) {
            final char c = sqlPattern.charAt(i);
            if (c == ']') {
                return i - 1;
            } else if (c == escapeChar) {
                if (i == len - 1) {
                    // A dangling escape character: report it as such instead of
                    // running off the end of the pattern.
                    throw invalidEscapeSequence(sqlPattern, i);
                }
                i++;
                final char nextChar = sqlPattern.charAt(i);
                if ((SQL_SIMILAR_SPECIALS.indexOf(nextChar) < 0)
                        && (nextChar != escapeChar)) {
                    throw invalidRegularExpression(sqlPattern, i);
                }
                // Quote whenever Java needs it; this also covers an escaped escape
                // character such as '\' that is not a SIMILAR special.
                appendLiteral(javaPattern, nextChar);
            } else if (c == '-') {
                javaPattern.append('-');
            } else if (c == '^') {
                javaPattern.append('^');
            } else if (sqlPattern.startsWith("[:", i)) {
                boolean found = false;
                for (int j = 0; j < REG_CHAR_CLASSES.length; j += 2) {
                    if (sqlPattern.startsWith(REG_CHAR_CLASSES[j], i)) {
                        javaPattern.append(REG_CHAR_CLASSES[j + 1]);
                        // Land on the identifier's last character; the loop
                        // increment then moves past it.
                        i += REG_CHAR_CLASSES[j].length() - 1;
                        found = true;
                        break;
                    }
                }
                if (!found) {
                    throw invalidRegularExpression(sqlPattern, i);
                }
            } else if (SQL_SIMILAR_SPECIALS.indexOf(c) >= 0) {
                throw invalidRegularExpression(sqlPattern, i);
            } else if (c == '\\') {
                // A backslash is an ordinary character in SQL but would quote the
                // following character (possibly the closing bracket) in Java.
                javaPattern.append("\\\\");
            } else {
                javaPattern.append(c);
            }
        }
        return i - 1;
    }

    /**
     * Translates a SQL SIMILAR pattern to Java regex pattern, with optional
     * escape string.
     *
     * @param sqlPattern the SQL SIMILAR pattern
     * @param escapeStr the escape string; {@code null} means "no escape character",
     *     otherwise it must be exactly one character long
     * @return the equivalent Java regular expression
     * @throws RuntimeException if {@code escapeStr} is not one character long or the
     *     pattern is invalid
     */
    static String sqlToRegexSimilar(
            String sqlPattern,
            CharSequence escapeStr) {
        return sqlToRegexSimilar(sqlPattern, toEscapeChar(escapeStr));
    }

    /**
     * Translates SQL SIMILAR pattern to Java regex pattern.
     *
     * <p>{@code _} becomes {@code .} and {@code %} becomes {@code (?s:.*)}; bracket
     * lists are rewritten by
     * {@link #sqlSimilarRewriteCharEnumeration(String, StringBuilder, int, char)};
     * characters that are special in Java but ordinary in SQL ({@code \}, {@code $},
     * {@code .}) are quoted; everything else is copied unchanged.
     *
     * @param sqlPattern the SQL SIMILAR pattern
     * @param escapeChar the escape character, or {@code 0} for none
     * @return the equivalent Java regular expression
     * @throws RuntimeException if the pattern contains an invalid escape sequence or
     *     unbalanced brackets
     */
    static String sqlToRegexSimilar(
            String sqlPattern,
            char escapeChar) {
        similarEscapeRuleChecking(sqlPattern, escapeChar);

        final int len = sqlPattern.length();
        final StringBuilder javaPattern = new StringBuilder(len * 2);
        boolean insideCharacterEnumeration = false;
        for (int i = 0; i < len; i++) {
            final char c = sqlPattern.charAt(i);
            if (c == escapeChar) {
                if (i == (len - 1)) {
                    throw invalidEscapeSequence(sqlPattern, i);
                }
                final char nextChar = sqlPattern.charAt(i + 1);
                if ((SQL_SIMILAR_SPECIALS.indexOf(nextChar) < 0)
                        && (nextChar != escapeChar)) {
                    throw invalidEscapeSequence(sqlPattern, i);
                }
                // Replace the SQL escape by a Java backslash where Java needs one.
                // This includes an escaped escape character that is special in Java
                // only (e.g. '\'), which must not reach the regex unquoted.
                appendLiteral(javaPattern, nextChar);
                i++; // we already processed the next char.
                continue;
            }
            switch (c) {
                case '_':
                    javaPattern.append('.');
                    break;
                case '%':
                    javaPattern.append("(?s:.*)");
                    break;
                case '[':
                    javaPattern.append('[');
                    insideCharacterEnumeration = true;
                    // Returns the index just before the closing ']' so that the next
                    // iteration sees the ']' and closes the list.
                    i = sqlSimilarRewriteCharEnumeration(
                        sqlPattern,
                        javaPattern,
                        i,
                        escapeChar);
                    break;
                case ']':
                    if (!insideCharacterEnumeration) {
                        throw invalidRegularExpression(sqlPattern, i);
                    }
                    insideCharacterEnumeration = false;
                    javaPattern.append(']');
                    break;
                case '\\':
                    javaPattern.append("\\\\");
                    break;
                case '$':
                    // $ is special character in java regex, but regular in
                    // SQL regex.
                    javaPattern.append("\\$");
                    break;
                case '.':
                    // Same reasoning as for '$': a wildcard in Java, an ordinary
                    // character in SQL.
                    javaPattern.append("\\.");
                    break;
                default:
                    javaPattern.append(c);
            }
        }
        if (insideCharacterEnumeration) {
            throw invalidRegularExpression(sqlPattern, len);
        }

        return javaPattern.toString();
    }
}