blob: ec4aa3a538aaaa82c8074299d8002c5d548098d4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.expression;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.phoenix.expression.util.regex.AbstractBasePattern;
import org.apache.phoenix.expression.visitor.ExpressionVisitor;
import org.apache.phoenix.parse.LikeParseNode.LikeType;
import org.apache.phoenix.schema.SortOrder;
import org.apache.phoenix.schema.tuple.Tuple;
import org.apache.phoenix.schema.types.PBoolean;
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.schema.types.PVarchar;
import org.apache.phoenix.util.StringUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
/**
*
* Implementation for LIKE operation where the first child expression is the string
* and the second is the pattern. The pattern supports the '_' character for a single
* character wildcard match and '%' for a zero or more character match, where these
* characters may be escaped by preceding them with a '\'.
*
* Example: foo LIKE 'ab%' will match a row in which foo starts with 'ab'
*
*
* @since 0.1
*/
public abstract class LikeExpression extends BaseCompoundExpression {
// Logger used for the trace-level diagnostics emitted by evaluate().
private static final Logger logger = LoggerFactory.getLogger(LikeExpression.class);
// Regex fragment emitted by toPattern() for '%': closes the current \Q...\E quoted
// literal, matches any run of characters, then reopens quoting.
private static final String ZERO_OR_MORE = "\\E.*\\Q";
// Regex fragment emitted by toPattern() for '_': closes the current \Q...\E quoted
// literal, matches exactly one character, then reopens quoting.
private static final String ANY_ONE = "\\E.\\Q";
/**
* Whether this LIKE expression is case sensitive or not. Assigned in init() from the
* optional child at LIKE_TYPE_INDEX, defaulting to CASE_SENSITIVE when that child is absent.
*/
private LikeType likeType;
/**
 * Replaces every occurrence of {@code StringUtil.LIKE_ESCAPE_SEQS} in the given string
 * with the corresponding entry of {@code StringUtil.LIKE_UNESCAPED_SEQS}.
 *
 * @param s the LIKE pattern text to unescape
 * @return the result of the replacement
 */
public static String unescapeLike(String s) {
    String unescaped = StringUtil.replace(s, StringUtil.LIKE_ESCAPE_SEQS, StringUtil.LIKE_UNESCAPED_SEQS);
    return unescaped;
}
/**
 * Returns the leading part of a LIKE pattern that precedes its first wildcard, i.e. the
 * literal text that can potentially be used to set the start/end key.
 *
 * @param s the LIKE pattern
 * @return {@code s} itself when {@link #indexOfWildcard(String)} reports no wildcard
 *         (which includes a {@code null} input), otherwise the substring of {@code s}
 *         before the first wildcard (empty when the pattern starts with one)
 */
public static String getStartsWithPrefix(String s) {
    int wildcardAt = indexOfWildcard(s);
    if (wildcardAt == -1) {
        return s;
    }
    return s.substring(0, wildcardAt);
}
/**
 * Tells whether a LIKE pattern contains a wildcard according to
 * {@link #indexOfWildcard(String)}.
 *
 * @param s the LIKE pattern, may be {@code null}
 * @return {@code true} when a wildcard position was found
 */
public static boolean hasWildcards(String s) {
    int firstWildcard = indexOfWildcard(s);
    return firstWildcard != -1;
}
/**
 * Converts a wildcard pattern into a LIKE pattern. The input is first passed through
 * {@code StringUtil.escapeLike}; afterwards each {@code StringUtil.MULTI_CHAR_WILDCARD}
 * becomes {@code StringUtil.MULTI_CHAR_LIKE} and each
 * {@code StringUtil.SINGLE_CHAR_WILDCARD} becomes {@code StringUtil.SINGLE_CHAR_LIKE}.
 * A wildcard character directly preceded by a backslash is kept as-is and the
 * backslash is dropped.
 * <p>
 * This offers an alternate way of expressing a LIKE pattern that is friendlier when
 * the source string is likely to contain the LIKE wildcard characters themselves.
 *
 * @param s wildcard pattern, with wildcards optionally protected by a backslash
 * @return the equivalent LIKE pattern; the escaped input itself when it contains no
 *         wildcard characters
 */
public static String wildCardToLike(String s) {
    s = StringUtil.escapeLike(s);
    StringBuilder like = new StringBuilder();
    boolean sawWildcard = false;
    for (int pos = 0; pos < s.length(); pos++) {
        char c = s.charAt(pos);
        boolean multi = c == StringUtil.MULTI_CHAR_WILDCARD;
        if (!multi && c != StringUtil.SINGLE_CHAR_WILDCARD) {
            // Ordinary character: copied through unchanged.
            like.append(c);
            continue;
        }
        sawWildcard = true;
        if (pos > 0 && s.charAt(pos - 1) == '\\') {
            // Protected wildcard: the backslash copied on the previous iteration is
            // removed and the wildcard character itself is kept literally.
            like.setLength(like.length() - 1);
            like.append(c);
        } else {
            // Unprotected wildcard: translate to its LIKE counterpart.
            like.append(multi ? StringUtil.MULTI_CHAR_LIKE : StringUtil.SINGLE_CHAR_LIKE);
        }
    }
    return sawWildcard ? like.toString() : s;
}
/**
 * Finds the first unescaped LIKE wildcard ({@code StringUtil.MULTI_CHAR_LIKE} or
 * {@code StringUtil.SINGLE_CHAR_LIKE}) in a pattern.
 * <p>
 * Escaping follows the same rules as {@code toPattern}: a backslash escapes exactly the
 * next character, whatever it is. Consequently an escaped backslash followed by a
 * wildcard (for example {@code \\%}) is reported as a wildcard, whereas looking only at
 * the single preceding character would wrongly treat that wildcard as protected.
 *
 * @param s the LIKE pattern, may be {@code null}
 * @return the index of the first unescaped wildcard, or -1 when {@code s} is
 *         {@code null} or contains none
 */
public static int indexOfWildcard(String s) {
    if (s == null) {
        return -1;
    }
    boolean escaped = false;
    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        if (escaped) {
            // This character is protected by the preceding backslash.
            escaped = false;
        } else if (c == '\\') {
            escaped = true;
        } else if (c == StringUtil.MULTI_CHAR_LIKE || c == StringUtil.SINGLE_CHAR_LIKE) {
            return i;
        }
    }
    return -1;
}
/**
 * Translates a LIKE pattern into a regular expression string. Literal text is wrapped
 * in {@code \Q...\E} quoting, {@code StringUtil.SINGLE_CHAR_LIKE} becomes
 * {@link #ANY_ONE}, and {@code StringUtil.MULTI_CHAR_LIKE} becomes
 * {@link #ZERO_OR_MORE}. A backslash makes the following character literal; a trailing
 * lone backslash is dropped.
 *
 * @param s the LIKE pattern, must not be {@code null}
 * @return the regular expression, always starting with {@code \Q} and ending with
 *         {@code \E}
 */
private static String toPattern(String s) {
    StringBuilder sb = new StringBuilder(s.length() + 16);
    // From the JDK doc: \Q and \E protect everything between them
    sb.append("\\Q");
    boolean wasSlash = false;
    for (int i = 0; i < s.length(); i++) {
        char c = s.charAt(i);
        if (wasSlash) {
            appendQuotedLiteral(sb, c);
            wasSlash = false;
        } else if (c == StringUtil.SINGLE_CHAR_LIKE) {
            sb.append(ANY_ONE);
        } else if (c == StringUtil.MULTI_CHAR_LIKE) {
            sb.append(ZERO_OR_MORE);
        } else if (c == '\\') {
            wasSlash = true;
        } else {
            appendQuotedLiteral(sb, c);
        }
    }
    sb.append("\\E");
    return sb.toString();
}

/**
 * Appends one literal character to the open {@code \Q...\E} section of {@code sb}.
 * A literal backslash followed by a literal 'E' would read as the quote terminator
 * {@code \E}, so that sequence is written the way {@code Pattern.quote} writes it:
 * the quote is closed, the two characters are emitted as the escaped regex
 * {@code \\E}, and quoting is reopened.
 */
private static void appendQuotedLiteral(StringBuilder sb, char c) {
    // The buffer can only end in a backslash when a literal backslash was just appended:
    // the opening "\Q" and both wildcard fragments all end in 'Q'.
    if (c == 'E' && sb.charAt(sb.length() - 1) == '\\') {
        // The existing backslash plus this 'E' form the closing \E; then emit \\E and \Q.
        sb.append("E\\\\E\\Q");
    } else {
        sb.append(c);
    }
}
// private static String fromPattern(String s) {
// StringBuilder sb = new StringBuilder(s.length());
//
// for (int i = 0; i < s.length(); i++) {
// if (s.substring(i).startsWith("\\Q")) {
// while (s.substring(i + "\\Q".length()).startsWith("\\E")) {
// sb.append(s.charAt(i++ + "\\Q".length()));
// }
// i+= "\\E".length();
// }
// if (s.charAt(i) == '.') {
// if (s.charAt(i+1) == '*') {
// sb.append('%');
// i+=2;
// } else {
// sb.append('_');
// i++;
// }
// }
// }
// return sb.toString();
// }
// Position in the child list of the optional literal that names the LikeType.
private static final int LIKE_TYPE_INDEX = 2;
// One shared literal per LikeType, indexed by the enum ordinal and holding the enum name.
private static final LiteralExpression[] LIKE_TYPE_LITERAL = new LiteralExpression[LikeType.values().length];
static {
    LikeType[] allTypes = LikeType.values();
    for (int k = 0; k < allTypes.length; k++) {
        LikeType type = allTypes[k];
        LIKE_TYPE_LITERAL[type.ordinal()] = LiteralExpression.newConstant(type.name());
    }
}
// Pattern compiled once by init() when the pattern child is stateless, always
// deterministic and evaluates successfully; otherwise left null, in which case
// evaluate() compiles the pattern on every call.
private AbstractBasePattern pattern;
/**
* Creates an instance without children; likeType and pattern stay unset until
* readFields(DataInput) runs init(). Presumably intended for deserialization.
*/
public LikeExpression() {
}
/**
 * Builds a new child list consisting of the given children followed by the shared
 * literal expression that names {@code likeType}. The input list is not modified.
 *
 * @param children the existing child expressions
 * @param likeType the LIKE type to record as the trailing child
 * @return a new list with the LIKE type literal appended
 */
protected static List<Expression> addLikeTypeChild(List<Expression> children, LikeType likeType) {
    List<Expression> withType = Lists.newArrayListWithExpectedSize(children.size() + 1);
    for (Expression child : children) {
        withType.add(child);
    }
    LiteralExpression typeLiteral = LIKE_TYPE_LITERAL[likeType.ordinal()];
    withType.add(typeLiteral);
    return withType;
}
/**
* Creates a LIKE expression over the given children and immediately runs init(),
* which reads the LIKE type and, when possible, pre-compiles the pattern.
*
* @param children the string expression, the pattern expression and optionally the
* LIKE type literal, in that order
*/
public LikeExpression(List<Expression> children) {
super(children);
init();
}
/**
* @return the LIKE type determined by init()
*/
public LikeType getLikeType () {
return likeType;
}
/**
 * Tells whether the pre-compiled pattern begins with an empty quoted literal
 * ({@code \Q\E}), which is what {@code toPattern} produces when the LIKE pattern
 * starts with a wildcard.
 * <p>
 * The {@code (?i)} flag that {@code compilePattern} prepends for non case-sensitive
 * patterns is skipped first; without that step the check could never succeed for such
 * patterns. This relies on {@code pattern.pattern()} returning the regex string that
 * was handed to {@code compilePatternSpec}.
 *
 * @return {@code false} when no pattern has been pre-compiled, otherwise whether the
 *         regex starts with an empty literal section
 */
public boolean startsWithWildcard() {
    if (pattern == null) {
        return false;
    }
    String regex = pattern.pattern();
    int quoteStart = regex.startsWith("(?i)") ? "(?i)".length() : 0;
    return regex.startsWith("\\Q\\E", quoteStart);
}
/**
 * Derives the state of this expression from its children: reads the LIKE type from the
 * optional child at {@code LIKE_TYPE_INDEX} (defaulting to case sensitive) and, when the
 * pattern child is stateless, always deterministic and evaluates successfully without a
 * tuple, compiles the pattern once up front.
 */
private void init() {
    List<Expression> operands = getChildren();
    if (operands.size() > LIKE_TYPE_INDEX) {
        LiteralExpression typeLiteral = (LiteralExpression) operands.get(LIKE_TYPE_INDEX);
        this.likeType = LikeType.valueOf((String) typeLiteral.getValue());
    } else {
        this.likeType = LikeType.CASE_SENSITIVE;
    }

    Expression patternExpression = getPatternExpression();
    boolean constantPattern = patternExpression.isStateless()
            && patternExpression.getDeterminism() == Determinism.ALWAYS;
    if (!constantPattern) {
        // The pattern depends on the row; evaluate() compiles it per call.
        return;
    }
    ImmutableBytesWritable ptr = new ImmutableBytesWritable();
    if (patternExpression.evaluate(null, ptr)) {
        String value = (String) PVarchar.INSTANCE.toObject(
                ptr, patternExpression.getDataType(), patternExpression.getSortOrder());
        pattern = compilePattern(value);
    }
}
/**
* Compiles a regular expression string into a pattern object. Called by
* compilePattern(String) with the output of toPattern, prefixed with "(?i)" when the
* LIKE type is not case sensitive.
*
* @param value the regular expression to compile
* @return the compiled pattern
*/
protected abstract AbstractBasePattern compilePatternSpec(String value);
/**
 * Converts a LIKE pattern to a regular expression and compiles it. For any LIKE type
 * other than {@code CASE_SENSITIVE} the regex is prefixed with the {@code (?i)} flag.
 *
 * @param value the LIKE pattern text
 * @return the compiled pattern produced by {@link #compilePatternSpec(String)}
 */
protected AbstractBasePattern compilePattern(String value) {
    String regex = toPattern(value);
    if (likeType != LikeType.CASE_SENSITIVE) {
        regex = "(?i)" + regex;
    }
    return compilePatternSpec(regex);
}
/**
* @return the first child: the expression producing the string to be matched
*/
private Expression getStrExpression() {
return children.get(0);
}
/**
* @return the second child: the expression producing the LIKE pattern
*/
private Expression getPatternExpression() {
return children.get(1);
}
/**
 * Evaluates the LIKE match for one tuple. When no pattern was pre-compiled, the pattern
 * child is evaluated and compiled on the spot. The string child is then evaluated, its
 * bytes are coerced to ascending sort order, and the pattern is applied to {@code ptr}.
 *
 * @param tuple the row being evaluated
 * @param ptr   scratch pointer used for the intermediate values; after a successful
 *              call it is read back as a boolean for tracing, so presumably
 *              {@code pattern.matches(ptr)} leaves the match result in it
 * @return {@code false} when either the pattern child or the string child fails to
 *         evaluate, {@code true} otherwise
 */
@Override
public boolean evaluate(Tuple tuple, ImmutableBytesWritable ptr) {
    AbstractBasePattern compiled = this.pattern;
    if (compiled == null) { // TODO: don't allow? this is going to be slooowwww
        Expression patternExpression = getPatternExpression();
        if (!patternExpression.evaluate(tuple, ptr)) {
            if (logger.isTraceEnabled()) {
                logger.trace("LIKE is FALSE: pattern is null");
            }
            return false;
        }
        String patternText = (String) PVarchar.INSTANCE.toObject(ptr, patternExpression.getSortOrder());
        compiled = compilePattern(patternText);
        if (logger.isTraceEnabled()) {
            logger.trace("LIKE pattern is expression: " + compiled.pattern());
        }
    }

    Expression source = getStrExpression();
    SortOrder sourceOrder = source.getSortOrder();
    if (!source.evaluate(tuple, ptr)) {
        if (logger.isTraceEnabled()) {
            logger.trace("LIKE is FALSE: child expression is null");
        }
        return false;
    }

    PVarchar varchar = PVarchar.INSTANCE;
    // Decode the source string only when it will actually be logged.
    String tracedValue = logger.isTraceEnabled() ? (String) varchar.toObject(ptr, sourceOrder) : null;
    varchar.coerceBytes(ptr, varchar, sourceOrder, SortOrder.ASC);
    compiled.matches(ptr);
    if (logger.isTraceEnabled()) {
        boolean matched = ((Boolean) PBoolean.INSTANCE.toObject(ptr)).booleanValue();
        logger.trace("LIKE(value='" + tracedValue + "'pattern='" + compiled.pattern() + "' is " + matched);
    }
    return true;
}
/**
* Reads the state handled by the superclass and then re-runs init() so that likeType
* and, when possible, the pre-compiled pattern are rebuilt from the children.
*/
@Override
public void readFields(DataInput input) throws IOException {
super.readFields(input);
init();
}
/**
* Delegates entirely to the superclass; no LIKE-specific state is written here because
* readFields(DataInput) rebuilds it through init().
*/
@Override
public void write(DataOutput output) throws IOException {
super.write(output);
}
/**
* @return the boolean data type, as a LIKE expression evaluates to a boolean
*/
@Override
public PDataType getDataType() {
return PBoolean.INSTANCE;
}
/**
 * Visitor entry point: enters this node, visits the children, then leaves this node.
 * When {@code visitLeave} yields {@code null}, the visitor's default return for this
 * node is used instead.
 *
 * @param visitor the visitor to apply
 * @return the visitor's result for this expression
 */
@Override
public final <T> T accept(ExpressionVisitor<T> visitor) {
    List<T> childResults = acceptChildren(visitor, visitor.visitEnter(this));
    T result = visitor.visitLeave(this, childResults);
    return result != null ? result : visitor.defaultReturn(this, childResults);
}
/**
 * Returns the text of the first quoted literal section of the pre-compiled pattern,
 * i.e. what lies between the opening {@code \Q} and the first {@code \E}.
 * <p>
 * The {@code (?i)} flag that {@code compilePattern} prepends for non case-sensitive
 * patterns is skipped before locating the literal; with a fixed offset of two
 * characters the result for such patterns would contain parts of the flag and the
 * {@code \Q} marker. This relies on {@code pattern.pattern()} returning the regex
 * string that was handed to {@code compilePatternSpec}. For non case-sensitive
 * patterns the returned text is the literal as written in the pattern, so it is not
 * by itself a byte-exact prefix of every matching value.
 *
 * @return the leading literal, or the empty string when no pattern has been
 *         pre-compiled or the pattern starts with a wildcard
 */
public String getLiteralPrefix() {
    if (pattern == null) {
        return "";
    }
    String regex = this.pattern.pattern();
    int quoteStart = regex.startsWith("(?i)") ? "(?i)".length() : 0;
    int fromIndex = quoteStart + "\\Q".length();
    return regex.substring(fromIndex, regex.indexOf("\\E", fromIndex));
}
/**
 * Tells whether the pre-compiled pattern ends with a zero-or-more wildcard and contains
 * no other wildcard fragment ({@link #ANY_ONE} or {@link #ZERO_OR_MORE}) before it.
 *
 * @return {@code false} when no pattern has been pre-compiled or the condition above
 *         does not hold, {@code true} otherwise
 */
public boolean endsWithOnlyWildcard() {
    if (pattern == null) {
        return false;
    }
    String regex = this.pattern.pattern();
    String trailingWildcard = ZERO_OR_MORE + "\\E";
    if (!regex.endsWith(trailingWildcard)) {
        return false;
    }
    // Search backwards from just before the trailing wildcard so it is not counted itself.
    int searchFrom = regex.length() - trailingWildcard.length() - 1;
    boolean noSingleBefore = regex.lastIndexOf(ANY_ONE, searchFrom) == -1;
    return noSingleBefore && regex.lastIndexOf(ZERO_OR_MORE, searchFrom) == -1;
}
/**
 * @return the first child, the text {@code " LIKE "} and the second child, concatenated
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(children.get(0));
    text.append(" LIKE ");
    text.append(children.get(1));
    return text.toString();
}
/**
* Creates a LIKE expression of the same concrete kind over a different child list.
*
* @param children the children for the new expression
* @return the new expression
*/
abstract public LikeExpression clone(List<Expression> children);
}