blob: e49f856d1f928c9e262c17333bfa74c56e87b96d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.spi.query.fulltext;
/**
 * A fulltext term, or a "not" term.
 * <p>
 * A term holds the original search text and a filtered, lower-cased form of
 * it that is used for matching. If the text contains the wildcards
 * {@code *} or {@code ?} (or an underscore, which has to be escaped), the
 * term is matched using a {@link LikePattern}; otherwise a plain substring
 * search is used. Matching is case-insensitive.
 */
public class FullTextTerm extends FullTextExpression {

    /**
     * Characters that are kept in the filtered text even though
     * {@link #isFullTextCharacter(char)} rejects them.
     */
    private static final String EXTRA_ALLOWED_CHARS = " +-:&/.";

    /**
     * The locale used for case folding. A fixed locale is used so that
     * matching does not depend on the default locale of the JVM (for example
     * the Turkish dotless "i").
     */
    private static final java.util.Locale CASE_LOCALE = java.util.Locale.ROOT;

    private final boolean not;
    private final String propertyName;
    private final String text;
    private final String filteredText;
    private final String boost;
    private final LikePattern like;

    /**
     * Create a copy of the given term that uses a different property name.
     * All other attributes are taken over from the given term.
     *
     * @param propertyName the property name of the new term
     * @param copy the term to copy the remaining attributes from
     */
    public FullTextTerm(String propertyName, FullTextTerm copy) {
        this.propertyName = propertyName;
        this.not = copy.not;
        this.text = copy.text;
        this.filteredText = copy.filteredText;
        this.boost = copy.boost;
        this.like = copy.like;
    }

    /**
     * Create a new term.
     *
     * @param propertyName the property name (in the string form, it is
     *            omitted if it is null or "*")
     * @param text the search text
     * @param not whether this is a "not" term, that is, whether the result
     *            of the match is negated
     * @param escaped if true, the text is used as is (no wildcard conversion
     *            and no character filtering, only lower-casing)
     * @param boost the boost, or null if there is none
     */
    public FullTextTerm(String propertyName, String text, boolean not, boolean escaped, String boost) {
        this.propertyName = propertyName;
        this.text = text;
        this.not = not;
        this.boost = boost;
        boolean pattern = false;
        String filtered;
        if (escaped) {
            // the text is taken as is; a null text is tolerated here
            // so that construction behaves as before
            filtered = text;
        } else {
            // Filter special characters such as ' (single quote).
            // This is needed for example for
            // FulltextQueryTest.testFulltextExcludeSQL,
            // which searches for:
            // "text ''fox jumps'' -other"
            // (please note the two single quotes instead of
            // double quotes before fox and after jumps)
            StringBuilder buff = new StringBuilder(text.length());
            for (int i = 0; i < text.length(); i++) {
                char c = text.charAt(i);
                switch (c) {
                case '*':
                    // fulltext "any characters" -> LIKE "%"
                    buff.append('%');
                    pattern = true;
                    break;
                case '?':
                    // fulltext "any one character" -> LIKE "_"
                    buff.append('_');
                    pattern = true;
                    break;
                case '_':
                    // a literal underscore is escaped in the appended text
                    // and also forces the use of the LIKE pattern
                    buff.append("\\_");
                    pattern = true;
                    break;
                default:
                    if (isFullTextCharacter(c) || EXTRA_ALLOWED_CHARS.indexOf(c) >= 0) {
                        buff.append(c);
                    }
                    break;
                }
            }
            filtered = buff.toString();
        }
        // evaluate() lower-cases the value (for testFulltextIntercapSQL),
        // so the filtered text has to be lower-cased in both cases,
        // otherwise an escaped term with upper-case characters never matches
        this.filteredText = filtered == null ? null : filtered.toLowerCase(CASE_LOCALE);
        if (pattern) {
            like = new LikePattern("%" + filteredText + "%");
        } else {
            like = null;
        }
    }

    /**
     * Whether or not the given character is part of a full-text term that
     * should be indexed. Not indexed are punctuation, control characters such
     * as tab, format characters, combining marks, and separators such as
     * space. Letters, numbers, symbols, surrogates, private use, and
     * unassigned characters are indexed.
     *
     * See also <a href=
     * "http://en.wikipedia.org/wiki/Character_property_(Unicode)#General_Category"
     * > Unicode Categories</a>.
     *
     * @param c the character
     * @return true if the character should be indexed
     */
    public static boolean isFullTextCharacter(char c) {
        switch (Character.getType(c)) {
        // Category "Letter" (Lu, Ll, Lt, Lm, Lo)
        case Character.UPPERCASE_LETTER:
        case Character.LOWERCASE_LETTER:
        case Character.TITLECASE_LETTER:
        case Character.MODIFIER_LETTER:
        case Character.OTHER_LETTER:
            return true;
        // Category "Number" (Nd, Nl, No)
        case Character.DECIMAL_DIGIT_NUMBER:
        case Character.LETTER_NUMBER:
        case Character.OTHER_NUMBER:
            return true;
        // Category "Symbol" (Sm, Sc, Sk, So)
        case Character.MATH_SYMBOL:
        case Character.CURRENCY_SYMBOL:
        case Character.MODIFIER_SYMBOL:
        case Character.OTHER_SYMBOL:
            return true;
        // Category "Other", control and format (Cc, Cf)
        case Character.CONTROL:
        case Character.FORMAT:
            return false;
        // Category "Other", remaining members (Cs, Co, Cn)
        case Character.SURROGATE:
        case Character.PRIVATE_USE:
        case Character.UNASSIGNED:
            return true;
        // Category "Mark" (Mn, Mc, Me)
        case Character.NON_SPACING_MARK:
        case Character.COMBINING_SPACING_MARK:
        case Character.ENCLOSING_MARK:
            return false;
        // Category "Punctuation" (Pc, Pd, Ps, Pe, Pi, Pf, Po)
        case Character.CONNECTOR_PUNCTUATION:
        case Character.DASH_PUNCTUATION:
        case Character.START_PUNCTUATION:
        case Character.END_PUNCTUATION:
        case Character.INITIAL_QUOTE_PUNCTUATION:
        case Character.FINAL_QUOTE_PUNCTUATION:
        case Character.OTHER_PUNCTUATION:
            return false;
        // Category "Separator" (Zs, Zl, Zp)
        case Character.SPACE_SEPARATOR:
        case Character.LINE_SEPARATOR:
        case Character.PARAGRAPH_SEPARATOR:
            return false;
        }
        // unknown
        return true;
    }

    /**
     * Check whether the given value matches this term. The comparison is
     * case-insensitive. For a "not" term, the result is negated, both for
     * plain substring matching and for wildcard (LIKE) matching.
     *
     * @param value the value to test
     * @return true if the value matches
     */
    @Override
    public boolean evaluate(String value) {
        // toLowerCase for testFulltextIntercapSQL
        String lower = value.toLowerCase(CASE_LOCALE);
        boolean found;
        if (like != null) {
            found = like.matches(lower);
        } else {
            found = lower.contains(filteredText);
        }
        // a "not" term matches exactly when the text is not found
        return not ? !found : found;
    }

    /**
     * A term can not be simplified further.
     *
     * @return this term
     */
    @Override
    public FullTextExpression simplify() {
        return this;
    }

    /**
     * Get the string form of this term: an optional leading '-' for a "not"
     * term, the property name followed by ':' (unless it is null or "*"),
     * the original text in double quotes, and the boost prefixed with '^'
     * if there is one.
     *
     * @return the string form
     */
    @Override
    public String toString() {
        StringBuilder buff = new StringBuilder();
        if (not) {
            buff.append('-');
        }
        if (propertyName != null && !"*".equals(propertyName)) {
            buff.append(propertyName).append(':');
        }
        buff.append('\"');
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '\\') {
                // a backslash is doubled
                buff.append(c);
            } else if (c == '\"') {
                // a double quote is escaped with a backslash
                buff.append('\\');
            }
            buff.append(c);
        }
        buff.append('\"');
        if (boost != null) {
            buff.append('^').append(boost);
        }
        return buff.toString();
    }

    /**
     * Get the property name.
     *
     * @return the property name as passed to the constructor
     */
    public String getPropertyName() {
        return propertyName;
    }

    /**
     * Get the boost.
     *
     * @return the boost, or null if none was set
     */
    public String getBoost() {
        return boost;
    }

    /**
     * Whether this is a "not" term.
     *
     * @return true if the match is negated
     */
    @Override
    public boolean isNot() {
        return not;
    }

    /**
     * Get the original (unfiltered) search text.
     *
     * @return the text as passed to the constructor
     */
    public String getText() {
        return text;
    }

    /**
     * Get the precedence of this expression.
     *
     * @return {@code PRECEDENCE_TERM}
     */
    @Override
    public int getPrecedence() {
        return PRECEDENCE_TERM;
    }

    /**
     * Let the given visitor visit this term.
     *
     * @param v the visitor
     * @return the value returned by the visitor
     */
    @Override
    public boolean accept(FullTextVisitor v) {
        return v.visit(this);
    }

}