| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.jackrabbit.oak.query; |
| |
| import java.util.BitSet; |
| import java.util.HashSet; |
| import java.util.Set; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import com.google.common.base.Splitter; |
| import com.google.common.collect.ImmutableSet; |
| |
| import org.apache.jackrabbit.oak.api.PropertyState; |
| import org.apache.jackrabbit.oak.api.PropertyValue; |
| import org.apache.jackrabbit.oak.api.Tree; |
| import org.apache.jackrabbit.oak.api.Type; |
| import org.apache.jackrabbit.oak.commons.PathUtils; |
| import org.apache.jackrabbit.oak.query.ast.AndImpl; |
| import org.apache.jackrabbit.oak.query.ast.ConstraintImpl; |
| import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl; |
| import org.apache.jackrabbit.oak.query.ast.LiteralImpl; |
| import org.apache.jackrabbit.oak.query.ast.OrImpl; |
| import org.apache.jackrabbit.oak.plugins.memory.PropertyValues; |
| |
| import static java.lang.Character.isLetterOrDigit; |
| import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters; |
| |
/**
 * This class extracts excerpts from the string properties of a node and can
 * highlight the full-text search terms of the query within them.
 */
| class SimpleExcerptProvider { |
| |
| static final String REP_EXCERPT_FN = "rep:excerpt(.)"; |
| static final String EXCERPT_END = "</span></div>"; |
| static final String EXCERPT_BEGIN = "<div><span>"; |
| |
| private static final boolean CASE_SENSITIVE_HIGHLIGHT = Boolean.getBoolean("oak.query.caseSensitiveHighlight"); |
| |
| private static int maxFragmentSize = 150; |
| |
/** Static helpers only; this class is never instantiated. */
private SimpleExcerptProvider() {
    // no instances
}
| |
| static String getExcerpt(String path, String columnName, |
| Query query, boolean highlight) { |
| if (path == null) { |
| return null; |
| } |
| Tree t = query.getTree(path); |
| if (t == null || !t.exists()) { |
| return null; |
| } |
| columnName = extractExcerptProperty(columnName); |
| if (columnName != null && columnName.contains("/")) { |
| for (String p : PathUtils.elements(PathUtils |
| .getParentPath(columnName))) { |
| if (t.hasChild(p)) { |
| t = t.getChild(p); |
| } else { |
| return null; |
| } |
| } |
| columnName = PathUtils.getName(columnName); |
| } |
| |
| StringBuilder text = new StringBuilder(); |
| String separator = ""; |
| for (PropertyState p : t.getProperties()) { |
| if (p.getType().tag() == Type.STRING.tag() |
| && (columnName == null || columnName.equalsIgnoreCase(p |
| .getName()))) { |
| text.append(separator); |
| separator = " "; |
| for (String v : p.getValue(Type.STRINGS)) { |
| text.append(v); |
| } |
| } |
| } |
| Set<String> searchToken = extractFulltext(query); |
| if (highlight && searchToken != null) { |
| return highlight(text, searchToken); |
| } |
| return noHighlight(text); |
| } |
| |
| private static String extractExcerptProperty(String column) { |
| // most frequent case first |
| if (REP_EXCERPT_FN.equalsIgnoreCase(column)) { |
| return null; |
| } |
| return column.substring(column.indexOf("(") + 1, column.indexOf(")")); |
| } |
| |
| private static Set<String> extractFulltext(Query q) { |
| // TODO instanceof should not be used |
| if (q instanceof QueryImpl) { |
| return extractFulltext(((QueryImpl) q).getConstraint()); |
| } |
| return ImmutableSet.of(); |
| } |
| |
| private static Set<String> extractFulltext(ConstraintImpl c) { |
| Set<String> tokens = new HashSet<String>(); |
| // TODO instanceof should not be used, |
| // as it will break without us noticing if we extend the AST |
| if (c instanceof FullTextSearchImpl) { |
| FullTextSearchImpl f = (FullTextSearchImpl) c; |
| if (f.getFullTextSearchExpression() instanceof LiteralImpl) { |
| LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression(); |
| tokens.add(l.getLiteralValue().getValue(Type.STRING)); |
| } |
| } |
| if (c instanceof AndImpl) { |
| for (ConstraintImpl constraint : ((AndImpl) c).getConstraints()) { |
| tokens.addAll(extractFulltext(constraint)); |
| } |
| } |
| if (c instanceof OrImpl) { |
| for (ConstraintImpl constraint : ((OrImpl) c).getConstraints()) { |
| tokens.addAll(extractFulltext(constraint)); |
| } |
| } |
| return tokens; |
| } |
| |
| private static Set<String> tokenize(Set<String> in) { |
| Set<String> tokens = new HashSet<String>(); |
| for (String s : in) { |
| tokens.addAll(tokenize(s)); |
| } |
| return tokens; |
| } |
| |
| private static Set<String> tokenize(String in) { |
| Set<String> out = new HashSet<String>(); |
| StringBuilder token = new StringBuilder(); |
| boolean quote = false; |
| for (int i = 0; i < in.length(); ) { |
| final int c = in.codePointAt(i); |
| int length = Character.charCount(c); |
| switch (c) { |
| case ' ': |
| if (quote) { |
| token.append(' '); |
| } else if (token.length() > 0) { |
| out.add(token.toString()); |
| token = new StringBuilder(); |
| } |
| break; |
| case '"': |
| case '\'': |
| if (quote) { |
| quote = false; |
| if (token.length() > 0) { |
| out.add(token.toString()); |
| token = new StringBuilder(); |
| } |
| } else { |
| quote = true; |
| } |
| break; |
| default: |
| token.append(new String(Character.toChars(c))); |
| } |
| i += length; |
| } |
| if (token.length() > 0) { |
| out.add(token.toString()); |
| } |
| return out; |
| } |
| |
| private static String noHighlight(StringBuilder text) { |
| if (text.length() > maxFragmentSize) { |
| int lastSpace = text.lastIndexOf(" ", maxFragmentSize); |
| if (lastSpace != -1) { |
| text.setLength(lastSpace); |
| } else { |
| text.setLength(maxFragmentSize); |
| } |
| text.append(" ..."); |
| } |
| StringBuilder excerpt = new StringBuilder("<div><span>"); |
| excerpt.append(encodeIllegalXMLCharacters(text.toString())); |
| excerpt.append("</span></div>"); |
| return excerpt.toString(); |
| } |
| |
| static String highlight(StringBuilder text, Set<String> searchToken) { |
| Set<String> tokens = tokenize(searchToken); |
| String escaped = encodeIllegalXMLCharacters(text.toString()); |
| BitSet highlight = new BitSet(); |
| for (String token : tokens) { |
| highlight(escaped, highlight, token); |
| } |
| StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); |
| boolean strong = false; |
| for (int i = 0; i < escaped.length(); i++) { |
| if (highlight.get(i) && !strong) { |
| strong = true; |
| excerpt.append("<strong>"); |
| } else if (!highlight.get(i) && strong) { |
| strong = false; |
| excerpt.append("</strong>"); |
| } |
| excerpt.append(escaped.charAt(i)); |
| } |
| if (strong) { |
| excerpt.append("</strong>"); |
| } |
| excerpt.append(EXCERPT_END); |
| return excerpt.toString(); |
| } |
| |
| private static void highlight(String text, BitSet highlightBits, String token) { |
| boolean isLike = false; |
| if (token.endsWith("*")) { |
| if (token.length() == 1) { |
| // don't highlight the '*' character itself |
| return; |
| } |
| token = token.substring(0, token.length() - 1); |
| isLike = true; |
| } |
| int index = 0; |
| while (index < text.length()) { |
| index = indexOfSearchText(text, token, index); |
| if (index < 0) { |
| break; |
| } |
| int endIndex = index + token.length(); |
| if (isLike) { |
| int nextSpace = endIndex; |
| |
| while (nextSpace < text.length() && !isDelimeter(text.codePointAt(nextSpace))) { |
| nextSpace++; |
| } |
| |
| if (nextSpace != text.length()) { |
| endIndex = nextSpace; |
| } else { |
| endIndex = text.length(); |
| } |
| } |
| |
| boolean isStartOk = (index == 0) || //allow for highlighting for token at the beginning |
| isDelimeter(text.codePointAt(index - 1)); //else token must follow a delimeter |
| boolean isEndOk = (endIndex == text.length()) || //token is at the end of string |
| isDelimeter(text.codePointAt(endIndex)); //else token must precede a delimeter |
| |
| if (isStartOk && isEndOk) { |
| while (index < endIndex) { |
| highlightBits.set(index++); |
| } |
| } else { |
| index = endIndex; |
| } |
| } |
| } |
| |
| private static int indexOfSearchText(String text, String searchStr, int fromIndex) { |
| if (CASE_SENSITIVE_HIGHLIGHT) { |
| return text.indexOf(searchStr, fromIndex); |
| } |
| return indexOfIgnoreCase(text, searchStr, fromIndex); |
| } |
| |
| public static int indexOfIgnoreCase(String str, String searchStr, int startPos) { |
| // This is not very efficient, specially as we create the pattern each time. |
| // An alternative is to use apache commons lang StringUtils.indexOfIgnoreCase, |
| // but that would require a new dependency |
| String quotedSearchStr = Pattern.quote(searchStr); |
| Pattern pattern = Pattern.compile(quotedSearchStr, Pattern.CASE_INSENSITIVE); |
| Matcher matcher = pattern.matcher(str); |
| if(matcher.find(startPos)) { |
| return matcher.start(); |
| } |
| return -1; |
| } |
| |
| static boolean isDelimeter(int codePoint) { |
| return !isLetterOrDigit(codePoint); |
| } |
| |
| static PropertyValue getExcerpt(PropertyValue value) { |
| Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings(); |
| StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); |
| for (String v : listSplitter.splitToList(value.toString())) { |
| excerpt.append(v); |
| } |
| excerpt.append(EXCERPT_END); |
| return PropertyValues.newString(excerpt.toString()); |
| } |
| } |