blob: bfa0f68313083246045fd97ab31a3ea18ac91a3c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.observation.filter;
import static java.util.Collections.disjoint;
import static org.apache.jackrabbit.oak.commons.PathUtils.concat;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.spi.observation.ChangeSet;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ChangeSetFilterImpl implements ChangeSetFilter {
private static final Logger LOG = LoggerFactory.getLogger(ChangeSetFilterImpl.class);
private static final int MAX_EXCLUDED_PATHS = 11;
private static final int MAX_EXCLUDE_PATH_CUTOFF_LEVEL = 6;
private final Set<String> rootIncludePaths;
private final Set<String> firstLevelIncludeNames;
private final Set<Pattern> includePathPatterns;
private final Set<Pattern> excludePathPatterns;
private final Set<Pattern> unpreciseExcludePathPatterns;
private final Set<String> parentNodeNames;
private final Set<String> parentNodeTypes;
private final Set<String> propertyNames;
@Override
public String toString() {
return "ChangeSetFilterImpl[rootIncludePaths="+rootIncludePaths+", includePathPatterns="+includePathPatterns+
", excludePathPatterns="+excludePathPatterns+", parentNodeNames="+parentNodeNames+", parentNodeTypes="+
parentNodeTypes+", propertyNames="+propertyNames+"]";
}
public ChangeSetFilterImpl(@NotNull Set<String> includedParentPaths, boolean isDeep,
@Nullable Set<String> additionalIncludedParentPaths, Set<String> excludedParentPaths,
Set<String> parentNodeNames, Set<String> parentNodeTypes, Set<String> propertyNames) {
this(includedParentPaths, isDeep, additionalIncludedParentPaths, excludedParentPaths, parentNodeNames, parentNodeTypes, propertyNames,
MAX_EXCLUDED_PATHS);
}
public ChangeSetFilterImpl(@NotNull Set<String> includedParentPaths, boolean isDeep,
@Nullable Set<String> additionalIncludedParentPaths, Set<String> excludedParentPaths,
Set<String> parentNodeNames, Set<String> parentNodeTypes, Set<String> propertyNames,
int maxExcludedPaths) {
this.rootIncludePaths = new HashSet<String>();
this.includePathPatterns = new HashSet<Pattern>();
Set<String> firstLevelIncludePaths = new HashSet<String>();
for (String aRawIncludePath : includedParentPaths) {
final String aGlobbingIncludePath;
if (aRawIncludePath.contains("*")) {
// then isDeep is not applicable, it is already a glob path
aGlobbingIncludePath = aRawIncludePath;
} else {
aGlobbingIncludePath = !isDeep ? aRawIncludePath : concat(aRawIncludePath, "**");
}
this.rootIncludePaths.add(aRawIncludePath);
this.includePathPatterns.add(asPattern(aGlobbingIncludePath));
if (firstLevelIncludePaths != null) {
final String firstLevelName = firstLevelName(aRawIncludePath);
if (firstLevelName != null && !firstLevelName.contains("*")) {
firstLevelIncludePaths.add(firstLevelName);
} else {
firstLevelIncludePaths = null;
}
}
}
if (additionalIncludedParentPaths != null) {
for (String path : additionalIncludedParentPaths) {
this.rootIncludePaths.add(path);
this.includePathPatterns.add(asPattern(path));
if (firstLevelIncludePaths != null) {
final String firstLevelName = firstLevelName(path);
if (firstLevelName != null && !firstLevelName.contains("*")) {
firstLevelIncludePaths.add(firstLevelName);
} else {
firstLevelIncludePaths = null;
}
}
}
}
this.firstLevelIncludeNames = firstLevelIncludePaths;
// OAK-5169:
// excludedParentPaths could in theory be a large list, in which case
// the excludes() algorithm becomes non-performing. Reason is, that it
// iterates through the changeSet and then through the excludePaths.
// which means it becomes an O(n*m) operation.
// This should be avoided and one way to avoid this is to make an
// unprecise exclude filtering, where a smaller number of parent
// exclude paths is determined - and if the change is within this
// unprecise set, then we have to include it (with the risk of
// false negative) - but if the change is outside of this unprecise
// set, then we are certain that we are not excluding it.
// one way this unprecise filter can be implemented is by
// starting off with eg 6 levels deep paths, and check if that brings
// down the number far enough (to eg 11), if it's still too high,
// cut off exclude paths at level 5 and repeat until the figure ends
// up under 11.
this.excludePathPatterns = new HashSet<Pattern>();
this.unpreciseExcludePathPatterns = new HashSet<Pattern>();
if (excludedParentPaths.size() < maxExcludedPaths) {
for (String aRawExcludePath : excludedParentPaths) {
this.excludePathPatterns.add(asPattern(concat(aRawExcludePath, "**")));
}
} else {
final Set<String> unprecisePaths = unprecisePaths(excludedParentPaths, maxExcludedPaths, MAX_EXCLUDE_PATH_CUTOFF_LEVEL);
for (String anUnprecisePath : unprecisePaths) {
this.unpreciseExcludePathPatterns.add(asPattern(concat(anUnprecisePath, "**")));
}
}
this.propertyNames = propertyNames == null ? null : new HashSet<String>(propertyNames);
this.parentNodeTypes = parentNodeTypes == null ? null : new HashSet<String>(parentNodeTypes);
this.parentNodeNames = parentNodeNames == null ? null : new HashSet<String>(parentNodeNames);
}
private String firstLevelName(String path) {
if (path.isEmpty() || path.equals("/")) {
return null;
}
int secondSlash = path.indexOf("/", 1);
if (secondSlash != -1) {
return path.substring(1, secondSlash);
} else {
return path.substring(1);
}
}
private Set<String> unprecisePaths(Set<String> paths, int maxExcludedPaths, int maxExcludePathCutOffLevel) {
int level = maxExcludePathCutOffLevel;
while(level > 1) {
Set<String> unprecise = unprecisePaths(paths, level);
if (unprecise.size() < maxExcludedPaths) {
return unprecise;
}
level--;
}
// worst case: we even have too many top-level paths, so
// the only way out here is by returning a set containing only "/"
HashSet<String> result = new HashSet<String>();
result.add("/");
return result;
}
private Set<String> unprecisePaths(Set<String> paths, int level) {
Set<String> result = new HashSet<String>();
for (String path : paths) {
String unprecise = path;
while(PathUtils.getDepth(unprecise) > level) {
unprecise = PathUtils.getParentPath(unprecise);
}
result.add(unprecise);
}
return result;
}
/** for testing only **/
public Set<String> getRootIncludePaths() {
return rootIncludePaths;
}
private Pattern asPattern(String patternWithGlobs) {
return Pattern.compile(GlobbingPathHelper.globPathAsRegex(patternWithGlobs));
}
@Override
public boolean excludes(ChangeSet changeSet) {
try{
return doExcludes(changeSet);
} catch(Exception e) {
LOG.warn("excludes: got an Exception while evaluating excludes: " + e.getMessage() +
", changeSet=" + changeSet, e);
return false; // false is the safer option
}
}
private boolean doExcludes(ChangeSet changeSet) {
if (changeSet.anyOverflow()) {
// in case of an overflow we could
// either try to still determine include/exclude based on non-overflown
// sets - or we can do a fail-stop and determine this as too complex
// to try-to-exclude, and just include
//TODO: optimize this later
return false;
}
if (changeSet.doesHitMaxPathDepth()) {
// then we might or might not include this - but without
// further complicated checks this can't be determined for sure
// so for simplicity reason just check first level include names
// if available
if (firstLevelIncludeNames == null) {
return false;
}
for (String parentPath : changeSet.getParentPaths()) {
String firstLevelName = firstLevelName(parentPath);
if (firstLevelName != null && firstLevelIncludeNames.contains(firstLevelName)) {
return false;
}
}
// none of the first level include names matched any parentPath
// we can safely exclude this change set
return true;
}
final Set<String> parentPaths = new HashSet<String>(changeSet.getParentPaths());
// first go through the unprecise excludes. if that has any hit,
// we have to let it pass as include
boolean unpreciseExclude = false;
if (this.unpreciseExcludePathPatterns.size() != 0) {
final Iterator<String> it = parentPaths.iterator();
while (it.hasNext()) {
final String aParentPath = it.next();
if (patternsMatch(this.unpreciseExcludePathPatterns, aParentPath)) {
// if there is an unprecise match we keep track of that fact
// for later in this method
unpreciseExclude = true;
break;
}
}
}
// first go through excludes to remove those that are explicitly
// excluded
if (this.excludePathPatterns.size() != 0) {
final Iterator<String> it = parentPaths.iterator();
while (it.hasNext()) {
final String aParentPath = it.next();
if (patternsMatch(this.excludePathPatterns, aParentPath)) {
// if an exclude pattern matches, remove the parentPath
it.remove();
}
}
}
// note that cut-off paths are not applied with excludes,
// eg if excludePaths contains /var/foo/bar and path contains /var/foo
// with a maxPathLevel of 2, that might very well mean that
// the actual path would have been /var/foo/bar, but we don't know.
// so we cannot exclude it here and thus have a potential false negative
// (ie we didn't exclude it in the prefilter)
// now remainingPaths contains what is not excluded,
// then check if it is included
boolean included = false;
for (String aPath : parentPaths) {
// direct set contains is fastest, lets try that first
if (this.rootIncludePaths.contains(aPath)) {
included = true;
break;
}
if (firstLevelIncludeNames != null) {
final String firstLevelName = firstLevelName(aPath);
if (firstLevelName != null && !firstLevelIncludeNames.contains(firstLevelName)) {
// then the 'first level name check' concluded that
// it's not in any include path - hence we can skip
// the (more expensive) pattern check
continue;
}
}
if (patternsMatch(this.includePathPatterns, aPath)) {
included = true;
break;
}
}
if (!included) {
// well then we can definitely say that this commit is excluded
return true;
} else if (unpreciseExclude) {
// then it might have been excluded but we are not sure
// in which case we return false (as that's safe always)
return false;
}
if (this.propertyNames != null && this.propertyNames.size() != 0) {
if (disjoint(changeSet.getPropertyNames(), this.propertyNames)) {
// if propertyNames are defined then if we can't find any
// at this stage (if !included) then this equals to filtering out
return true;
}
// otherwise we have found a match, but one of the
// nodeType/nodeNames
// could still filter out, so we have to continue...
}
if (this.parentNodeTypes != null && this.parentNodeTypes.size() != 0) {
if (disjoint(changeSet.getParentNodeTypes(), this.parentNodeTypes)) {
// same story here: if nodeTypes is defined and we can't find any
// match
// then we're done now
return true;
}
// otherwise, again, continue
}
if (this.parentNodeNames != null && this.parentNodeNames.size() != 0) {
// and a 3rd time, if we can't find any nodeName match
// here, then we're filtering out
if (disjoint(changeSet.getParentNodeNames(), this.parentNodeNames)) {
return true;
}
}
// at this stage we haven't found any exclude, so we're likely including
return false;
}
private static boolean patternsMatch(Set<Pattern> pathPatterns, String path) {
if (path == null) {
return false;
}
for (Pattern pathPattern : pathPatterns) {
if (pathPattern.matcher(path).matches()) {
return true;
}
}
return false;
}
}