blob: 3b62abc088209d66f20a13272aec41e0e128e2fa [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.property.strategy;
import static com.google.common.collect.Queues.newArrayDeque;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.ENTRY_COUNT_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_CONTENT_NODE_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.KEY_COUNT_PROPERTY_NAME;
import java.util.Deque;
import java.util.Iterator;
import java.util.Set;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.counter.ApproximateCounter;
import org.apache.jackrabbit.oak.plugins.index.counter.NodeCounterEditor;
import org.apache.jackrabbit.oak.plugins.index.counter.jmx.NodeCounter;
import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
import org.apache.jackrabbit.oak.query.FilterIterators;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.query.QueryLimits;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Supplier;
import com.google.common.collect.Iterators;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
/**
* An IndexStoreStrategy implementation that saves the nodes under a hierarchy
* that mirrors the repository tree. <br>
* This should minimize the chance that concurrent updates overlap on the same
* content node.<br>
* <br>
* For example for a node that is under {@code /test/node}, the index
* structure will be {@code /oak:index/index/test/node}:
*
* <pre>
* {@code
* /
* test
* node
* oak:index
* index
* test
* node
* }
* </pre>
*
*/
public class ContentMirrorStoreStrategy implements IndexStoreStrategy {
static final Logger LOG = LoggerFactory.getLogger(ContentMirrorStoreStrategy.class);
/**
* logging a warning every {@code oak.traversing.warn} traversed nodes. Default {@code 10000}
*/
public static final int TRAVERSING_WARN = Integer.getInteger("oak.traversing.warn", 10000);
private final String indexName;
private final String pathPrefix;
private final boolean prependPathPrefix;
public ContentMirrorStoreStrategy() {
this(INDEX_CONTENT_NODE_NAME);
}
public ContentMirrorStoreStrategy(String indexName) {
this(indexName, "", true);
}
/**
* Constructs a ContentMirrorStoreStrategy
*
* @param indexName name of sub node under which paths are stored
* @param pathPrefix path of the index in repository. Defaults to empty for indexes at root nodes i.e.
* those stored directly under '/oak:index'. For non root index its the path excluding
* the '/oak:index' node. For e.g. for index at '/content/oak:index/fooIndex' the
* pathPrefix would be '/content'.
* If this is appened to the paths returned by index then they would become absolute
* path in repository
* @param prependPathPrefix Should the path prefix be added to the query result
*/
public ContentMirrorStoreStrategy(String indexName, String pathPrefix, boolean prependPathPrefix) {
this.indexName = indexName;
this.pathPrefix = pathPrefix;
this.prependPathPrefix = prependPathPrefix;
}
@Override
public void update(
Supplier<NodeBuilder> index, String path,
@Nullable final String indexName,
@Nullable final NodeBuilder indexMeta,
Set<String> beforeKeys, Set<String> afterKeys) {
for (String key : beforeKeys) {
remove(index.get(), key, path);
}
for (String key : afterKeys) {
insert(index.get(), key, path);
}
}
private void remove(NodeBuilder index, String key, String value) {
ApproximateCounter.adjustCountSync(index, -1);
NodeBuilder builder = index.getChildNode(key);
if (builder.exists()) {
ApproximateCounter.adjustCountSync(builder, -1);
// Collect all builders along the given path
Deque<NodeBuilder> builders = newArrayDeque();
builders.addFirst(builder);
// Descend to the correct location in the index tree
for (String name : PathUtils.elements(value)) {
builder = builder.getChildNode(name);
builders.addFirst(builder);
}
// Drop the match value, if present
if (builder.exists()) {
builder.removeProperty("match");
}
// Prune all index nodes that are no longer needed
prune(index, builders, key);
}
}
private void insert(NodeBuilder index, String key, String value) {
ApproximateCounter.adjustCountSync(index, 1);
// NodeBuilder builder = index.child(key);
NodeBuilder builder = fetchKeyNode(index, key);
ApproximateCounter.adjustCountSync(builder, 1);
for (String name : PathUtils.elements(value)) {
builder = builder.child(name);
}
builder.setProperty("match", true);
}
public Iterable<String> query(final Filter filter, final String indexName,
final NodeState indexMeta, final String indexStorageNodeName,
final Iterable<String> values) {
final NodeState index = indexMeta.getChildNode(indexStorageNodeName);
return new Iterable<String>() {
@Override
public Iterator<String> iterator() {
PathIterator it = new PathIterator(filter, indexName, pathPrefix, prependPathPrefix);
if (values == null) {
it.setPathContainsValue(true);
it.enqueue(getChildNodeEntries(index).iterator());
} else {
for (String p : values) {
NodeState property = index.getChildNode(p);
if (property.exists()) {
// we have an entry for this value, so use it
it.enqueue(Iterators.singletonIterator(
new MemoryChildNodeEntry("", property)));
}
}
}
return it;
}
};
}
@NotNull
Iterable<? extends ChildNodeEntry> getChildNodeEntries(@NotNull
final NodeState index) {
return index.getChildNodeEntries();
}
@Override
public Iterable<String> query(final Filter filter, final String name,
final NodeState indexMeta, final Iterable<String> values) {
return query(filter, name, indexMeta, this.indexName, values);
}
@Override
public long count(NodeState root, NodeState indexMeta, Set<String> values, int max) {
return count(null, root, indexMeta, this.indexName, values, max);
}
@Override
public long count(final Filter filter, NodeState root, NodeState indexMeta, Set<String> values, int max) {
return count(filter, root, indexMeta, this.indexName, values, max);
}
long count(Filter filter, NodeState root, NodeState indexMeta, final String indexStorageNodeName,
Set<String> values, int max) {
NodeState index = indexMeta.getChildNode(indexStorageNodeName);
long count = -1;
if (values == null) {
// property is not null
PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
if (ec != null) {
// negative value implies fall-back to counting
count = ec.getValue(Type.LONG);
} else {
// negative value means that approximation isn't available
count = ApproximateCounter.getCountSync(index);
}
if (count < 0) {
CountingNodeVisitor v = new CountingNodeVisitor(max);
v.visit(index);
count = v.getEstimatedCount();
if (count >= max) {
// "is not null" queries typically read more data
count *= 10;
}
}
} else {
// property = x, or property in (x, y, z)
int size = values.size();
if (size == 0) {
return 0;
}
PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
if (ec != null) {
count = ec.getValue(Type.LONG);
if (count >= 0) {
// assume 10*NodeCounterEditor.DEFAULT_RESOLUTION entries per key, so that this index is used
// instead of traversal, but not instead of a regular property index
long keyCount = count / (10 * NodeCounterEditor.DEFAULT_RESOLUTION);
ec = indexMeta.getProperty(KEY_COUNT_PROPERTY_NAME);
if (ec != null) {
keyCount = ec.getValue(Type.LONG);
}
// cast to double to avoid overflow
// (entryCount could be Long.MAX_VALUE)
// the cost is not multiplied by the size,
// otherwise the traversing index might be used
keyCount = Math.max(1, keyCount);
count = (long) ((double) count / keyCount) + size;
}
} else {
// for this index, property "entryCount" is not set
long approxMax = 0;
long approxCount = ApproximateCounter.getCountSync(index);
if (approxCount != -1) {
// approximate count is available for the index:
// check approximate counts for each value
for (String p : values) {
NodeState s = index.getChildNode(p);
if (s.exists()) {
long a = ApproximateCounter.getCountSync(s);
if (a != -1) {
approxMax += a;
} else if (approxMax > 0) {
// in absence of approx count for a key we should be conservative
approxMax += 10 * NodeCounterEditor.DEFAULT_RESOLUTION;
}
}
}
if (approxMax > 0) {
count = approxMax;
}
}
}
// still, property = x, or property in (x, y, z),
// and we don't know the count ("entryCount" = -1)
if (count < 0) {
count = 0;
max = Math.max(10, max / size);
int i = 0;
for (String p : values) {
if (count > max && i > 3) {
// the total count is extrapolated from the the number
// of values counted so far to the total number of values
count = count * size / i;
break;
}
NodeState s = index.getChildNode(p);
if (s.exists()) {
CountingNodeVisitor v = new CountingNodeVisitor(max);
v.visit(s);
count += v.getEstimatedCount();
}
i++;
}
}
}
String filterRootPath = null;
if (filter != null &&
filter.getPathRestriction().equals(Filter.PathRestriction.ALL_CHILDREN)) {
filterRootPath = filter.getPath();
}
if (filterRootPath != null) {
// scale cost according to path restriction
long totalNodesCount = NodeCounter.getEstimatedNodeCount(root, "/", true);
if (totalNodesCount != -1) {
long filterPathCount = NodeCounter.getEstimatedNodeCount(root, filterRootPath, true);
if (filterPathCount != -1) {
// assume nodes in the index are evenly distributed in the repository (old idea)
long countScaledDown = (long) ((double) count / totalNodesCount * filterPathCount);
// assume 80% of the indexed nodes are in this subtree
long mostNodesFromThisSubtree = (long) (filterPathCount * 0.8);
// count can at most be the assumed subtree size
count = Math.min(count, mostNodesFromThisSubtree);
// this in theory should not have any effect,
// except if the above estimates are incorrect,
// so this is just for safety feature
count = Math.max(count, countScaledDown);
}
}
}
return count;
}
/**
* An iterator over paths within an index node.
*/
static class PathIterator implements Iterator<String> {
private final Filter filter;
private final String indexName;
private final Deque<Iterator<? extends ChildNodeEntry>> nodeIterators =
Queues.newArrayDeque();
private int readCount;
private int intermediateNodeReadCount;
private boolean init;
private boolean closed;
private String filterPath;
private String pathPrefix;
private String parentPath;
private String currentPath;
private boolean pathContainsValue;
private final boolean prependPathPrefix;
/**
* Keep the returned path, to avoid returning duplicate entries.
*/
private final Set<String> knownPaths = Sets.newHashSet();
private final QueryLimits settings;
PathIterator(Filter filter, String indexName, String pathPrefix, boolean prependPathPrefix) {
this.filter = filter;
this.pathPrefix = pathPrefix;
this.indexName = indexName;
boolean shouldDescendDirectly = filter.getPathRestriction().equals(Filter.PathRestriction.ALL_CHILDREN);
if (shouldDescendDirectly) {
filterPath = filter.getPath();
if (PathUtils.denotesRoot(filterPath)) {
filterPath = "";
}
} else {
filterPath = "";
}
parentPath = "";
currentPath = "/";
this.settings = filter.getQueryLimits();
this.prependPathPrefix = prependPathPrefix;
}
void enqueue(Iterator<? extends ChildNodeEntry> it) {
nodeIterators.addLast(it);
}
void setPathContainsValue(boolean pathContainsValue) {
if (init) {
throw new IllegalStateException("This iterator is already initialized");
}
this.pathContainsValue = pathContainsValue;
}
@Override
public boolean hasNext() {
if (!closed && !init) {
fetchNext();
init = true;
}
return !closed;
}
private void fetchNext() {
while (true) {
fetchNextPossiblyDuplicate();
if (closed) {
return;
}
if (pathContainsValue) {
String value = PathUtils.elements(currentPath).iterator().next();
currentPath = PathUtils.relativize(value, currentPath);
// don't return duplicate paths:
// Set.add returns true if the entry was new,
// so if it returns false, it was already known
if (!knownPaths.add(currentPath)) {
continue;
}
}
break;
}
}
private void fetchNextPossiblyDuplicate() {
while (!nodeIterators.isEmpty()) {
Iterator<? extends ChildNodeEntry> iterator = nodeIterators.getLast();
if (iterator.hasNext()) {
ChildNodeEntry entry = iterator.next();
NodeState node = entry.getNodeState();
String name = entry.getName();
if (NodeStateUtils.isHidden(name)) {
continue;
}
currentPath = PathUtils.concat(parentPath, name);
if (!"".equals(filterPath)) {
String p = currentPath;
if (pathContainsValue) {
String value = PathUtils.elements(p).iterator().next();
p = PathUtils.relativize(value, p);
}
if ("".equals(pathPrefix)) {
p = PathUtils.concat("/", p);
} else {
p = PathUtils.concat(pathPrefix, p);
}
if (!"".equals(p) &&
!p.equals(filterPath) &&
!PathUtils.isAncestor(p, filterPath) &&
!PathUtils.isAncestor(filterPath, p)) {
continue;
}
}
nodeIterators.addLast(node.getChildNodeEntries().iterator());
parentPath = currentPath;
if (node.getBoolean("match")) {
readCount++;
if (readCount % TRAVERSING_WARN == 0) {
FilterIterators.checkReadLimit(readCount, settings);
LOG.warn("Index-Traversed {} nodes ({} index entries) using index {} with filter {}", readCount, intermediateNodeReadCount, indexName, filter);
}
return;
} else {
intermediateNodeReadCount++;
}
} else {
nodeIterators.removeLast();
parentPath = PathUtils.getParentPath(parentPath);
}
}
currentPath = null;
closed = true;
}
@Override
public String next() {
if (closed) {
throw new IllegalStateException("This iterator is closed");
}
if (!init) {
fetchNext();
init = true;
}
String result = prependPathPrefix ? PathUtils.concat(pathPrefix, currentPath) : currentPath;
fetchNext();
return result;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
/**
* A node visitor to recursively traverse a number of nodes.
*/
interface NodeVisitor {
void visit(NodeState state);
}
/**
* A node visitor that counts the number of matching nodes up to a given
* maximum, in order to estimate the number of matches.
*/
static class CountingNodeVisitor implements NodeVisitor {
/**
* The maximum number of matching nodes to count.
*/
final int maxCount;
/**
* The current count of matching nodes.
*/
int count;
/**
* The current depth (number of parent nodes).
*/
int depth;
/**
* The sum of the depth of all matching nodes. This value is used to
* calculate the average depth.
*/
long depthTotal;
CountingNodeVisitor(int maxCount) {
this.maxCount = maxCount;
}
@Override
public void visit(NodeState state) {
if (state.hasProperty("match")) {
count++;
depthTotal += depth;
}
if (count < maxCount) {
depth++;
for (ChildNodeEntry entry : state.getChildNodeEntries()) {
if (count >= maxCount) {
break;
}
visit(entry.getNodeState());
}
depth--;
}
}
/**
* The number of matches (at most the maximum count).
*
* @return the match count
*/
int getCount() {
return count;
}
/**
* The number of estimated matches. This value might be higher than the
* number of counted matches, if the maximum number of matches has been
* reached. It is based on the average depth of matches, and the average
* number of child nodes.
*
* @return the estimated matches
*/
int getEstimatedCount() {
if (count < maxCount) {
return count;
}
double averageDepth = (int) (depthTotal / count);
// the number of estimated matches is higher
// the higher the average depth of the first hits
long estimatedNodes = (long) (count * Math.pow(1.1, averageDepth));
estimatedNodes = Math.min(estimatedNodes, Integer.MAX_VALUE);
return Math.max(count, (int) estimatedNodes);
}
}
/**
* fetch from the index the <i>key</i> node
*
* @param index
* the current index root
* @param key
* the 'key' to fetch from the repo
* @return the node representing the key
*/
NodeBuilder fetchKeyNode(@NotNull NodeBuilder index,
@NotNull String key) {
return index.child(key);
}
/**
* Physically prune a list of nodes from the index
*
* @param index
* the current index
* @param builders
* list of nodes to prune
* @param key the key of the index we're processing
*/
void prune(final NodeBuilder index, final Deque<NodeBuilder> builders, final String key) {
for (NodeBuilder node : builders) {
if (node.getBoolean("match") || node.getChildNodeCount(1) > 0) {
return;
} else if (node.exists()) {
node.remove();
}
}
}
@Override
public boolean exists(Supplier<NodeBuilder> index, String key) {
// This is currently not implemented, because there is no test case for it,
// and because there is currently no need for this method with this class.
// We would need to traverse the tree and search for an entry "match".
// See also OAK-2663 for a potential (but untested) implementation.
throw new UnsupportedOperationException();
}
@Override
public String getIndexNodeName() {
return indexName;
}
}