blob: 2bf1e0e79207f0a74503898c84fd831cad3818dc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.accumulo.mr.merge.util;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import org.apache.rya.accumulo.mr.merge.util.QueryRuleset.QueryRulesetException;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.algebra.AbstractQueryModelNode;
import org.eclipse.rdf4j.query.algebra.And;
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
import org.eclipse.rdf4j.query.algebra.QueryModelVisitor;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.ValueConstant;
import org.eclipse.rdf4j.query.algebra.ValueExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy;
import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet;
import org.eclipse.rdf4j.query.algebra.evaluation.ValueExprEvaluationException;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
/**
* A rule that defines a subset of statements to copy at the RDF level. Consists of a
* statement pattern and an optional filter expression.
*/
public class CopyRule extends AbstractQueryModelNode {
private static final ValueConstant TRUE = new ValueConstant(SimpleValueFactory.getInstance().createLiteral(true));
private static final String SUFFIX = UUID.randomUUID().toString();
private static final Var SUBJ_VAR = new Var("subject_" + SUFFIX);
private static final Var PRED_VAR = new Var("predicate_" + SUFFIX);
private static final Var OBJ_VAR = new Var("object_" + SUFFIX);
private static final Var CON_VAR = new Var("context_" + SUFFIX);
private static final Var UNDEFINED_VAR = new Var("undefined_" + SUFFIX);
/**
* Return whether a concrete statement satisfies a condition.
* @param stmt A statement from the input
* @param condition The condition to test. Expects variables expressed using the standard variable
* names used by this class, so it must be a rule's condition or derived from rules' conditions.
* @param strategy The evaluation strategy to apply
* @return True if the statement matches the condition
* @throws ValueExprEvaluationException
* @throws QueryEvaluationException
*/
public static boolean accept(final Statement stmt, final ValueExpr condition, final EvaluationStrategy strategy)
throws ValueExprEvaluationException, QueryEvaluationException {
final QueryBindingSet bindings = new QueryBindingSet();
bindings.addBinding(SUBJ_VAR.getName(), stmt.getSubject());
bindings.addBinding(PRED_VAR.getName(), stmt.getPredicate());
bindings.addBinding(OBJ_VAR.getName(), stmt.getObject());
if (stmt.getContext() != null) {
bindings.addBinding(CON_VAR.getName(), stmt.getContext());
}
return strategy.isTrue(condition, bindings);
}
/**
* Detect that a condition is trivial and can be replaced. This may mean it is always
* true, or that it contains undefined variables and therefore we must assume it is
* true or risk missing relevant statements.
* @param expr Condition to be evaluated
* @return true if the condition is trivial
*/
private static boolean trivialCondition(final ValueExpr expr) {
// If the expression is null or the constant "true":
if (expr == null || expr.equals(TRUE)) {
return true;
}
// If the expression contains undefined variables:
final VarSearchVisitor visitor = new VarSearchVisitor(UNDEFINED_VAR.getName());
expr.visit(visitor);
if (visitor.found) {
return true;
}
// Otherwise, the condition is non-trivial.
return false;
}
/**
* Visitor that checks a tree for the existence of a given variable name.
*/
private static class VarSearchVisitor extends AbstractQueryModelVisitor<RuntimeException> {
boolean found = false;
private final String queryVar;
public VarSearchVisitor(final String queryVar) {
this.queryVar = queryVar;
}
@Override
public void meet(final Var var) {
if (queryVar.equals(var.getName())) {
found = true;
}
}
@Override
public void meetNode(final QueryModelNode node) {
if (!found) {
node.visitChildren(this);
}
}
}
/**
* Visitor that standardizes variables to canonical names and restructures
* operators to preserve meaningful expressions while eliminating undefined
* conditions.
*/
private static class RuleVisitor extends AbstractQueryModelVisitor<RuntimeException> {
private final CopyRule rule;
RuleVisitor(final CopyRule rule) {
this.rule = rule;
}
@Override
public void meet(final Var node) {
final String oldName = node.getName();
if (rule.varMap.containsKey(oldName)) {
node.setName(rule.varMap.get(oldName).getName());
}
else {
if (node.hasValue() || node.equals(SUBJ_VAR) || node.equals(PRED_VAR) || node.equals(OBJ_VAR) || node.equals(CON_VAR)) {
return;
}
node.setName(UNDEFINED_VAR.getName());
}
}
/**
* If we can't evaluate one half of an AND by looking at this statement alone, it might
* turn out to be true in the full graph, so the statement is relevant if the half we
* can evaluate is true. If we can't evaluate either half, then the AND is useless and
* we must assume the statement is relevant. Otherwise, keep both sides.
*/
@Override
public void meet(final And expr) {
final ValueExpr left = expr.getLeftArg();
final ValueExpr right = expr.getRightArg();
left.visit(this);
right.visit(this);
final QueryModelNode parent = expr.getParentNode();
if (trivialCondition(left)) {
if (trivialCondition(right)) {
// Both sides are trivial; replace whole node
parent.replaceChildNode(expr, null);
}
else {
// Left side trivial, right side good; replace node with right arg
parent.replaceChildNode(expr, right);
}
}
else if (trivialCondition(right)) {
// Left side good, right side trivial; replace node with left arg
parent.replaceChildNode(expr, left);
}
// Otherwise, both sides are useful
}
}
private final StatementPattern statement;
private ValueExpr condition;
private final Map<String, Var> varMap = new HashMap<>();
private final RuleVisitor visitor = new RuleVisitor(this);
/**
* Instantiate a rule containing a StatementPattern, renaming any variables to canonical
* subject/predicate/object forms and saving the mappings from the original variable names.
* @param sp StatementPattern defining a set of triples to match
*/
public CopyRule(final StatementPattern sp) throws QueryRulesetException {
statement = sp;
final Var subjVar = statement.getSubjectVar();
final Var predVar = statement.getPredicateVar();
final Var objVar = statement.getObjectVar();
final Var conVar = statement.getContextVar();
int variables = 0;
if (subjVar == null || !subjVar.hasValue()) {
sp.setSubjectVar(SUBJ_VAR);
if (subjVar != null) {
varMap.put(subjVar.getName(), SUBJ_VAR);
}
variables++;
}
if (predVar == null || !predVar.hasValue()) {
sp.setPredicateVar(PRED_VAR);
if (predVar != null) {
varMap.put(predVar.getName(), PRED_VAR);
}
variables++;
}
if (objVar == null || !objVar.hasValue()) {
sp.setObjectVar(OBJ_VAR);
if (objVar != null) {
varMap.put(objVar.getName(), OBJ_VAR);
}
variables++;
}
if (variables == 3) {
throw new QueryRulesetException("Statement pattern with no constants would match every statement:\n" + sp);
}
if (conVar != null && !conVar.hasValue()) {
sp.setContextVar(CON_VAR);
varMap.put(conVar.getName(), CON_VAR);
}
}
/**
* Set the complete condition.
*/
private void setCondition(final ValueExpr newCondition) {
condition = newCondition;
condition.setParentNode(this);
}
/**
* Constrain a rule with a filter condition. If there are already conditions on the rule, they
* will be ANDed together. If this rule doesn't define all the variables used in the condition
* (because the rule only matches one statement pattern), it will assume those portions match,
* so that we are guaranteed to include all relevant statements.
* @param condition A boolean filter expression
*/
public void addCondition(final ValueExpr condition) {
final ValueExpr newCondition = condition.clone();
if (this.condition == null) {
setCondition(newCondition);
}
else {
setCondition(new And(this.condition, newCondition));
}
this.condition.visit(visitor);
// If, after rewriting, the condition still contains undefined variables, we can't
// meaningfully apply it to reject statements.
if (trivialCondition(this.condition)) {
this.condition = null;
}
}
/**
* Get the rule's boolean filter condition, if any.
* @return a ValueExpr that can be applied to any matching statements, or null.
*/
public ValueExpr getCondition() {
return condition;
}
/**
* Get the rule's statement pattern.
* @return A StatementPattern that defines which statements match the rule.
*/
public StatementPattern getStatement() {
return statement;
}
/**
* Validate that this is a non-trivial rule. A trivial rule consists of
* a statement pattern whose subject, predicate, and object are all
* variables (therefore it would match every statement).
* @return true if the rule is valid (non-trivial)
*/
void validate() throws QueryRulesetException {
if (!(statement.getSubjectVar().isConstant()
|| statement.getPredicateVar().isConstant()
|| statement.getObjectVar().isConstant())) {
throw new QueryRulesetException("Statement pattern with no constants would match every statement:\n" + statement.toString());
}
}
/**
* Replace a node in the rule's condition with some replacement. If
* @param current If this is found in the condition, apply the replacement
* @param replacement Replace with this. If replacing the top-level node, the replacement
* must be a ValueExpr or null.
*/
@Override
public void replaceChildNode(final QueryModelNode current, final QueryModelNode replacement) {
if (current.equals(condition) && replacement instanceof ValueExpr) {
setCondition(((ValueExpr) replacement).clone());
}
else if (current.equals(condition) && replacement == null) {
condition = null;
}
else if (condition != null) {
condition.replaceChildNode(current, replacement.clone());
}
}
/**
* Apply a visitor to both the statement and any conditions.
*/
@Override
public <X extends Exception> void visit(final QueryModelVisitor<X> visitor) throws X {
if (statement != null) {
statement.visit(visitor);
}
if (condition != null) {
condition.visit(visitor);
}
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder(statement.toString().trim());
if (condition != null) {
sb.append("\n Condition:\n \t");
sb.append(condition.toString().trim().replace("\n", "\n \t"));
}
return sb.toString();
}
@Override
public boolean equals(final Object obj) {
if (!obj.getClass().equals(CopyRule.class)) {
return false;
}
final CopyRule other = (CopyRule) obj;
if ((statement != null && !statement.equals(other.statement))
|| (statement == null && other.statement != null)) {
return false;
}
return (condition == null || condition.equals(other.condition))
&& (condition != null || other.condition == null);
}
@Override
public int hashCode() {
int result = statement == null ? 0 : statement.hashCode();
result += result * 41 + (condition == null ? 0 : condition.hashCode());
return result;
}
/**
* Detect whether this rule is at least as general as another.
* @param other Rule to compare against
* @return true if this rule will necessarily match everything the other rule would.
*/
public boolean isGeneralizationOf(final CopyRule other) {
if (statement == null || other.statement == null) {
return false;
}
// If each component of the statement and the condition are at least as general
// as the other rule's, then this rule is at least as general.
return varIsGeneralization(statement.getSubjectVar(), other.statement.getSubjectVar())
&& varIsGeneralization(statement.getPredicateVar(), other.statement.getPredicateVar())
&& varIsGeneralization(statement.getObjectVar(), other.statement.getObjectVar())
&& varIsGeneralization(statement.getContextVar(), other.statement.getContextVar())
&& (condition == null || condition.equals(other.condition));
}
/**
* Determine whether the first variable is at least as general as the second.
*/
private static boolean varIsGeneralization(final Var first, final Var second) {
if (first == null || !first.hasValue()) {
// if first is a variable, it is at least as general
return true;
}
// Otherwise, it is only at least as general if they are the same value
return second != null && first.getValue().equals(second.getValue());
}
}