blob: f20d16cbd0b2ac697eda68dd2e1d37a307d61a45 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.accumulo.mr.merge.util;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.apache.rya.accumulo.mr.merge.CopyTool;
import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
import org.apache.rya.rdftriplestore.RdfCloudTripleStore;
import org.apache.rya.rdftriplestore.inference.InferJoin;
import org.apache.rya.rdftriplestore.inference.InferUnion;
import org.apache.rya.rdftriplestore.inference.InferenceEngine;
import org.apache.rya.rdftriplestore.inference.InverseOfVisitor;
import org.apache.rya.rdftriplestore.inference.SameAsVisitor;
import org.apache.rya.rdftriplestore.inference.SubClassOfVisitor;
import org.apache.rya.rdftriplestore.inference.SubPropertyOfVisitor;
import org.apache.rya.rdftriplestore.inference.SymmetricPropertyVisitor;
import org.apache.rya.rdftriplestore.inference.TransitivePropertyVisitor;
import org.apache.rya.rdftriplestore.utils.FixedStatementPattern;
import org.apache.rya.rdftriplestore.utils.TransitivePropertySP;
import org.apache.rya.sail.config.RyaSailFactory;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.OWL;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.query.MalformedQueryException;
import org.eclipse.rdf4j.query.QueryLanguage;
import org.eclipse.rdf4j.query.UnsupportedQueryLanguageException;
import org.eclipse.rdf4j.query.algebra.Filter;
import org.eclipse.rdf4j.query.algebra.FunctionCall;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.ListMemberOperator;
import org.eclipse.rdf4j.query.algebra.Or;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.Union;
import org.eclipse.rdf4j.query.algebra.ValueExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.function.FunctionRegistry;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.query.parser.ParsedTupleQuery;
import org.eclipse.rdf4j.query.parser.QueryParserUtil;
import org.eclipse.rdf4j.sail.SailException;
/**
* Represents a set of {@link CopyRule} instances derived from a query. The ruleset determines a logical
* subset of statements in Rya, such that statements selected by the ruleset are at least enough to answer
* the query.
*/
public class QueryRuleset {
private static final Logger log = Logger.getLogger(QueryRuleset.class);
/**
* Represents an error attempting to convert a query to a set of rules.
*/
public static class QueryRulesetException extends Exception {
private static final long serialVersionUID = 1L;
public QueryRulesetException(final String s) {
super(s);
}
public QueryRulesetException(final String s, final Throwable throwable) {
super(s, throwable);
}
}
/**
* Takes in a parsed query tree and extracts the rules defining relevant statements.
*/
private static class RulesetVisitor extends AbstractQueryModelVisitor<QueryRulesetException> {
List<CopyRule> rules = new LinkedList<>();
private final Set<Value> superclasses = new HashSet<>();
private final Set<Value> superproperties = new HashSet<>();
private final Set<Value> sameAs = new HashSet<>();
private final Set<Value> transitive = new HashSet<>();
private final Set<Value> schemaProperties = new HashSet<>();
@Override
public void meet(StatementPattern node) throws QueryRulesetException {
final Var predVar = node.getPredicateVar();
// If this is a transitive property node, just match all statements with that property
if (node instanceof TransitivePropertySP && predVar.hasValue()) {
node = new StatementPattern(new Var("transitiveSubject"), predVar,
new Var("transitiveObject"), node.getContextVar());
// And make sure to grab the transitivity statement itself
transitive.add(predVar.getValue());
}
rules.add(new CopyRule(node));
}
@Override
public void meet(final Filter node) throws QueryRulesetException {
final ValueExpr condition = node.getCondition();
// If the condition is a function call, and we don't know about the function, don't try to test for it.
if (condition instanceof FunctionCall) {
final String uri = ((FunctionCall) condition).getURI();
if (FunctionRegistry.getInstance().get(uri) == null) {
// Just extract statement patterns from the child as if there were no filter.
node.getArg().visit(this);
}
}
// Otherwise, assume we can test for it: extract rules from below this node, and add the condition to each one.
else {
final RulesetVisitor childVisitor = new RulesetVisitor();
node.getArg().visit(childVisitor);
for (final CopyRule rule : childVisitor.rules) {
rule.addCondition(condition);
rules.add(rule);
}
superclasses.addAll(childVisitor.superclasses);
superproperties.addAll(childVisitor.superproperties);
}
}
@Override
public void meet(final Join node) throws QueryRulesetException {
final TupleExpr left = node.getLeftArg();
final TupleExpr right = node.getRightArg();
// If this join represents the application of inference logic, use its children to add the
// appropriate rules.
if (node instanceof InferJoin && left instanceof FixedStatementPattern) {
FixedStatementPattern fsp = (FixedStatementPattern) left;
final Value predValue = fsp.getPredicateVar().getValue();
// If this is a subClassOf relation, fetch all subClassOf and equivalentClass
// relations involving the relevant classes.
if (RDFS.SUBCLASSOF.equals(predValue) && right instanceof StatementPattern) {
final StatementPattern dne = (StatementPattern) right;
// If a subClassOf b equivalentClass c subClassOf d, then fsp will contain a statement
// for each class in the hierarchy. If we match every subClassOf and equivalentClass
// relation to any of {a,b,c,d}, then the hierarchy can be reconstructed.
for (final Statement st : fsp.statements) {
final Value superclassVal = st.getSubject();
// Rule to match the type assignment:
rules.add(new CopyRule(new StatementPattern(dne.getSubjectVar(),
dne.getPredicateVar(),
new Var(superclassVal.toString(), superclassVal),
dne.getContextVar())));
// Add to the set of classes for which we need the hierarchy:
superclasses.add(superclassVal);
}
}
// If this is a subPropertyOf relation, fetch all subPropertyOf and equivalentProperty
// relations involving the relevant properties.
else if (RDFS.SUBPROPERTYOF.equals(predValue) && right instanceof StatementPattern) {
final StatementPattern dne = (StatementPattern) right;
// If p subPropertyOf q subPropertyOf r subPropertyOf s, then fsp will contain a statement
// for each property in the hierarchy. If we match every subPropertyOf and equivalentProperty
// relation to any of {p,q,r,s}, then the hierarchy can be reconstructed.
for (final Statement st : fsp.statements) {
final Value superpropVal = st.getSubject();
// Rule to add the property:
rules.add(new CopyRule(new StatementPattern(dne.getSubjectVar(),
new Var(superpropVal.toString(), superpropVal),
dne.getObjectVar(),
dne.getContextVar())));
// Add to the set of properties for which we need the hierarchy:
superproperties.add(superpropVal);
}
}
// If this is a sameAs expansion, it may have one or two levels
if (OWL.SAMEAS.equals(predValue)) {
StatementPattern stmt = null;
final String replaceVar = fsp.getSubjectVar().getName();
String replaceVarInner = null;
final List<Value> replacements = new LinkedList<>();
final List<Value> replacementsInner = new LinkedList<>();
for (final Statement st : fsp.statements) {
replacements.add(st.getSubject());
}
if (right instanceof StatementPattern) {
stmt = (StatementPattern) right;
}
else if (right instanceof InferJoin) {
// Add the second set of replacements if given
final InferJoin inner = (InferJoin) right;
if (inner.getLeftArg() instanceof FixedStatementPattern
&& inner.getRightArg() instanceof StatementPattern) {
stmt = (StatementPattern) inner.getRightArg();
fsp = (FixedStatementPattern) inner.getLeftArg();
replaceVarInner = fsp.getSubjectVar().getName();
for (final Statement st : fsp.statements) {
replacementsInner.add(st.getSubject());
}
}
}
// Add different versions of the original statement:
if (stmt != null) {
for (final Value replacementVal : replacements) {
if (replacementsInner.isEmpty()) {
final StatementPattern transformed = stmt.clone();
if (transformed.getSubjectVar().equals(replaceVar)) {
transformed.setSubjectVar(new Var(replaceVar, replacementVal));
}
if (transformed.getObjectVar().equals(replaceVar)) {
transformed.setObjectVar(new Var(replaceVar, replacementVal));
}
rules.add(new CopyRule(transformed));
}
for (final Value replacementValInner : replacementsInner) {
final StatementPattern transformed = stmt.clone();
if (transformed.getSubjectVar().equals(replaceVar)) {
transformed.setSubjectVar(new Var(replaceVar, replacementVal));
}
else if (transformed.getSubjectVar().equals(replaceVarInner)) {
transformed.setSubjectVar(new Var(replaceVarInner, replacementValInner));
}
if (transformed.getObjectVar().equals(replaceVar)) {
transformed.setObjectVar(new Var(replaceVar, replacementVal));
}
else if (transformed.getObjectVar().equals(replaceVarInner)) {
transformed.setObjectVar(new Var(replaceVar, replacementValInner));
}
rules.add(new CopyRule(transformed));
}
}
}
// Add to the set of resources for which we need sameAs relations:
sameAs.addAll(replacements);
sameAs.addAll(replacementsInner);
}
}
// If it's a normal join, visit the children.
else {
super.meet(node);
}
}
@Override
public void meet(final Union node) throws QueryRulesetException {
node.visitChildren(this);
if (node instanceof InferUnion) {
// If this is the result of inference, search each tree for (non-standard) properties and add them
// to the set of properties for which to include schema information.
final AbstractQueryModelVisitor<QueryRulesetException> propertyVisitor = new AbstractQueryModelVisitor<QueryRulesetException>() {
@Override
public void meet(final StatementPattern node) {
if (node.getPredicateVar().hasValue()) {
final IRI predValue = (IRI) node.getPredicateVar().getValue();
final String ns = predValue.getNamespace();
if (node instanceof FixedStatementPattern
&& (RDFS.SUBPROPERTYOF.equals(predValue) || OWL.EQUIVALENTPROPERTY.equals(predValue))) {
// This FSP replaced a property, so find all the properties it entails
final FixedStatementPattern fsp = (FixedStatementPattern) node;
for (final Statement stmt : fsp.statements) {
schemaProperties.add(stmt.getSubject());
}
}
else if (!(OWL.NAMESPACE.equals(ns) || RDFS.NAMESPACE.equals(ns) || RDF.NAMESPACE.equals(ns))) {
// This is a regular triple pattern; grab its predicate
schemaProperties.add(predValue);
}
}
}
};
node.getLeftArg().visit(propertyVisitor);
node.getRightArg().visit(propertyVisitor);
}
}
/**
* Add rules covering the portions of the schema that may be necessary to use inference
* with this query.
*/
public void addSchema() throws QueryRulesetException {
// Combine the relevant portions of the class hierarchy into one subclass rule and one equivalent class rule:
if (!superclasses.isEmpty()) {
final Var superClassVar = new Var("superClassVar");
// Subclasses of the given classes:
addListRule(new Var("subClassVar"), null, RDFS.SUBCLASSOF, superClassVar, superclasses);
// Equivalent classes to the given classes (this might be stated in either direction):
addListRule(new Var("eqClassSubject"), superclasses, OWL.EQUIVALENTCLASS, new Var("eqClassObject"), superclasses);
}
// Combine the relevant portions of the property hierarchy into one subproperty rule and one equivalent property rule:
if (!superproperties.isEmpty()) {
final Var superPropertyVar = new Var("superPropertyVar");
// Subproperties of the given properties:
addListRule(new Var("subPropertyVar"), null, RDFS.SUBPROPERTYOF, superPropertyVar, superproperties);
// Equivalent properties to the given properties (this might be stated in either direction):
addListRule(new Var("eqPropSubject"), superproperties, OWL.EQUIVALENTPROPERTY, new Var("eqPropObject"), superproperties);
}
// Get the relevant portions of the owl:sameAs graph
if (!sameAs.isEmpty()) {
final Var sameAsSubj = new Var("sameAsSubject");
final Var sameAsObj = new Var("sameAsObject");
addListRule(sameAsSubj, sameAs, OWL.SAMEAS, sameAsObj, sameAs);
}
// Get the potentially relevant owl:TransitiveProperty statements
if (!transitive.isEmpty()) {
final Var transitiveVar = new Var(OWL.TRANSITIVEPROPERTY.toString(), OWL.TRANSITIVEPROPERTY);
addListRule(new Var("transitiveProp"), transitive, RDF.TYPE, transitiveVar, null);
}
// Get any owl:SymmetricProperty and owl:inverseOf statements for relevant properties
if (!schemaProperties.isEmpty()) {
final Var symmetricVar = new Var(OWL.SYMMETRICPROPERTY.toString(), OWL.SYMMETRICPROPERTY);
addListRule(new Var("symmetricProp"), schemaProperties, RDF.TYPE, symmetricVar, null);
addListRule(new Var("inverseSubject"), schemaProperties, OWL.INVERSEOF, new Var("inverseObject"), schemaProperties);
}
}
/**
* Build and add a rule that matches triples having a specific predicate, where subject and object constraints
* are each defined using a Var and a set of Values, and each can represent one of: a constant value
* (Var has a value), an enumerated set of possible values, to be turned into a filter (Var has no
* Value and set of Values is non-null), or an unconstrained variable (Var has no value and set of
* Values is null). If both subject and object are variables with enumerated sets, only one part needs to
* match in order to accept the triple.
* @param subjVar Var corresponding to the subject. May have a specific value or represent a variable.
* @param subjValues Either null or a Set of Values that the subject variable can have, tested using a filter.
* @param predicate The URI for the predicate to match
* @param objVar Var corresponding to the object. May have a specific value or represent a variable.
* @param objValues Either null or a Set of Values that the object variable can have, tested using a filter
* @throws QueryRulesetException if the rule can't be created
*/
private void addListRule(final Var subjVar, final Set<Value> subjValues, final IRI predicate,
final Var objVar, final Set<Value> objValues) throws QueryRulesetException {
ListMemberOperator subjCondition = null;
ListMemberOperator objCondition = null;
if (subjValues != null) {
subjCondition = new ListMemberOperator();
subjCondition.addArgument(subjVar);
for (final Value constant : subjValues) {
subjCondition.addArgument(new Var(constant.toString(), constant));
}
}
if (objValues != null) {
objCondition = new ListMemberOperator();
objCondition.addArgument(objVar);
for (final Value constant : objValues) {
objCondition.addArgument(new Var(constant.toString(), constant));
}
}
final Var predVar = new Var(predicate.toString(), predicate);
final CopyRule listRule = new CopyRule(new StatementPattern(subjVar, predVar, objVar));
if (subjCondition != null && objCondition != null) {
listRule.addCondition(new Or(subjCondition, objCondition));
}
else if (subjCondition != null) {
listRule.addCondition(subjCondition);
}
else if (objCondition != null) {
listRule.addCondition(objCondition);
}
rules.add(listRule);
}
}
/**
* The rules themselves -- any statement satisfying any of these rules will be copied.
*/
protected Set<CopyRule> rules = new HashSet<>();
/**
* The SPARQL query that defines the ruleset.
*/
protected String query;
/**
* A Rya configuration.
*/
protected RdfCloudTripleStoreConfiguration conf;
/**
* Extract a set of rules from a query found in a Configuration.
* @param conf Configuration containing either the query string, or name of a file containing the query, plus inference parameters.
* @throws QueryRulesetException if the query can't be read, parsed, and resolved to valid rules
*/
public QueryRuleset(final RdfCloudTripleStoreConfiguration conf) throws QueryRulesetException {
this.conf = conf;
setQuery();
setRules();
}
/**
* Extract a set of rules from a query.
* @param query A SPARQL query string
* @throws QueryRulesetException if the query can't be parsed and resolved to valid rules
*/
public QueryRuleset(final String query) throws QueryRulesetException {
this.query = query;
setRules();
}
/**
* Get the query that was used to construct this ruleset.
* @return A SPARQL query
*/
public String getQuery() {
return query;
}
/**
* Set this ruleset's defining query based on the configuration. Query can be
* specified directly or using a file; if it's read from a file, the query
* text will also be added to the configuration.
* @return SPARQL query
* @throws QueryRulesetException if there is no configuration, or if the query can't be found or read
*/
private void setQuery() throws QueryRulesetException {
if (conf == null) {
throw new QueryRulesetException("No Configuration given");
}
query = conf.get(CopyTool.QUERY_STRING_PROP);
final String queryFile = conf.get(CopyTool.QUERY_FILE_PROP);
if (query == null && queryFile != null) {
try {
query = FileUtils.readFileToString(new File(queryFile), StandardCharsets.UTF_8);
conf.set(CopyTool.QUERY_STRING_PROP, query);
}
catch (final IOException e) {
throw new QueryRulesetException("Error loading query from file: " + queryFile, e);
}
}
else if (query == null) {
throw new QueryRulesetException("No query string or query file provided");
}
}
/**
* Extract the rules from the query string, applying inference rules if configured to.
* @throws QueryRulesetException if the parsed query can't be parsed and translated into valid rules.
*/
private void setRules() throws QueryRulesetException {
final ParsedTupleQuery ptq;
final TupleExpr te;
try {
ptq = QueryParserUtil.parseTupleQuery(QueryLanguage.SPARQL, query, null);
}
catch (UnsupportedQueryLanguageException | MalformedQueryException e) {
throw new QueryRulesetException("Error parsing query:\n" + query, e);
}
te = ptq.getTupleExpr();
// Before converting to rules (and renaming variables), validate that no statement patterns
// consist of only variables (this would result in a rule that matches every triple).
// Needs to be done before inference, since inference rules may create such statement patterns
// that are OK because they won'd be converted to rules directly.
te.visit(new AbstractQueryModelVisitor<QueryRulesetException>() {
@Override
public void meet(final StatementPattern node) throws QueryRulesetException {
if (!(node.getSubjectVar().hasValue() || node.getPredicateVar().hasValue() || node.getObjectVar().hasValue())) {
throw new QueryRulesetException("Statement pattern with no constants would match every statement:\n"
+ node + "\nFrom parsed query:\n" + te);
}
}
});
// Apply inference, if applicable
if (conf != null && conf.isInfer()) {
RdfCloudTripleStore store = null;
try {
log.info("Applying inference rules");
store = (RdfCloudTripleStore) RyaSailFactory.getInstance(conf);
final InferenceEngine inferenceEngine = store.getInferenceEngine();
// Apply in same order as query evaluation:
te.visit(new TransitivePropertyVisitor(conf, inferenceEngine));
te.visit(new SymmetricPropertyVisitor(conf, inferenceEngine));
te.visit(new InverseOfVisitor(conf, inferenceEngine));
te.visit(new SubPropertyOfVisitor(conf, inferenceEngine));
te.visit(new SubClassOfVisitor(conf, inferenceEngine));
te.visit(new SameAsVisitor(conf, inferenceEngine));
log.info("Query after inference:\n");
for (final String line : te.toString().split("\n")) {
log.info("\t" + line);
}
}
catch (final Exception e) {
throw new QueryRulesetException("Error applying inference to parsed query:\n" + te, e);
}
finally {
if (store != null) {
try {
store.shutDown();
} catch (final SailException e) {
log.error("Error shutting down Sail after applying inference", e);
}
}
}
}
// Extract the StatementPatterns and Filters and turn them into rules:
final RulesetVisitor rv = new RulesetVisitor();
try {
te.visit(rv);
rv.addSchema();
}
catch (final QueryRulesetException e) {
throw new QueryRulesetException("Error extracting rules from parsed query:\n" + te, e);
}
for (final CopyRule candidateRule : rv.rules) {
boolean unique = true;
for (final CopyRule otherRule : rv.rules) {
if (!candidateRule.equals(otherRule) && otherRule.isGeneralizationOf(candidateRule)) {
unique = false;
break;
}
}
if (unique) {
rules.add(candidateRule);
}
}
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("Original Query:\n\n\t");
sb.append(query.replace("\n", "\n\t")).append("\n\nRuleset:\n");
for (final CopyRule rule : rules) {
sb.append("\n\t").append(rule.toString().replace("\n", "\n\t")).append("\n");
}
return sb.toString();
}
}