blob: f4abd7e3d386121c82d658aa9518a16c330da5f6 [file] [log] [blame]
package org.apache.rya.rdftriplestore.evaluation;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import static com.google.common.base.Preconditions.checkNotNull;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.rya.api.RdfCloudTripleStoreConfiguration;
import org.apache.rya.api.persist.RdfEvalStatsDAO;
import org.apache.rya.api.persist.RdfEvalStatsDAO.CARDINALITY_OF;
import org.apache.rya.rdftriplestore.inference.DoNotExpandSP;
import org.apache.rya.rdftriplestore.utils.FixedStatementPattern;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.query.algebra.BinaryTupleOperator;
import org.eclipse.rdf4j.query.algebra.Filter;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.Projection;
import org.eclipse.rdf4j.query.algebra.Slice;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.UnaryTupleOperator;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics;
/**
* Class RdfCloudTripleStoreEvaluationStatistics
* Date: Apr 12, 2011
* Time: 1:31:05 PM
*/
public class RdfCloudTripleStoreEvaluationStatistics<C extends RdfCloudTripleStoreConfiguration> extends EvaluationStatistics {
private C conf;
private RdfEvalStatsDAO<C> rdfEvalStatsDAO;
protected boolean pushEmptyRdfTypeDown = true;
protected boolean useCompositeCardinalities = true;
public RdfCloudTripleStoreEvaluationStatistics(final C conf, final RdfEvalStatsDAO<C> rdfEvalStatsDAO) {
checkNotNull(conf);
checkNotNull(rdfEvalStatsDAO);
try {
this.conf = conf;
this.rdfEvalStatsDAO = rdfEvalStatsDAO;
pushEmptyRdfTypeDown = conf.isStatsPushEmptyRdftypeDown();
useCompositeCardinalities = conf.isUseCompositeCardinality();
} catch (final Exception e) {
throw new RuntimeException(e);
}
}
@Override
public double getCardinality(final TupleExpr expr) {
if (expr instanceof Filter) {
final Filter f = (Filter) expr;
// filters must make sets smaller
return super.getCardinality(f.getArg()) / 10;
}
return super.getCardinality(expr);
}
@Override
protected CardinalityCalculator createCardinalityCalculator() {
return new RdfCloudTripleStoreCardinalityCalculator(this);
}
public RdfEvalStatsDAO<C> getRdfEvalStatsDAO() {
return rdfEvalStatsDAO;
}
public void setRdfEvalStatsDAO(final RdfEvalStatsDAO<C> rdfEvalStatsDAO) {
this.rdfEvalStatsDAO = rdfEvalStatsDAO;
}
public class RdfCloudTripleStoreCardinalityCalculator extends CardinalityCalculator {
private final RdfCloudTripleStoreEvaluationStatistics<C> statistics;
protected Map<Var, Collection<Statement>> fspMap;
public RdfCloudTripleStoreCardinalityCalculator(final RdfCloudTripleStoreEvaluationStatistics<C> statistics) {
this.statistics = statistics;
}
@Override
protected double getCardinality(final StatementPattern sp) {
final Var subjectVar = sp.getSubjectVar();
final Resource subj = (Resource) getConstantValue(subjectVar);
final Var predicateVar = sp.getPredicateVar();
final IRI pred = (IRI) getConstantValue(predicateVar);
final Var objectVar = sp.getObjectVar();
final Value obj = getConstantValue(objectVar);
final Resource context = (Resource) getConstantValue(sp.getContextVar());
// set rdf type to be a max value (as long as the object/subject aren't specified) to
if (pred != null) {
if (statistics.pushEmptyRdfTypeDown && RDF.TYPE.equals(pred) && subj == null && obj == null) {
return Double.MAX_VALUE;
}
}
// FixedStatementPattern indicates that this is when backward chaining reasoning is being used
if (sp instanceof FixedStatementPattern) {
//no query here
final FixedStatementPattern fsp = (FixedStatementPattern) sp;
//TODO: assume that only the subject is open ended here
final Var fspSubjectVar = fsp.getSubjectVar();
if (fspSubjectVar != null && fspSubjectVar.getValue() == null) {
if (fspMap == null) {
fspMap = new HashMap<Var, Collection<Statement>>();
}
fspMap.put(fspSubjectVar, fsp.statements);
}
return fsp.statements.size();
}
/**
* Use the output of the FixedStatementPattern to determine more information
*/
if (fspMap != null && sp instanceof DoNotExpandSP) {
//TODO: Might be a better way than 3 map pulls
RdfEvalStatsDAO.CARDINALITY_OF cardinality_of = null;
Collection<Statement> statements = null;
// TODO unsure of how to incorporate additional cardinalities here
if (objectVar != null && objectVar.getValue() == null) {
statements = fspMap.get(objectVar);
cardinality_of = RdfEvalStatsDAO.CARDINALITY_OF.OBJECT;
}
if (statements == null && predicateVar != null && predicateVar.getValue() == null) {
statements = fspMap.get(predicateVar);
cardinality_of = RdfEvalStatsDAO.CARDINALITY_OF.PREDICATE;
}
if (statements == null && subjectVar != null && subjectVar.getValue() == null) {
statements = fspMap.get(subjectVar);
cardinality_of = RdfEvalStatsDAO.CARDINALITY_OF.SUBJECT;
}
if (statements != null) {
double fspCard = 0;
for (final Statement statement : statements) {
final List<Value> values = new ArrayList<Value>();
values.add(statement.getSubject());
fspCard += rdfEvalStatsDAO.getCardinality(conf, cardinality_of, values, context);
}
return fspCard;
}
}
/**
* We put full triple scans before rdf:type because more often than not
* the triple scan is being joined with something else that is better than
* asking the full rdf:type of everything.
*/
double cardinality = Double.MAX_VALUE - 1;
try {
if (subj != null) {
final List<Value> values = new ArrayList<Value>();
CARDINALITY_OF card = RdfEvalStatsDAO.CARDINALITY_OF.SUBJECT;
values.add(subj);
if (useCompositeCardinalities){
if (pred != null){
values.add(pred);
card = RdfEvalStatsDAO.CARDINALITY_OF.SUBJECTPREDICATE;
}
else if (obj != null){
values.add(obj);
card = RdfEvalStatsDAO.CARDINALITY_OF.SUBJECTOBJECT;
}
}
double evalCard = evalCard = rdfEvalStatsDAO.getCardinality(conf, card, values, context);
// the cardinality will be -1 if there was no value found (if the index does not exist)
if (evalCard >= 0) {
cardinality = Math.min(cardinality, evalCard);
} else {
cardinality = 1;
}
}
else if (pred != null) {
final List<Value> values = new ArrayList<Value>();
CARDINALITY_OF card = RdfEvalStatsDAO.CARDINALITY_OF.PREDICATE;
values.add(pred);
if (useCompositeCardinalities){
if (obj != null){
values.add(obj);
card = RdfEvalStatsDAO.CARDINALITY_OF.PREDICATEOBJECT;
}
}
double evalCard = evalCard = rdfEvalStatsDAO.getCardinality(conf, card, values, context);
if (evalCard >= 0) {
cardinality = Math.min(cardinality, evalCard);
} else {
cardinality = 1;
}
}
else if (obj != null) {
final List<Value> values = new ArrayList<Value>();
values.add(obj);
final double evalCard = rdfEvalStatsDAO.getCardinality(conf, RdfEvalStatsDAO.CARDINALITY_OF.OBJECT, values, context);
if (evalCard >= 0) {
cardinality = Math.min(cardinality, evalCard);
} else {
cardinality = 1;
}
}
} catch (final Exception e) {
throw new RuntimeException(e);
}
return cardinality;
}
@Override
protected void meetUnaryTupleOperator(final UnaryTupleOperator node) {
if (node instanceof Projection) {
cardinality += -1.0;
}
super.meetUnaryTupleOperator(node);
}
@Override
protected void meetBinaryTupleOperator(final BinaryTupleOperator node) {
node.getLeftArg().visit(this);
final double leftArgCost = cardinality;
node.getRightArg().visit(this);
cardinality += leftArgCost;
}
// TODO Is this sufficient for add capability of slice node?
@Override
public void meet(final Slice node) {
cardinality = node.getLimit();
}
@Override
public void meet(final Join node) {
node.getLeftArg().visit(this);
final double leftArgCost = cardinality;
node.getRightArg().visit(this);
if (leftArgCost > cardinality) {
cardinality = leftArgCost; //TODO: Is this ok?
}
}
protected Value getConstantValue(final Var var) {
if (var != null) {
return var.getValue();
} else {
return null;
}
}
}
}