blob: cf4ca8f7cd81a118803815070f7152e623263c0f [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.benchmark.query;
import static com.google.common.base.Preconditions.checkArgument;
import static java.util.Objects.requireNonNull;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Queue;
import org.apache.hadoop.conf.Configuration;
import org.apache.rya.indexing.external.tupleSet.ExternalTupleSet;
import org.apache.rya.indexing.external.tupleSet.SimpleExternalTupleSet;
import org.apache.rya.indexing.pcj.matching.PCJOptimizer;
import org.apache.rya.indexing.pcj.matching.provider.AccumuloIndexSetProvider;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.CommandLineOptionException;
import org.openjdk.jmh.runner.options.CommandLineOptions;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import org.openrdf.query.MalformedQueryException;
import org.openrdf.query.algebra.Projection;
import org.openrdf.query.algebra.TupleExpr;
import org.openrdf.query.parser.sparql.SPARQLParser;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import edu.umd.cs.findbugs.annotations.DefaultAnnotation;
import edu.umd.cs.findbugs.annotations.NonNull;
/**
* A benchmark that may be used to evaluate the performance of {@link PCJOptimizer}.
* It pivots over three dimensions:
* <ul>
* <li>How many Statement Patterns the optimized query has.</li>
* <li>How many PCJ indices the optimizer has available to it.</li>
* <li>How many Statement Patterns each PCJ has.</li>
* </ul>
* To execute this benchmark, build the project by executing:
* <pre>
* mvn clean install
* </pre>
* Transport the "target/benchmarking.jar" file to the system that will execute
* the benchmark, write the configuration file, and then execute:
* <pre>
* java -cp benchmarks.jar org.apache.rya.benchmark.query.PCJOptimizerBenchmark
* </pre>
*/
@State(Scope.Thread)
@DefaultAnnotation(NonNull.class)
public class PCJOptimizerBenchmark {
/**
* Variables that may be used when building SPARQL queries: the 26 single-letter
* names {@code ?a} through {@code ?z}. Every query/PCJ builder in this class copies
* this list into its own queue and consumes variables from the front, so the size
* of this list bounds how many distinct variables one generated query, or one
* optimizer's whole set of PCJs, may use.
*/
private static final List<String> variables = Lists.newArrayList("?a","?b",
"?c","?d","?e","?f","?g","?h","?i","?j","?k","?l","?m","?n","?o",
"?p","?q","?r","?s","?t","?u","?v","?w","?x","?y","?z");
// Parameters that affect which PCJs are used by the benchmark.
// numPCJs - The number of PCJs that will be available to the PCJOptimizer.
@Param({"0", "1", "2", "3", "4", "5", "6"})
public int numPCJs;
// pcjSPCount - The number of Statement Patterns that are in each PCJ.
@Param({"2", "3", "4", "5", "6"})
public int pcjSPCount;
// Parameters that affect the Query that is being optimized by the benchmark.
// querySPCount - The number of Statement Patterns that are in the query that will be optimized.
@Param({"1", "2", "3", "4", "5", "6"})
public int querySPCount;
// Cached benchmark data that is generated during the setup phase. Both maps are
// keyed by the parameter combination; one holds the "chained" query/optimizer
// pairs and the other holds the "unchained" pairs.
private final Map<BenchmarkParams, BenchmarkValues> chainedBenchmarkValues = new HashMap<>();
private final Map<BenchmarkParams, BenchmarkValues> unchainedBenchmarkValues = new HashMap<>();
/**
 * Builds the chained and unchained query/optimizer pairs for the parameter
 * combination currently held by {@link #numPCJs}, {@link #pcjSPCount} and
 * {@link #querySPCount}, and caches them for the benchmark methods to look up.
 * <p>
 * The values are read from the {@code @Param} fields rather than from hard coded
 * ranges so that parameter values supplied on the command line (which
 * {@code main} forwards to JMH) always have a matching cache entry.
 *
 * @throws Exception A query could not be parsed or an optimizer could not be built.
 */
@Setup
public void buildBenchmarkValues() throws Exception {
    // JMH injects the @Param fields before it invokes @Setup methods, so the
    // fields already describe the combination this state object will be measured with.
    final BenchmarkParams benchmarkParams = new BenchmarkParams(numPCJs, pcjSPCount, querySPCount);

    chainedBenchmarkValues.put(benchmarkParams, new BenchmarkValues(
            makeChainedQuery(benchmarkParams),
            makeChainedPCJOptimizer(benchmarkParams)));

    unchainedBenchmarkValues.put(benchmarkParams, new BenchmarkValues(
            makeUnchainedQuery(benchmarkParams),
            makeUnchainedPCJOptimizer(benchmarkParams)));
}
/**
 * Measures how long the {@link PCJOptimizer} takes to optimize the "unchained"
 * query (no variables shared between Statement Patterns) for the current
 * parameter combination.
 *
 * @throws MalformedQueryException Declared for interface compatibility.
 * @throws IllegalStateException No values were prepared for the current parameters.
 */
@Benchmark
public void optimizeQuery_unchained() throws MalformedQueryException {
    // Fetch the pieces that benchmark uses.
    final BenchmarkParams params = new BenchmarkParams(numPCJs, pcjSPCount, querySPCount);
    final BenchmarkValues values = unchainedBenchmarkValues.get(params);
    if (values == null) {
        throw new IllegalStateException("No unchained benchmark values were built for numPCJs=" + numPCJs +
                ", pcjSPCount=" + pcjSPCount + ", querySPCount=" + querySPCount);
    }
    final PCJOptimizer pcjOptimizer = values.getPCJOptimizer();

    // optimize(...) returns nothing and rewrites the expression it is handed, so
    // every invocation must start from a fresh copy of the original query.
    // Otherwise only the first invocation would optimize the unoptimized query.
    // The (small) cost of the clone is part of the measurement.
    final TupleExpr query = values.getQuery().clone();

    // Perform the optimization.
    pcjOptimizer.optimize(query, null, null);
}
/**
 * Measures how long the {@link PCJOptimizer} takes to optimize the "chained"
 * query (each Statement Pattern's object is the next one's subject) for the
 * current parameter combination.
 *
 * @throws MalformedQueryException Declared for interface compatibility.
 * @throws IllegalStateException No values were prepared for the current parameters.
 */
@Benchmark
public void optimizeQuery_chained() throws MalformedQueryException {
    // Fetch the pieces that benchmark uses.
    final BenchmarkParams params = new BenchmarkParams(numPCJs, pcjSPCount, querySPCount);
    final BenchmarkValues values = chainedBenchmarkValues.get(params);
    if (values == null) {
        throw new IllegalStateException("No chained benchmark values were built for numPCJs=" + numPCJs +
                ", pcjSPCount=" + pcjSPCount + ", querySPCount=" + querySPCount);
    }
    final PCJOptimizer pcjOptimizer = values.getPCJOptimizer();

    // optimize(...) returns nothing and rewrites the expression it is handed, so
    // every invocation must start from a fresh copy of the original query.
    // Otherwise only the first invocation would optimize the unoptimized query.
    // The (small) cost of the clone is part of the measurement.
    final TupleExpr query = values.getQuery().clone();

    // Perform the optimization.
    pcjOptimizer.optimize(query, null, null);
}
/**
 * Builds the query whose Statement Patterns do not share any variables.
 *
 * @param params - Supplies the number of Statement Patterns the query will have.
 * @return The parsed query.
 * @throws MalformedQueryException The generated SPARQL could not be parsed.
 */
private static TupleExpr makeUnchainedQuery(final BenchmarkParams params) throws MalformedQueryException {
    final Queue<String> unusedVars = Lists.newLinkedList(variables);

    // There is always at least one statement pattern, and because the patterns
    // are not joined with one another each of them consumes its own pair of variables.
    final int spCount = Math.max(1, params.getQuerySPCount());
    final List<String> queryVars = new ArrayList<>();
    for(int sp = 0; sp < spCount; sp++) {
        queryVars.add( unusedVars.remove() );
        queryVars.add( unusedVars.remove() );
    }

    final String sparql = buildUnchainedSPARQL(queryVars);
    return new SPARQLParser().parseQuery(sparql, null).getTupleExpr();
}
/**
 * Builds the query whose Statement Patterns form a chain: the object variable of
 * one pattern is the subject variable of the next.
 *
 * @param params - Supplies the number of Statement Patterns the query will have.
 * @return The parsed query.
 * @throws MalformedQueryException The generated SPARQL could not be parsed.
 */
private static TupleExpr makeChainedQuery(final BenchmarkParams params) throws MalformedQueryException {
    final Queue<String> unusedVars = Lists.newLinkedList(variables);
    final List<String> queryVars = new ArrayList<>();

    // The head of the chain is the subject of the first statement pattern.
    queryVars.add( unusedVars.remove() );

    // There is always at least one statement pattern, and each of them adds
    // exactly one new variable (its object) to the chain.
    final int spCount = Math.max(1, params.getQuerySPCount());
    for(int sp = 0; sp < spCount; sp++) {
        queryVars.add( unusedVars.remove() );
    }

    final String sparql = buildChainedSPARQL(queryVars);
    return new SPARQLParser().parseQuery(sparql, null).getTupleExpr();
}
/**
 * Builds a {@link PCJOptimizer} whose PCJs are made of Statement Patterns that do
 * not share variables within a PCJ.
 * <p>
 * The first PCJ uses one fresh pair of variables per Statement Pattern. Every
 * following PCJ reuses the previous PCJ's variables with the oldest pair dropped
 * and a fresh pair appended, so consecutive PCJs overlap in all but one pattern.
 * <p>
 * When the requested number of PCJs is zero (or negative) the optimizer is given
 * an empty list of indices.
 *
 * @param params - Supplies the number of PCJs and the number of Statement Patterns per PCJ.
 * @return An optimizer that has the generated PCJs available to it.
 * @throws Exception A PCJ's SPARQL could not be parsed or the optimizer could not be created.
 */
private static PCJOptimizer makeUnchainedPCJOptimizer(final BenchmarkParams params) throws Exception {
    final Queue<String> varQueue = Lists.newLinkedList(variables);
    final SPARQLParser parser = new SPARQLParser();
    final List<ExternalTupleSet> indices = new ArrayList<>();

    // Only build PCJs when at least one was asked for; a request for zero PCJs
    // must not silently turn into one.
    if(params.getNumPCJS() > 0) {
        // Create the first PCJ. Each of its statement patterns gets its own pair of variables.
        final List<String> pcjVars = new ArrayList<>();
        pcjVars.add( varQueue.remove() );
        pcjVars.add( varQueue.remove() );
        for(int spI = 1; spI < params.getPCJSPCount(); spI++) {
            pcjVars.add( varQueue.remove() );
            pcjVars.add( varQueue.remove() );
        }

        String pcjSparql = buildUnchainedSPARQL(pcjVars);
        Projection projection = (Projection) parser.parseQuery(pcjSparql, null).getTupleExpr();
        indices.add( new SimpleExternalTupleSet(projection) );

        // Add the rest of the PCJs.
        for(int pcjI = 1; pcjI < params.getNumPCJS(); pcjI++) {
            // Drop the previous PCJ's first statement pattern (its first pair of variables).
            pcjVars.remove(0);
            pcjVars.remove(0);

            // And add a new statement pattern (a fresh pair of variables) to the end of it.
            pcjVars.add( varQueue.remove() );
            pcjVars.add( varQueue.remove() );

            // Build the index.
            pcjSparql = buildUnchainedSPARQL(pcjVars);
            projection = (Projection) parser.parseQuery(pcjSparql, null).getTupleExpr();
            indices.add( new SimpleExternalTupleSet(projection) );
        }
    }

    // Create the optimizer.
    return new PCJOptimizer(indices, false, new AccumuloIndexSetProvider(new Configuration()));
}
/**
 * Builds a {@link PCJOptimizer} whose PCJs are made of chained Statement
 * Patterns: within a PCJ, the object variable of one pattern is the subject
 * variable of the next.
 * <p>
 * The first PCJ uses the first variables of the pool. Every following PCJ reuses
 * the previous PCJ's variables with the first one dropped and a fresh one
 * appended, so the PCJs slide along the chain one variable at a time.
 * <p>
 * When the requested number of PCJs is zero (or negative) the optimizer is given
 * an empty list of indices.
 *
 * @param params - Supplies the number of PCJs and the number of Statement Patterns per PCJ.
 * @return An optimizer that has the generated PCJs available to it.
 * @throws Exception A PCJ's SPARQL could not be parsed or the optimizer could not be created.
 */
private static PCJOptimizer makeChainedPCJOptimizer(final BenchmarkParams params) throws Exception {
    final Queue<String> varQueue = Lists.newLinkedList(variables);
    final SPARQLParser parser = new SPARQLParser();
    final List<ExternalTupleSet> indices = new ArrayList<>();

    // Only build PCJs when at least one was asked for; a request for zero PCJs
    // must not silently turn into one.
    if(params.getNumPCJS() > 0) {
        // Create the first PCJ. The first statement pattern needs two variables,
        // every additional one is chained on and needs only one more.
        final List<String> pcjVars = new ArrayList<>();
        pcjVars.add( varQueue.remove() );
        pcjVars.add( varQueue.remove() );
        for(int spI = 1; spI < params.getPCJSPCount(); spI++) {
            pcjVars.add( varQueue.remove() );
        }

        String pcjSparql = buildChainedSPARQL(pcjVars);
        Projection projection = (Projection) parser.parseQuery(pcjSparql, null).getTupleExpr();
        indices.add( new SimpleExternalTupleSet(projection) );

        // Add the rest of the PCJs.
        for(int pcjI = 1; pcjI < params.getNumPCJS(); pcjI++) {
            // Remove the previous PCJ's first variable.
            pcjVars.remove(0);

            // And add a new one to the end of it.
            pcjVars.add( varQueue.remove() );

            // Build the index.
            pcjSparql = buildChainedSPARQL(pcjVars);
            projection = (Projection) parser.parseQuery(pcjSparql, null).getTupleExpr();
            indices.add( new SimpleExternalTupleSet(projection) );
        }
    }

    // Create the optimizer.
    return new PCJOptimizer(indices, false, new AccumuloIndexSetProvider(new Configuration()));
}
/**
 * Renders a SPARQL select query in which every consecutive pair of variables
 * becomes its own {@code <urn:predicate>} Statement Pattern, so no two patterns
 * share a variable.
 *
 * @param vars - The variables to use, in order. Must hold an even number of
 *   entries and at least one pair.
 * @return The SPARQL query, projecting every variable in {@code vars}.
 */
private static String buildUnchainedSPARQL(final List<String> vars) {
    checkArgument(vars.size() % 2 == 0);

    final Queue<String> remaining = Lists.newLinkedList(vars);
    final List<String> patterns = new ArrayList<>();

    // There is always a first statement pattern; keep pairing up variables
    // for as long as any are left after it.
    do {
        final String subject = remaining.remove();
        final String object = remaining.remove();
        patterns.add( subject + " <urn:predicate> " + object);
    } while(!remaining.isEmpty());

    // Build the SPARQL query from the pieces.
    final String projection = Joiner.on(" ").join(vars);
    final String whereClause = Joiner.on(" . ").join(patterns);
    return "select " + projection + " where { " + whereClause + " . }";
}
/**
 * Renders a SPARQL select query in which the variables form a chain of
 * {@code <urn:predicate>} Statement Patterns: the object of each pattern is the
 * subject of the one that follows it.
 *
 * @param vars - The variables to use, in chain order. Must hold at least two entries.
 * @return The SPARQL query, projecting every variable in {@code vars}.
 */
private static String buildChainedSPARQL(final List<String> vars) {
    final Queue<String> remaining = Lists.newLinkedList(vars);
    final List<String> patterns = new ArrayList<>();

    // The head of the chain is the subject of the first statement pattern. Each
    // following variable closes one pattern and becomes the subject of the next.
    String subject = remaining.remove();
    do {
        final String object = remaining.remove();
        patterns.add( subject + " <urn:predicate> " + object);
        subject = object;
    } while(!remaining.isEmpty());

    // Build the SPARQL query from the pieces.
    final String projection = Joiner.on(" ").join(vars);
    final String whereClause = Joiner.on(" . ").join(patterns);
    return "select " + projection + " where { " + whereClause + " . }";
}
/**
 * One combination of the benchmark's parameter values. Instances are value
 * objects (see {@link #equals(Object)} and {@link #hashCode()}) that are used
 * as the key when looking up a benchmark's {@link BenchmarkValues}.
 */
@DefaultAnnotation(NonNull.class)
public static class BenchmarkParams {

    private final int numPCJs;
    private final int pcjSPCount;
    private final int querySPCount;

    /**
     * Constructs an instance of {@link BenchmarkParams}.
     *
     * @param numPCJs - The number of PCJs that will be available to the {@link PCJOptimizer}.
     * @param pcjSPCount - The number of Statement Patterns that are in each PCJ.
     * @param querySPCount - The number of Statement Patterns that are in the query that will be optimized.
     */
    public BenchmarkParams(final int numPCJs, final int pcjSPCount, final int querySPCount){
        this.numPCJs = numPCJs;
        this.pcjSPCount = pcjSPCount;
        this.querySPCount = querySPCount;
    }

    /**
     * @return The number of PCJs that will be available to the {@link PCJOptimizer}.
     */
    public int getNumPCJS() {
        return numPCJs;
    }

    /**
     * @return The number of Statement Patterns that are in each PCJ.
     */
    public int getPCJSPCount() {
        return pcjSPCount;
    }

    /**
     * @return The number of Statement Patterns that are in the query that will be optimized.
     */
    public int getQuerySPCount() {
        return querySPCount;
    }

    @Override
    public int hashCode() {
        return Objects.hash(numPCJs, pcjSPCount, querySPCount);
    }

    @Override
    public boolean equals(final Object other) {
        if(this == other) {
            return true;
        }
        if(!(other instanceof BenchmarkParams)) {
            return false;
        }

        // Two parameter sets are equal when all three counts match.
        final BenchmarkParams that = (BenchmarkParams) other;
        final boolean samePCJs = numPCJs == that.numPCJs && pcjSPCount == that.pcjSPCount;
        return samePCJs && querySPCount == that.querySPCount;
    }
}
/**
 * Pairs the parsed SPARQL query that a benchmark optimizes with the
 * {@link PCJOptimizer} that is used to optimize it. Neither value may be null.
 */
@DefaultAnnotation(NonNull.class)
public static class BenchmarkValues {

    private final TupleExpr query;
    private final PCJOptimizer optimizer;

    /**
     * Constructs an instance of {@link BenchmarkValues}.
     *
     * @param query - The SPARQL query to optimize. (not null)
     * @param optimizer - The optimizer used to optimize the query. (not null)
     */
    public BenchmarkValues(final TupleExpr query, final PCJOptimizer optimizer) {
        requireNonNull(query);
        requireNonNull(optimizer);
        this.query = query;
        this.optimizer = optimizer;
    }

    /**
     * @return The SPARQL query to optimize.
     */
    public TupleExpr getQuery() {
        return this.query;
    }

    /**
     * @return The optimizer used to optimize the query.
     */
    public PCJOptimizer getPCJOptimizer() {
        return this.optimizer;
    }
}
/**
 * Runs the PCJOptimizer benchmarks.
 * <p>
 * Example command line:
 * <pre>
 * java -cp benchmarks.jar org.apache.rya.benchmark.query.PCJOptimizerBenchmark
 * </pre>
 *
 * @param args - The command line arguments that will be fed into the benchmark.
 * @throws RunnerException The benchmark could not be run.
 * @throws MalformedQueryException Declared for interface compatibility.
 * @throws CommandLineOptionException The command line arguments could not be parsed.
 */
public static void main(final String[] args) throws RunnerException, MalformedQueryException, CommandLineOptionException {
    // Only the benchmarks declared by this class are run; everything else is
    // taken from the command line.
    final String benchmarkPattern = PCJOptimizerBenchmark.class.getSimpleName();
    final CommandLineOptions commandLine = new CommandLineOptions(args);

    new Runner(
            new OptionsBuilder()
                    .parent(commandLine)
                    .include(benchmarkPattern)
                    .build())
            .run();
}
}