blob: 6996fcd874f4a69647a7918d4da1d59132519437 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.mongodb.aggregation;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.CONTEXT;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.DOCUMENT_VISIBILITY;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_HASH;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_LANGUAGE;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.OBJECT_TYPE;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.PREDICATE;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.PREDICATE_HASH;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.STATEMENT_METADATA;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.SUBJECT;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.SUBJECT_HASH;
import static org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy.TIMESTAMP;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.function.Function;
import org.apache.rya.api.domain.RyaIRI;
import org.apache.rya.api.domain.RyaStatement;
import org.apache.rya.api.domain.RyaType;
import org.apache.rya.api.domain.StatementMetadata;
import org.apache.rya.api.resolver.RdfToRyaConversions;
import org.apache.rya.mongodb.MongoDbRdfConstants;
import org.apache.rya.mongodb.dao.MongoDBStorageStrategy;
import org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy;
import org.apache.rya.mongodb.document.operators.query.ConditionalOperators;
import org.apache.rya.mongodb.document.visibility.DocumentVisibilityAdapter;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.QueryEvaluationException;
import org.eclipse.rdf4j.query.algebra.Compare;
import org.eclipse.rdf4j.query.algebra.ExtensionElem;
import org.eclipse.rdf4j.query.algebra.ProjectionElem;
import org.eclipse.rdf4j.query.algebra.ProjectionElemList;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.ValueConstant;
import org.eclipse.rdf4j.query.algebra.ValueExpr;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.evaluation.impl.ExternalSet;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.model.Aggregates;
import com.mongodb.client.model.BsonField;
import com.mongodb.client.model.Filters;
import com.mongodb.client.model.Projections;
/**
* Represents a portion of a query tree as MongoDB aggregation pipeline. Should
* be built bottom-up: start with a statement pattern implemented as a $match
* step, then add steps to the pipeline to handle higher levels of the query
* tree. Methods are provided to add certain supported query operations to the
* end of the internal pipeline. In some cases, specific arguments may be
* unsupported, in which case the pipeline is unchanged and the method returns
* false.
*/
public class AggregationPipelineQueryNode extends ExternalSet {
/**
 * An aggregation result corresponding to a solution should map this key
 * to an object which itself maps variable names to variable values.
 */
static final String VALUES = "<VALUES>";
/**
 * An aggregation result corresponding to a solution should map this key
 * to an object which itself maps variable names to the corresponding hashes
 * of their values.
 */
static final String HASHES = "<HASHES>";
/**
 * An aggregation result corresponding to a solution should map this key
 * to an object which itself maps variable names to their datatypes, if any.
 */
static final String TYPES = "<TYPES>";
// Field recording how many derivation steps produced a result (0 = raw input data).
private static final String LEVEL = "derivation_level";
// Top-level fields carried through every stage of the intermediate pipeline.
private static final String[] FIELDS = { VALUES, HASHES, TYPES, LEVEL, TIMESTAMP };
// Temporary field holding a triple brought in by a $lookup (join) stage.
private static final String JOINED_TRIPLE = "<JOINED_TRIPLE>";
// Temporary boolean field: true when all shared join variables matched.
private static final String FIELDS_MATCH = "<JOIN_FIELDS_MATCH>";
// Translates statement patterns / Rya statements into Mongo query documents.
private static final MongoDBStorageStrategy<RyaStatement> strategy = new SimpleMongoDBStorageStrategy();
// Default datatype attached to constructed triples when none was recorded.
private static final Bson DEFAULT_TYPE = new Document("$literal", XMLSchema.ANYURI.stringValue());
// Default (empty) context for constructed triples.
private static final Bson DEFAULT_CONTEXT = new Document("$literal", "");
// Default (empty) document visibility for constructed triples.
private static final Bson DEFAULT_DV = DocumentVisibilityAdapter.toDBObject(MongoDbRdfConstants.EMPTY_DV);
// Default (empty) statement metadata for constructed triples.
private static final Bson DEFAULT_METADATA = new Document("$literal",
StatementMetadata.EMPTY_METADATA.toString());
/**
 * Decide whether a name may be used directly as a MongoDB field name.
 * A valid name is non-null, contains neither '.' nor '$', and is not the
 * reserved "_id".
 */
private static boolean isValidFieldName(final String name) {
    if (name == null) {
        return false;
    }
    return !name.contains(".") && !name.contains("$") && !"_id".equals(name);
}
/**
 * For a given statement pattern, represents a mapping from query variables
 * to their corresponding parts of matching triples. If necessary, also
 * substitute variable names including invalid characters with temporary
 * replacements, while producing a map back to the original names.
 */
private static class StatementVarMapping {
    // variable name -> triple field holding that variable's value
    private final Map<String, String> varToTripleValue = new HashMap<>();
    // variable name -> triple field holding the hash of that variable's value
    private final Map<String, String> varToTripleHash = new HashMap<>();
    // variable name -> triple field holding that variable's datatype
    private final Map<String, String> varToTripleType = new HashMap<>();
    // sanitized (internal) variable name <-> original variable name
    private final BiMap<String, String> varToOriginalName;

    /** @return the triple field holding this variable's value, or null. */
    String valueField(final String varName) {
        return varToTripleValue.get(varName);
    }

    /** @return the triple field holding this variable's value hash, or null. */
    String hashField(final String varName) {
        return varToTripleHash.get(varName);
    }

    /** @return the triple field holding this variable's datatype, or null. */
    String typeField(final String varName) {
        return varToTripleType.get(varName);
    }

    /** @return the (sanitized) names of all variables in this mapping. */
    Set<String> varNames() {
        return varToTripleValue.keySet();
    }

    /**
     * Produce (or reuse) an internal substitute for a variable name that is
     * not a valid field name, recording the substitution in the bi-map.
     */
    private String replace(final String original) {
        if (varToOriginalName.containsValue(original)) {
            return varToOriginalName.inverse().get(original);
        }
        else {
            final String replacement = "field-" + UUID.randomUUID();
            varToOriginalName.put(replacement, original);
            return replacement;
        }
    }

    /**
     * Return the internal name for a variable: the previously recorded
     * substitute if one exists, a fresh substitute if the name is invalid,
     * or the name itself otherwise.
     */
    private String sanitize(final String name) {
        if (varToOriginalName.containsValue(name)) {
            return varToOriginalName.inverse().get(name);
        }
        else if (name != null && !isValidFieldName(name)) {
            return replace(name);
        }
        return name;
    }

    /**
     * Build the mapping for one statement pattern. Only unbound variables
     * (those without a constant value) are mapped; context variables get a
     * value field only (the context has no hash or datatype field).
     */
    StatementVarMapping(final StatementPattern sp, final BiMap<String, String> varToOriginalName) {
        this.varToOriginalName = varToOriginalName;
        if (sp.getSubjectVar() != null && !sp.getSubjectVar().hasValue()) {
            final String name = sanitize(sp.getSubjectVar().getName());
            varToTripleValue.put(name, SUBJECT);
            varToTripleHash.put(name, SUBJECT_HASH);
        }
        if (sp.getPredicateVar() != null && !sp.getPredicateVar().hasValue()) {
            final String name = sanitize(sp.getPredicateVar().getName());
            varToTripleValue.put(name, PREDICATE);
            varToTripleHash.put(name, PREDICATE_HASH);
        }
        if (sp.getObjectVar() != null && !sp.getObjectVar().hasValue()) {
            final String name = sanitize(sp.getObjectVar().getName());
            varToTripleValue.put(name, OBJECT);
            varToTripleHash.put(name, OBJECT_HASH);
            // Fix: map the object variable to the triple's datatype field.
            // Previously a second put(name, OBJECT_LANGUAGE) for the same key
            // immediately overwrote this entry, so type projections read the
            // language field instead of the datatype field.
            varToTripleType.put(name, OBJECT_TYPE);
        }
        if (sp.getContextVar() != null && !sp.getContextVar().hasValue()) {
            final String name = sanitize(sp.getContextVar().getName());
            varToTripleValue.put(name, CONTEXT);
        }
    }

    /**
     * Project a raw triple document into the intermediate solution
     * representation, reading fields directly from the triple ("$field").
     */
    Bson getProjectExpression() {
        return getProjectExpression(new LinkedList<>(), str -> "$" + str);
    }

    /**
     * Project into the intermediate solution representation.
     * @param alsoInclude Existing solution variables to carry through as-is.
     * @param getFieldExpr Maps a triple field name to the expression that
     * reads it (e.g. prefix with "$" or with the joined-triple path).
     */
    Bson getProjectExpression(final Iterable<String> alsoInclude,
            final Function<String, String> getFieldExpr) {
        final Document values = new Document();
        final Document hashes = new Document();
        final Document types = new Document();
        for (final String varName : varNames()) {
            values.append(varName, getFieldExpr.apply(valueField(varName)));
            if (varToTripleHash.containsKey(varName)) {
                hashes.append(varName, getFieldExpr.apply(hashField(varName)));
            }
            if (varToTripleType.containsKey(varName)) {
                types.append(varName, getFieldExpr.apply(typeField(varName)));
            }
        }
        for (final String varName : alsoInclude) {
            values.append(varName, 1);
            hashes.append(varName, 1);
            types.append(varName, 1);
        }
        final List<Bson> fields = new LinkedList<>();
        fields.add(Projections.excludeId());
        fields.add(Projections.computed(VALUES, values));
        fields.add(Projections.computed(HASHES, hashes));
        if (!types.isEmpty()) {
            fields.add(Projections.computed(TYPES, types));
        }
        // Derivation level and timestamp: take the max of the current
        // solution's value and the new triple's value (0 if neither exists).
        fields.add(Projections.computed(LEVEL, new Document("$max",
                Arrays.asList("$" + LEVEL, getFieldExpr.apply(LEVEL), 0))));
        fields.add(Projections.computed(TIMESTAMP, new Document("$max",
                Arrays.asList("$" + TIMESTAMP, getFieldExpr.apply(TIMESTAMP), 0))));
        return Projections.fields(fields);
    }
}
/**
 * Given a StatementPattern, generate an object representing the arguments
 * to a "$match" command that will find matching triples. Bound (constant)
 * parts of the pattern become query criteria; unbound variables are left
 * unconstrained.
 * @param sp The StatementPattern to search for
 * @param path If given, specify the field that should be matched against
 * the statement pattern, using an ordered list of field names for a nested
 * field. E.g. to match records { "x": { "y": &lt;statement pattern&gt; } },
 * pass "x" followed by "y".
 * @return The argument of a "$match" query
 */
private static BasicDBObject getMatchExpression(final StatementPattern sp, final String ... path) {
    final Var subjVar = sp.getSubjectVar();
    final Var predVar = sp.getPredicateVar();
    final Var objVar = sp.getObjectVar();
    final Var contextVar = sp.getContextVar();
    // Convert whatever constant terms the pattern binds into Rya terms.
    RyaIRI subject = null;
    RyaIRI predicate = null;
    RyaType object = null;
    RyaIRI context = null;
    if (subjVar != null && subjVar.getValue() instanceof Resource) {
        subject = RdfToRyaConversions.convertResource((Resource) subjVar.getValue());
    }
    if (predVar != null && predVar.getValue() instanceof IRI) {
        predicate = RdfToRyaConversions.convertIRI((IRI) predVar.getValue());
    }
    if (objVar != null && objVar.getValue() != null) {
        object = RdfToRyaConversions.convertValue(objVar.getValue());
    }
    if (contextVar != null && contextVar.getValue() instanceof IRI) {
        context = RdfToRyaConversions.convertIRI((IRI) contextVar.getValue());
    }
    final RyaStatement statement = new RyaStatement(subject, predicate, object, context);
    final DBObject query = strategy.getQuery(statement);
    // If a nested path was given, rewrite every criterion key to carry it.
    if (path.length > 0) {
        final String prefix = String.join(".", path) + ".";
        for (final String key : new HashSet<>(query.keySet())) {
            final Object criterion = query.removeField(key);
            query.put(prefix + key, criterion);
        }
    }
    return (BasicDBObject) query;
}
/** Path expression reading a variable's value from the VALUES sub-document. */
private static String valueFieldExpr(final String varName) {
    return String.join(".", "$" + VALUES, varName);
}
/** Path expression reading a variable's value hash from the HASHES sub-document. */
private static String hashFieldExpr(final String varName) {
    return String.join(".", "$" + HASHES, varName);
}
/** Path expression reading a variable's datatype from the TYPES sub-document. */
private static String typeFieldExpr(final String varName) {
    return String.join(".", "$" + TYPES, varName);
}
/** Path expression reading a triple part from the joined-triple sub-document. */
private static String joinFieldExpr(final String triplePart) {
    return String.join(".", "$" + JOINED_TRIPLE, triplePart);
}
/**
 * Get an object representing the value field of some value expression, or
 * return null if the expression isn't supported. Variables become field
 * path expressions into the VALUES sub-document; constants become
 * "$literal" documents.
 */
private Object valueFieldExpr(final ValueExpr expr) {
    if (expr instanceof Var) {
        return valueFieldExpr(((Var) expr).getName());
    }
    if (expr instanceof ValueConstant) {
        final Value constant = ((ValueConstant) expr).getValue();
        return new Document("$literal", constant.stringValue());
    }
    // Any other expression type is unsupported.
    return null;
}
// The aggregation pipeline stages built so far.
private final List<Bson> pipeline;
// The collection of triples this pipeline runs against.
private final MongoCollection<Document> collection;
// Variables guaranteed to be bound in every solution.
private final Set<String> assuredBindingNames;
// All variables that may appear in a solution (superset of the assured names).
private final Set<String> bindingNames;
// Internal (sanitized) variable name <-> original variable name.
private final BiMap<String, String> varToOriginalName;
/**
 * Produce (or reuse) an internal substitute for a variable name that is
 * not a valid field name, recording the substitution so the original name
 * can be restored in results.
 */
private String replace(final String original) {
    if (varToOriginalName.containsValue(original)) {
        // Already substituted once; reuse the recorded internal name.
        return varToOriginalName.inverse().get(original);
    }
    final String substitute = "field-" + UUID.randomUUID();
    varToOriginalName.put(substitute, original);
    return substitute;
}
/**
 * Create a pipeline query node based on a StatementPattern: a $match stage
 * that finds triples satisfying the pattern, followed by a $project stage
 * that maps the triples into the intermediate solution representation.
 * @param collection The collection of triples to query.
 * @param baseSP The leaf node in the query tree.
 */
public AggregationPipelineQueryNode(final MongoCollection<Document> collection, final StatementPattern baseSP) {
    this.collection = Preconditions.checkNotNull(collection);
    Preconditions.checkNotNull(baseSP);
    varToOriginalName = HashBiMap.create();
    final StatementVarMapping varMapping = new StatementVarMapping(baseSP, varToOriginalName);
    assuredBindingNames = new HashSet<>(varMapping.varNames());
    bindingNames = new HashSet<>(varMapping.varNames());
    pipeline = new LinkedList<>();
    pipeline.add(Aggregates.match(getMatchExpression(baseSP)));
    pipeline.add(Aggregates.project(varMapping.getProjectExpression()));
}
/**
 * Create a pipeline query node directly from its internal state. The
 * arguments are stored as-is (not copied); callers must not mutate them
 * afterwards unless that mutation is intended to affect this node.
 * @param collection The collection of triples to query.
 * @param pipeline The aggregation stages built so far.
 * @param assuredBindingNames Variables bound in every solution.
 * @param bindingNames All variables that may appear in a solution.
 * @param varToOriginalName Internal-to-original variable name map.
 */
AggregationPipelineQueryNode(final MongoCollection<Document> collection,
final List<Bson> pipeline, final Set<String> assuredBindingNames,
final Set<String> bindingNames, final BiMap<String, String> varToOriginalName) {
this.collection = Preconditions.checkNotNull(collection);
this.pipeline = Preconditions.checkNotNull(pipeline);
this.assuredBindingNames = Preconditions.checkNotNull(assuredBindingNames);
this.bindingNames = Preconditions.checkNotNull(bindingNames);
this.varToOriginalName = Preconditions.checkNotNull(varToOriginalName);
}
/**
 * Two nodes are equal when they target the same collection, track the same
 * binding names and renamings, and have pipelines whose stages render to
 * identical strings (the underlying stage types don't implement
 * value-based equals, so string forms are compared instead).
 */
@Override
public boolean equals(final Object o) {
    if (this == o) {
        return true;
    }
    if (!(o instanceof AggregationPipelineQueryNode)) {
        return false;
    }
    final AggregationPipelineQueryNode that = (AggregationPipelineQueryNode) o;
    if (!collection.equals(that.collection)
            || !assuredBindingNames.equals(that.assuredBindingNames)
            || !bindingNames.equals(that.bindingNames)
            || !varToOriginalName.equals(that.varToOriginalName)
            || pipeline.size() != that.pipeline.size()) {
        return false;
    }
    // Compare pipeline stages via their string representations.
    for (int i = 0; i < pipeline.size(); i++) {
        final Bson mine = pipeline.get(i);
        final Bson theirs = that.pipeline.get(i);
        if (!mine.toString().equals(theirs.toString())) {
            return false;
        }
    }
    return true;
}
/**
 * Hash code consistent with {@link #equals}: pipeline stages are folded in
 * via their string representations, because the underlying stage types do
 * not implement value-based hashCode. (Previously the stage objects
 * themselves were hashed, which falls back to identity hashing for the
 * driver's stage classes, so two nodes that compared equal could report
 * different hash codes — violating the hashCode contract.)
 */
@Override
public int hashCode() {
    final List<String> stageStrings = new LinkedList<>();
    for (final Bson stage : pipeline) {
        stageStrings.add(stage.toString());
    }
    return Objects.hashCode(collection, stageStrings, assuredBindingNames,
            bindingNames, varToOriginalName);
}
/**
 * Run the aggregation pipeline against the triple collection and iterate
 * over the solutions it produces, restoring original variable names and
 * merging in the given bindings.
 * @param bindings Existing bindings to combine with each produced solution.
 * @return An iteration over the resulting binding sets.
 * @throws QueryEvaluationException if evaluation fails.
 */
@Override
public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(final BindingSet bindings)
throws QueryEvaluationException {
return new PipelineResultIteration(collection.aggregate(pipeline), varToOriginalName, bindings);
}
/**
 * Names of variables bound in every solution, translated back from any
 * internal substitutes to their original names.
 */
@Override
public Set<String> getAssuredBindingNames() {
    final Set<String> originalNames = new HashSet<>();
    assuredBindingNames.forEach(
            name -> originalNames.add(varToOriginalName.getOrDefault(name, name)));
    return originalNames;
}
/**
 * Names of all variables that may appear in a solution, translated back
 * from any internal substitutes to their original names.
 */
@Override
public Set<String> getBindingNames() {
    final Set<String> originalNames = new HashSet<>();
    bindingNames.forEach(
            name -> originalNames.add(varToOriginalName.getOrDefault(name, name)));
    return originalNames;
}
/**
 * Deep-enough copy: the pipeline list, binding-name sets, and renaming map
 * are copied so the clone can be extended independently; the collection
 * reference is shared.
 */
@Override
public AggregationPipelineQueryNode clone() {
    final List<Bson> pipelineCopy = new LinkedList<>(pipeline);
    final Set<String> assuredCopy = new HashSet<>(assuredBindingNames);
    final Set<String> namesCopy = new HashSet<>(bindingNames);
    final BiMap<String, String> renamingCopy = HashBiMap.create(varToOriginalName);
    return new AggregationPipelineQueryNode(collection, pipelineCopy,
            assuredCopy, namesCopy, renamingCopy);
}
/**
 * Produce a human-readable signature for this node: the assured binding
 * names, any optional (non-assured) binding names in brackets, then each
 * pipeline stage on its own line.
 * Fix: the previous version called {@code super.getSignature()} and
 * discarded the result, which had no effect; the call has been removed.
 * @return A multi-line description of this node.
 */
@Override
public String getSignature() {
    final Set<String> assured = getAssuredBindingNames();
    // getBindingNames() returns a fresh set, so it is safe to mutate here.
    final Set<String> optionalBindingNames = getBindingNames();
    optionalBindingNames.removeAll(assured);
    final StringBuilder sb = new StringBuilder("AggregationPipelineQueryNode (binds: ");
    sb.append(String.join(", ", assured));
    if (!optionalBindingNames.isEmpty()) {
        sb.append(" [")
            .append(String.join(", ", optionalBindingNames))
            .append("]");
    }
    sb.append(")\n");
    for (final Bson doc : pipeline) {
        sb.append(doc.toString()).append("\n");
    }
    return sb.toString();
}
/**
 * Get the internal list of aggregation pipeline steps. Note that documents
 * resulting from this pipeline will be structured using an internal
 * intermediate representation. For documents representing triples, see
 * {@link #getTriplePipeline}, and for query solutions, see
 * {@link #evaluate}.
 * @return The current internal pipeline.
 */
List<Bson> getPipeline() {
// Returns the live internal list (not a copy); callers share mutations.
return pipeline;
}
/**
 * Add a join with an individual {@link StatementPattern} to the pipeline:
 * a $lookup on one shared variable's hash, then an $unwind, equality tests
 * for any remaining shared variables, a $match against the pattern, and a
 * final $project back into the intermediate representation.
 * @param sp The statement pattern to join with
 * @return true if the join was successfully added to the pipeline; false
 * if there is no shared assured variable to join on.
 */
public boolean joinWith(final StatementPattern sp) {
    Preconditions.checkNotNull(sp);
    // 1. Determine shared variables and new variables
    final StatementVarMapping spMap = new StatementVarMapping(sp, varToOriginalName);
    final NavigableSet<String> sharedVars = new ConcurrentSkipListSet<>(spMap.varNames());
    sharedVars.retainAll(assuredBindingNames);
    // 2. Join on one shared variable
    final String joinKey = sharedVars.pollFirst();
    if (joinKey == null) {
        // No shared assured variable: this join can't be expressed as a $lookup.
        return false;
    }
    final String collectionName = collection.getNamespace().getCollectionName();
    final Bson join = Aggregates.lookup(collectionName,
            HASHES + "." + joinKey,
            spMap.hashField(joinKey),
            JOINED_TRIPLE);
    pipeline.add(join);
    // 3. Unwind the joined triples so each document represents a binding
    // set (solution) from the base branch and a triple that may match.
    pipeline.add(Aggregates.unwind("$" + JOINED_TRIPLE));
    // 4. (Optional) If there are any shared variables that weren't used as
    // the join key, project all existing fields plus a new field that
    // tests the equality of those shared variables.
    final BasicDBObject matchOpts = getMatchExpression(sp, JOINED_TRIPLE);
    if (!sharedVars.isEmpty()) {
        final List<Bson> eqTests = new LinkedList<>();
        for (final String varName : sharedVars) {
            final String oldField = valueFieldExpr(varName);
            final String newField = joinFieldExpr(spMap.valueField(varName));
            final Bson eqTest = new Document("$eq", Arrays.asList(oldField, newField));
            eqTests.add(eqTest);
        }
        final Bson eqProjectOpts = Projections.fields(
                Projections.computed(FIELDS_MATCH, Filters.and(eqTests)),
                Projections.include(JOINED_TRIPLE, VALUES, HASHES, TYPES, LEVEL, TIMESTAMP));
        pipeline.add(Aggregates.project(eqProjectOpts));
        matchOpts.put(FIELDS_MATCH, true);
    }
    // 5. Filter for solutions whose triples match the joined statement
    // pattern, and, if applicable, whose additional shared variables
    // match the current solution.
    pipeline.add(Aggregates.match(matchOpts));
    // 6. Project the results to include variables from the new SP (with
    // appropriate renaming) and variables referenced only in the base
    // pipeline (with previous names). Fix: reuse spMap here instead of
    // constructing a second, identical StatementVarMapping (the sanitize
    // step replays the shared bi-map, so both mappings were the same).
    final Bson finalProjectOpts = spMap.getProjectExpression(assuredBindingNames,
            str -> joinFieldExpr(str));
    assuredBindingNames.addAll(spMap.varNames());
    bindingNames.addAll(spMap.varNames());
    pipeline.add(Aggregates.project(finalProjectOpts));
    return true;
}
/**
 * Add a SPARQL projection or multi-projection operation to the pipeline.
 * The number of documents produced by the pipeline after this operation
 * will be the number of documents entering this stage (the number of
 * intermediate results) multiplied by the number of
 * {@link ProjectionElemList}s supplied here. Empty projections are
 * unsupported; if one or more projections given binds zero variables, then
 * the pipeline will be unchanged and the method will return false.
 * @param projections One or more projections, i.e. mappings from the result
 * at this stage of the query into a set of variables.
 * @return true if the projection(s) were added to the pipeline.
 */
public boolean project(final Iterable<ProjectionElemList> projections) {
if (projections == null || !projections.iterator().hasNext()) {
return false;
}
final List<Bson> projectOpts = new LinkedList<>();
// Union of names across all projections = possible bindings;
// intersection = bindings assured in every projection.
final Set<String> bindingNamesUnion = new HashSet<>();
Set<String> bindingNamesIntersection = null;
for (final ProjectionElemList projection : projections) {
if (projection.getElements().isEmpty()) {
// Empty projections are unsupported -- fail when seen
return false;
}
final Document valueDoc = new Document();
final Document hashDoc = new Document();
final Document typeDoc = new Document();
final Set<String> projectionBindingNames = new HashSet<>();
for (final ProjectionElem elem : projection.getElements()) {
String to = elem.getTargetName();
// If the 'to' name is invalid, replace it internally
if (!isValidFieldName(to)) {
to = replace(to);
}
String from = elem.getSourceName();
// If the 'from' name is invalid, use the internal substitute
if (varToOriginalName.containsValue(from)) {
from = varToOriginalName.inverse().get(from);
}
projectionBindingNames.add(to);
if (to.equals(from)) {
// Same name: carry the existing field through unchanged.
valueDoc.append(to, 1);
hashDoc.append(to, 1);
typeDoc.append(to, 1);
}
else {
// Renamed: copy value/hash/type from the source variable's fields.
valueDoc.append(to, valueFieldExpr(from));
hashDoc.append(to, hashFieldExpr(from));
typeDoc.append(to, typeFieldExpr(from));
}
}
bindingNamesUnion.addAll(projectionBindingNames);
if (bindingNamesIntersection == null) {
bindingNamesIntersection = new HashSet<>(projectionBindingNames);
}
else {
bindingNamesIntersection.retainAll(projectionBindingNames);
}
projectOpts.add(new Document()
.append(VALUES, valueDoc)
.append(HASHES, hashDoc)
.append(TYPES, typeDoc)
.append(LEVEL, "$" + LEVEL)
.append(TIMESTAMP, "$" + TIMESTAMP));
}
if (projectOpts.size() == 1) {
// Single projection: one $project stage suffices.
pipeline.add(Aggregates.project(projectOpts.get(0)));
}
else {
// Multi-projection: project each solution to an array of projected
// results, unwind the array, then restore the standard document shape.
final String listKey = "PROJECTIONS";
final Bson projectIndividual = Projections.fields(
Projections.computed(VALUES, "$" + listKey + "." + VALUES),
Projections.computed(HASHES, "$" + listKey + "." + HASHES),
Projections.computed(TYPES, "$" + listKey + "." + TYPES),
Projections.include(LEVEL),
Projections.include(TIMESTAMP));
pipeline.add(Aggregates.project(Projections.computed(listKey, projectOpts)));
pipeline.add(Aggregates.unwind("$" + listKey));
pipeline.add(Aggregates.project(projectIndividual));
}
assuredBindingNames.clear();
bindingNames.clear();
// bindingNamesIntersection is non-null here: the iterable was non-empty
// and every projection list was non-empty (checked above).
assuredBindingNames.addAll(bindingNamesIntersection);
bindingNames.addAll(bindingNamesUnion);
return true;
}
/**
 * Add a SPARQL extension to the pipeline, if possible. An extension adds
 * some number of variables to the result. Adds a "$project" step to the
 * pipeline, but differs from the SPARQL project operation in that
 * 1) pre-existing variables are always kept, and 2) values of new variables
 * are defined by expressions, which may be more complex than simply
 * variable names. Not all expressions are supported. If unsupported
 * expression types are used in the extension, the pipeline will remain
 * unchanged and this method will return false.
 * @param extensionElements A list of new variables and their expressions
 * @return True if the extension was successfully converted into a pipeline
 * step, false otherwise.
 */
public boolean extend(final Iterable<ExtensionElem> extensionElements) {
final List<Bson> valueFields = new LinkedList<>();
final List<Bson> hashFields = new LinkedList<>();
final List<Bson> typeFields = new LinkedList<>();
// Carry all pre-existing variables through unchanged.
for (final String varName : bindingNames) {
valueFields.add(Projections.include(varName));
hashFields.add(Projections.include(varName));
typeFields.add(Projections.include(varName));
}
final Set<String> newVarNames = new HashSet<>();
for (final ExtensionElem elem : extensionElements) {
String name = elem.getName();
if (!isValidFieldName(name)) {
// If the field name is invalid, replace it internally
// NOTE(review): replace() records the substitution in
// varToOriginalName even if a later element causes this method to
// return false with the pipeline unchanged -- confirm that the
// leftover mapping entry is harmless.
name = replace(name);
}
// We can only handle certain kinds of value expressions; return
// failure for any others.
final ValueExpr expr = elem.getExpr();
final Object valueField;
final Object hashField;
final Object typeField;
if (expr instanceof Var) {
// Variable reference: copy that variable's fields.
final String varName = ((Var) expr).getName();
valueField = "$" + varName;
hashField = "$" + varName;
typeField = "$" + varName;
}
else if (expr instanceof ValueConstant) {
// Constant: embed the literal value and its hash; record a datatype
// only when the constant is a literal.
final Value val = ((ValueConstant) expr).getValue();
valueField = new Document("$literal", val.stringValue());
hashField = new Document("$literal", SimpleMongoDBStorageStrategy.hash(val.stringValue()));
if (val instanceof Literal) {
typeField = new Document("$literal", ((Literal) val).getDatatype().stringValue());
}
else {
typeField = null;
}
}
else {
// if not understood, return failure
return false;
}
valueFields.add(Projections.computed(name, valueField));
hashFields.add(Projections.computed(name, hashField));
if (typeField != null) {
typeFields.add(Projections.computed(name, typeField));
}
newVarNames.add(name);
}
// Extension variables are always bound, so they join both name sets.
assuredBindingNames.addAll(newVarNames);
bindingNames.addAll(newVarNames);
final Bson projectOpts = Projections.fields(
Projections.computed(VALUES, Projections.fields(valueFields)),
Projections.computed(HASHES, Projections.fields(hashFields)),
Projections.computed(TYPES, Projections.fields(typeFields)),
Projections.include(LEVEL),
Projections.include(TIMESTAMP));
pipeline.add(Aggregates.project(projectOpts));
return true;
}
/**
 * Add a SPARQL filter to the pipeline, if possible. A filter eliminates
 * results that don't satisfy a given condition. Not all conditional
 * expressions are supported. If unsupported expressions are used in the
 * filter, the pipeline will remain unchanged and this method will return
 * false. Currently only supports binary {@link Compare} conditions among
 * variables and/or literals.
 * Fix: LE/GE previously mapped to "$le"/"$ge", which are not MongoDB
 * aggregation operators (the comparison operators are $lte/$gte), so
 * those filters could never match; they now map to "$lte"/"$gte".
 * @param condition The filter condition
 * @return True if the filter was successfully converted into a pipeline
 * step, false otherwise.
 */
public boolean filter(final ValueExpr condition) {
    if (condition instanceof Compare) {
        final Compare compare = (Compare) condition;
        final Compare.CompareOp operator = compare.getOperator();
        final Object leftArg = valueFieldExpr(compare.getLeftArg());
        final Object rightArg = valueFieldExpr(compare.getRightArg());
        if (leftArg == null || rightArg == null) {
            // unsupported value expression, can't convert filter
            return false;
        }
        final String opFunc;
        switch (operator) {
        case EQ:
            opFunc = "$eq";
            break;
        case NE:
            opFunc = "$ne";
            break;
        case LT:
            opFunc = "$lt";
            break;
        case LE:
            opFunc = "$lte";
            break;
        case GT:
            opFunc = "$gt";
            break;
        case GE:
            opFunc = "$gte";
            break;
        default:
            // unrecognized comparison operator, can't convert filter
            return false;
        }
        // Compute the comparison into a temporary "FILTER" field, keep only
        // documents where it is true, then drop the temporary field.
        final Document compareDoc = new Document(opFunc, Arrays.asList(leftArg, rightArg));
        pipeline.add(Aggregates.project(Projections.fields(
                Projections.computed("FILTER", compareDoc),
                Projections.include(VALUES, HASHES, TYPES, LEVEL, TIMESTAMP))));
        pipeline.add(Aggregates.match(new Document("FILTER", true)));
        pipeline.add(Aggregates.project(Projections.fields(
                Projections.include(VALUES, HASHES, TYPES, LEVEL, TIMESTAMP))));
        return true;
    }
    return false;
}
/**
 * Add a $group step to filter out redundant solutions: solutions are
 * grouped by the concatenated hashes of all bound variables, and the first
 * document of each group is kept.
 * @return True if the distinct operation was successfully appended.
 */
public boolean distinct() {
    // Group key: concatenation of every bound variable's hash field.
    final List<String> hashExprs = new LinkedList<>();
    bindingNames.forEach(varName -> hashExprs.add(hashFieldExpr(varName)));
    // Keep the first document's top-level fields for each group.
    final List<BsonField> reduceOps = new LinkedList<>();
    for (final String field : FIELDS) {
        reduceOps.add(new BsonField(field, new Document("$first", "$" + field)));
    }
    pipeline.add(Aggregates.group(new Document("$concat", hashExprs), reduceOps));
    return true;
}
/**
 * Add a step to the end of the current pipeline which prunes the results
 * according to the recorded derivation level of their sources. At least one
 * triple that was used to construct the result must have a derivation level
 * at least as high as the parameter, indicating that it was derived via
 * that many steps from the original data. (A value of zero is equivalent to
 * input data that was not derived at all.) Use in conjunction with
 * getTriplePipeline (which sets source level for generated triples) to
 * avoid repeatedly deriving the same results.
 * @param requiredLevel Required derivation depth. Reject a solution to the
 * query if all of the triples involved in producing that solution have a
 * lower derivation depth than this. If zero, does nothing.
 */
public void requireSourceDerivationDepth(final int requiredLevel) {
    if (requiredLevel <= 0) {
        // Zero (or negative) depth places no constraint on the results.
        return;
    }
    final Document depthCheck = new Document("$gte", requiredLevel);
    pipeline.add(Aggregates.match(new Document(LEVEL, depthCheck)));
}
/**
 * Add a step to the end of the current pipeline which prunes the results
 * according to the timestamps of their sources. At least one triple that
 * was used to construct the result must have a timestamp at least as
 * recent as the parameter. Use in iterative applications to avoid deriving
 * solutions that would have been generated in an earlier iteration.
 * @param t Minimum required timestamp. Reject a solution to the query if
 * all of the triples involved in producing that solution have an earlier
 * timestamp than this.
 */
public void requireSourceTimestamp(final long t) {
    final Document timestampCheck = new Document("$gte", t);
    pipeline.add(Aggregates.match(new Document(TIMESTAMP, timestampCheck)));
}
/**
 * Given that the current state of the pipeline produces data that can be
 * interpreted as triples, add a project step to map each result from the
 * intermediate result structure to a structure that can be stored in the
 * triple store. Does not modify the internal pipeline, which will still
 * produce intermediate results suitable for query evaluation.
 * @param timestamp Attach this timestamp to the resulting triples.
 * @param requireNew If true, add an additional step to check constructed
 * triples against existing triples and only include new ones in the
 * result. Adds a potentially expensive $lookup step.
 * @throws IllegalStateException if the results produced by the current
 * pipeline do not have variable names allowing them to be interpreted as
 * triples (i.e. "subject", "predicate", and "object").
 */
public List<Bson> getTriplePipeline(final long timestamp, final boolean requireNew) {
if (!assuredBindingNames.contains(SUBJECT)
|| !assuredBindingNames.contains(PREDICATE)
|| !assuredBindingNames.contains(OBJECT)) {
throw new IllegalStateException("Current pipeline does not produce "
+ "records that can be converted into triples.\n"
+ "Required variable names: <" + SUBJECT + ", " + PREDICATE
+ ", " + OBJECT + ">\nCurrent variable names: "
+ assuredBindingNames);
}
// Copy the stages so the internal pipeline is left untouched.
final List<Bson> triplePipeline = new LinkedList<>(pipeline);
final List<Bson> fields = new LinkedList<>();
fields.add(Projections.computed(SUBJECT, valueFieldExpr(SUBJECT)));
fields.add(Projections.computed(SUBJECT_HASH, hashFieldExpr(SUBJECT)));
fields.add(Projections.computed(PREDICATE, valueFieldExpr(PREDICATE)));
fields.add(Projections.computed(PREDICATE_HASH, hashFieldExpr(PREDICATE)));
fields.add(Projections.computed(OBJECT, valueFieldExpr(OBJECT)));
fields.add(Projections.computed(OBJECT_HASH, hashFieldExpr(OBJECT)));
// Use the recorded datatype, defaulting to anyURI when none was recorded.
fields.add(Projections.computed(OBJECT_TYPE,
ConditionalOperators.ifNull(typeFieldExpr(OBJECT), DEFAULT_TYPE)));
// NOTE(review): this projects the object's *hash* into the language
// field, which looks wrong -- confirm whether a language expression was
// intended here instead of hashFieldExpr(OBJECT).
fields.add(Projections.computed(OBJECT_LANGUAGE, hashFieldExpr(OBJECT)));
fields.add(Projections.computed(CONTEXT, DEFAULT_CONTEXT));
fields.add(Projections.computed(STATEMENT_METADATA, DEFAULT_METADATA));
fields.add(DEFAULT_DV);
fields.add(Projections.computed(TIMESTAMP, new Document("$literal", timestamp)));
// Derived triples are one derivation level deeper than their sources.
fields.add(Projections.computed(LEVEL, new Document("$add", Arrays.asList("$" + LEVEL, 1))));
triplePipeline.add(Aggregates.project(Projections.fields(fields)));
if (requireNew) {
// Prune any triples that already exist in the data store
final String collectionName = collection.getNamespace().getCollectionName();
final Bson includeAll = Projections.include(SUBJECT, SUBJECT_HASH,
PREDICATE, PREDICATE_HASH, OBJECT, OBJECT_HASH,
OBJECT_TYPE, OBJECT_LANGUAGE, CONTEXT, STATEMENT_METADATA,
DOCUMENT_VISIBILITY, TIMESTAMP, LEVEL);
// A candidate triple is redundant when an existing triple (looked up by
// subject hash below) also matches its predicate and object hashes.
final List<Bson> eqTests = new LinkedList<>();
eqTests.add(new Document("$eq", Arrays.asList("$$this." + PREDICATE_HASH, "$" + PREDICATE_HASH)));
eqTests.add(new Document("$eq", Arrays.asList("$$this." + OBJECT_HASH, "$" + OBJECT_HASH)));
final Bson redundantFilter = new Document("$filter", new Document("input", "$" + JOINED_TRIPLE)
.append("as", "this").append("cond", new Document("$and", eqTests)));
triplePipeline.add(Aggregates.lookup(collectionName, SUBJECT_HASH,
SUBJECT_HASH, JOINED_TRIPLE));
final String numRedundant = "REDUNDANT";
// Count matching existing triples, keep only candidates with zero, and
// drop the temporary count field.
triplePipeline.add(Aggregates.project(Projections.fields(includeAll,
Projections.computed(numRedundant, new Document("$size", redundantFilter)))));
triplePipeline.add(Aggregates.match(Filters.eq(numRedundant, 0)));
triplePipeline.add(Aggregates.project(Projections.fields(includeAll)));
}
return triplePipeline;
}
}