/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.rya.forwardchain.strategy;

import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;

import org.apache.log4j.Logger;
import org.apache.rya.api.domain.RyaStatement;
import org.apache.rya.api.domain.StatementMetadata;
import org.apache.rya.api.persist.RyaDAOException;
import org.apache.rya.api.persist.query.RyaQuery;
import org.apache.rya.api.persist.query.RyaQueryEngine;
import org.apache.rya.forwardchain.ForwardChainException;
import org.apache.rya.forwardchain.rule.AbstractConstructRule;
import org.apache.rya.forwardchain.rule.Rule;
import org.apache.rya.mongodb.MongoDBRdfConfiguration;
import org.apache.rya.mongodb.MongoDBRyaDAO;
import org.apache.rya.mongodb.StatefulMongoDBRdfConfiguration;
import org.apache.rya.mongodb.aggregation.AggregationPipelineQueryNode;
import org.apache.rya.mongodb.aggregation.SparqlToPipelineTransformVisitor;
import org.apache.rya.mongodb.batch.MongoDbBatchWriter;
import org.apache.rya.mongodb.batch.MongoDbBatchWriterConfig;
import org.apache.rya.mongodb.batch.MongoDbBatchWriterException;
import org.apache.rya.mongodb.batch.MongoDbBatchWriterUtils;
import org.apache.rya.mongodb.batch.collection.CollectionType;
import org.apache.rya.mongodb.batch.collection.MongoCollectionType;
import org.apache.rya.mongodb.dao.SimpleMongoDBStorageStrategy;
import org.apache.rya.sail.config.RyaSailFactory;
import org.bson.Document;
import org.bson.conversions.Bson;
import org.eclipse.rdf4j.query.algebra.QueryRoot;
import org.eclipse.rdf4j.query.algebra.TupleExpr;

import com.google.common.base.Preconditions;
import com.mongodb.Block;
import com.mongodb.DBObject;
import com.mongodb.MongoClient;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoDatabase;
import com.mongodb.util.JSON;

/**
* A rule execution strategy for MongoDB Rya that converts a single rule into an
* aggregation pipeline whenever possible. Falls back on an internal
* {@link SailExecutionStrategy} to handle any rules that can't be converted.
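 *
 * <p>
 * A minimal usage sketch (illustrative only: assumes {@code conf} describes a
 * running MongoDB-backed Rya instance and {@code rule} is a CONSTRUCT rule
 * obtained elsewhere, e.g. from a {@code Ruleset}):
 * <pre>{@code
 * MongoPipelineStrategy strategy = new MongoPipelineStrategy(conf);
 * try {
 *     long inferred = strategy.executeConstructRule(rule, StatementMetadata.EMPTY_METADATA);
 * } finally {
 *     strategy.shutDown();
 * }
 * }</pre>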
*/
public class MongoPipelineStrategy extends AbstractRuleExecutionStrategy {
private static final Logger logger = Logger.getLogger(MongoPipelineStrategy.class);
private static final int PIPELINE_BATCH_SIZE = 1000;
private final SparqlToPipelineTransformVisitor pipelineVisitor;
private final MongoCollection<Document> baseCollection;
private final MongoDbBatchWriter<Document> batchWriter;
private final MongoDBRyaDAO dao;
private final SimpleMongoDBStorageStrategy storageStrategy = new SimpleMongoDBStorageStrategy();
private final ConcurrentHashMap<Rule, Long> executionTimes = new ConcurrentHashMap<>();
private final AbstractRuleExecutionStrategy backup;
private final RyaQueryEngine<StatefulMongoDBRdfConfiguration> engine;
private boolean usedBackup = false;

    /**
     * Initialize based on a configuration.
     * @param mongoConf Should contain database information; cannot be null.
     *      If passed a stateful configuration, the existing mongo client is
     *      reused; otherwise, a new one is created.
     * @throws ForwardChainException if the Rya DAO or the batch writer can't
     *      be initialized.
     */
public MongoPipelineStrategy(MongoDBRdfConfiguration mongoConf) throws ForwardChainException {
Preconditions.checkNotNull(mongoConf);
final String mongoDBName = mongoConf.getMongoDBName();
final String collectionName = mongoConf.getTriplesCollectionName();
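        // Disable per-update flushing; this strategy controls when its own
        // writes are flushed via the batch writer created below.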
mongoConf.setFlush(false);
final StatefulMongoDBRdfConfiguration statefulConf;
try {
if (mongoConf instanceof StatefulMongoDBRdfConfiguration) {
statefulConf = (StatefulMongoDBRdfConfiguration) mongoConf;
this.dao = new MongoDBRyaDAO();
this.dao.setConf(statefulConf);
this.dao.init();
}
else {
this.dao = RyaSailFactory.getMongoDAO(mongoConf);
statefulConf = this.dao.getConf();
}
} catch (RyaDAOException e) {
throw new ForwardChainException("Can't connect to Rya.", e);
}
final MongoClient mongoClient = statefulConf.getMongoClient();
final MongoDatabase mongoDB = mongoClient.getDatabase(mongoDBName);
this.baseCollection = mongoDB.getCollection(collectionName);
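        // The visitor rewrites SPARQL algebra into aggregation pipeline stages
        // that read from the triples collection.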
this.pipelineVisitor = new SparqlToPipelineTransformVisitor(this.baseCollection);
this.engine = this.dao.getQueryEngine();
this.backup = new SailExecutionStrategy(statefulConf);
final MongoDbBatchWriterConfig writerConfig = MongoDbBatchWriterUtils.getMongoDbBatchWriterConfig(statefulConf);
final CollectionType<Document> ct = new MongoCollectionType(baseCollection);
this.batchWriter = new MongoDbBatchWriter<>(ct, writerConfig);
try {
this.batchWriter.start();
} catch (final MongoDbBatchWriterException e) {
throw new ForwardChainException("Error starting MongoDB batch writer", e);
}
}

    /**
     * Execute a CONSTRUCT rule by converting it into a pipeline, iterating
     * through the resulting documents, and inserting them back into the data
     * store as new triples. If pipeline conversion fails, falls back on the
     * default execution strategy.
     * @param rule A construct query rule; not null.
     * @param metadata StatementMetadata to attach to new triples; not null.
     * @return The number of new triples inferred.
     * @throws ForwardChainException if execution fails.
     */
@Override
public long executeConstructRule(AbstractConstructRule rule,
StatementMetadata metadata) throws ForwardChainException {
        Preconditions.checkNotNull(rule);
        Preconditions.checkNotNull(metadata);
logger.info("Applying inference rule " + rule + "...");
long timestamp = System.currentTimeMillis();
// Get a pipeline that turns individual matches into triples
List<Bson> pipeline = null;
try {
int requireSourceLevel = 0;
            if (!usedBackup) {
                // If derivation levels are known to be set properly, we can
                // optimize by pruning any derived fact whose sources are all
                // old information: such a fact would already have been derived
                // in a previous step. If the backup strategy has ever been
                // used, source triples aren't guaranteed to have a derivation
                // level set, so skip the optimization.
                requireSourceLevel = requiredLevel;
            }
pipeline = toPipeline(rule, requireSourceLevel, timestamp);
}
catch (ForwardChainException e) {
logger.error(e);
}
if (pipeline == null) {
if (backup == null) {
logger.error("Couldn't convert " + rule + " to pipeline:");
for (String line : rule.getQuery().toString().split("\n")) {
logger.error("\t" + line);
}
throw new UnsupportedOperationException("Couldn't convert query to pipeline.");
}
else {
logger.debug("Couldn't convert " + rule + " to pipeline:");
for (String line : rule.getQuery().toString().split("\n")) {
logger.debug("\t" + line);
}
logger.debug("Using fallback strategy.");
usedBackup = true;
return backup.executeConstructRule(rule, metadata);
}
}
// Execute the pipeline
for (Bson step : pipeline) {
logger.debug("\t" + step.toString());
}
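        // Deduplicate: count and queue for insertion only those result triples
        // that don't already exist in the store.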
LongAdder count = new LongAdder();
baseCollection.aggregate(pipeline)
.allowDiskUse(true)
.batchSize(PIPELINE_BATCH_SIZE)
.forEach(new Block<Document>() {
@Override
public void apply(Document doc) {
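                        // Round-trip through the legacy DBObject API so the
                        // storage strategy can deserialize the document into a
                        // RyaStatement.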
final DBObject dbo = (DBObject) JSON.parse(doc.toJson());
RyaStatement rstmt = storageStrategy.deserializeDBObject(dbo);
if (!statementExists(rstmt)) {
count.increment();
doc.replace(SimpleMongoDBStorageStrategy.STATEMENT_METADATA, metadata.toString());
try {
batchWriter.addObjectToQueue(doc);
} catch (MongoDbBatchWriterException e) {
logger.error("Couldn't insert " + rstmt, e);
}
}
}
});
try {
batchWriter.flush();
} catch (MongoDbBatchWriterException e) {
throw new ForwardChainException("Error writing to Mongo", e);
}
logger.info("Added " + count + " new statements.");
executionTimes.compute(rule, (r, previous) -> {
if (previous != null && previous > timestamp) {
return previous;
}
else {
return timestamp;
}
});
return count.longValue();
    }

private boolean statementExists(RyaStatement rstmt) {
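        // Issue a point query for this exact triple; any result means the
        // statement already exists.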
try {
return engine.query(new RyaQuery(rstmt)).iterator().hasNext();
} catch (RyaDAOException e) {
logger.error("Error querying for " + rstmt, e);
return false;
}
}

    /**
     * Flush and close the batch writer, and shut down the backup
     * SailExecutionStrategy.
     * @throws ForwardChainException if the batch writer or the backup
     *      strategy throws an error.
     */
@Override
public void shutDown() throws ForwardChainException {
backup.shutDown();
try {
batchWriter.shutdown();
} catch (MongoDbBatchWriterException e) {
throw new ForwardChainException("Error shutting down batch writer", e);
}
}

    /**
     * Converts a construct rule into a series of documents representing
     * aggregation pipeline steps.
     * @param rule A construct query rule.
     * @param sourceLevel Only make derivations whose source triples have at
     *      least this derivation level, i.e. themselves took at least that
     *      many forward chaining steps to infer. Set to zero to skip this
     *      check.
     * @param timestamp Timestamp to be set for all inferred triples.
     * @return An aggregation pipeline, or null if the rule can't be
     *      expressed as a pipeline.
     * @throws ForwardChainException if pipeline construction fails.
     */
private List<Bson> toPipeline(AbstractConstructRule rule, int sourceLevel,
long timestamp) throws ForwardChainException {
TupleExpr tupleExpr = rule.getQuery().getTupleExpr();
if (!(tupleExpr instanceof QueryRoot)) {
tupleExpr = new QueryRoot(tupleExpr);
}
try {
tupleExpr.visit(pipelineVisitor);
} catch (Exception e) {
throw new ForwardChainException("Error converting construct rule to an aggregation pipeline", e);
}
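        // If the visitor expressed the entire query as a pipeline, the root's
        // argument is now a single AggregationPipelineQueryNode; otherwise
        // fall through and return null.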
if (tupleExpr instanceof QueryRoot) {
QueryRoot root = (QueryRoot) tupleExpr;
if (root.getArg() instanceof AggregationPipelineQueryNode) {
AggregationPipelineQueryNode pipelineNode = (AggregationPipelineQueryNode) root.getArg();
pipelineNode.distinct(); // require distinct triples
pipelineNode.requireSourceDerivationDepth(sourceLevel);
long latestTime = executionTimes.getOrDefault(rule, 0L);
if (latestTime > 0) {
pipelineNode.requireSourceTimestamp(latestTime);
}
return pipelineNode.getTriplePipeline(timestamp, false);
}
}
return null;
}
}