blob: 1b185e7c22db987832b760ffcda5e309d31d9b84 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.graph;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.jena.atlas.iterator.Iter;
import org.apache.jena.graph.impl.GraphWithPerform;
import org.apache.jena.util.IteratorCollection;
import org.apache.jena.util.iterator.ExtendedIterator;
import org.apache.jena.util.iterator.WrappedIterator;
/**
An ad-hoc collection of useful code for graphs
*/
public class GraphUtil
{
/**
* Only static methods here - the class cannot be instantiated.
*/
private GraphUtil()
{}
/** Return an iterator over the unique subjects with predicate p and object o.
* p and o can be wildcards (Node.ANY)
* @param g Graph
* @param p Predicate - may be Node.ANY
* @param o Object - may be Node.ANY
* @return ExtendedIterator
*/
public static ExtendedIterator<Node> listSubjects(Graph g, Node p, Node o) {
// Restore a minimal QueryHandler?
ExtendedIterator<Triple> iter = g.find(Node.ANY, p, o) ;
Set<Node> nodes = iter.mapWith(t -> t.getSubject()).toSet() ;
return WrappedIterator.createNoRemove(nodes.iterator()) ;
}
/** Return an iterator over the unique predicate between s and o.
* s and o can be wildcards (Node.ANY)
* @param g Graph
* @param s Subject - may be Node.ANY
* @param o Object - may be Node.ANY
* @return ExtendedIterator
*/
public static ExtendedIterator<Node> listPredicates(Graph g, Node s, Node o) {
ExtendedIterator<Triple> iter = g.find(s,Node.ANY, o) ;
Set<Node> nodes = iter.mapWith(t -> t.getPredicate()).toSet() ;
return WrappedIterator.createNoRemove(nodes.iterator()) ;
}
/** Return an iterator over the unique objects with a given subject and object.
* s and p can be wildcards (Node.ANY)
* @param g Graph
* @param s Subject - may be Node.ANY
* @param p Predicate - may be Node.ANY
* @return ExtendedIterator
*/
public static ExtendedIterator<Node> listObjects(Graph g, Node s, Node p) {
ExtendedIterator<Triple> iter = g.find(s, p, Node.ANY) ;
Set<Node> nodes = iter.mapWith(t -> t.getObject()).toSet() ;
return WrappedIterator.createNoRemove(nodes.iterator()) ;
}
/** Does the graph use the node anywhere as a subject, predicate or object? */
public static boolean containsNode(Graph graph, Node node) {
return
graph.contains(node, Node.ANY, Node.ANY) ||
graph.contains(Node.ANY, Node.ANY, node) ||
graph.contains(Node.ANY, node, Node.ANY) ;
}
/* Control how events are dealt with in bulk */
private static final boolean OldStyle = true ;
/**
* Answer an iterator covering all the triples in the specified graph.
*
* @param g
* the graph from which to extract triples
* @return an iterator over all the graph's triples
*/
public static ExtendedIterator<Triple> findAll(Graph g) {
return g.find() ;
}
public static void add(Graph graph, Triple[] triples) {
if ( OldStyle && graph instanceof GraphWithPerform ) {
GraphWithPerform g = (GraphWithPerform)graph ;
for (Triple t : triples )
g.performAdd(t) ;
graph.getEventManager().notifyAddArray(graph, triples) ;
} else {
for (Triple t : triples )
graph.add(t) ;
}
}
public static void add(Graph graph, List<Triple> triples) {
addIteratorWorkerDirect(graph, triples.iterator()) ;
if ( OldStyle && graph instanceof GraphWithPerform )
graph.getEventManager().notifyAddList(graph, triples) ;
}
public static void add(Graph graph, Iterator<Triple> it) {
if ( OldStyle && graph instanceof GraphWithPerform ) {
// Materialize for the notify.
List<Triple> s = IteratorCollection.iteratorToList(it) ;
addIteratorWorkerDirect(graph, s.iterator());
graph.getEventManager().notifyAddIterator(graph, s) ;
}
else
addIteratorWorker(graph, it);
}
/** Add triples into the destination (arg 1) from the source (arg 2)*/
public static void addInto(Graph dstGraph, Graph srcGraph ) {
if ( dstGraph == srcGraph && ! dstGraph.getEventManager().listening() )
return ;
dstGraph.getPrefixMapping().setNsPrefixes(srcGraph.getPrefixMapping()) ;
addIteratorWorker(dstGraph, findAll( srcGraph ));
dstGraph.getEventManager().notifyAddGraph( dstGraph, srcGraph );
}
private static void addIteratorWorker( Graph graph, Iterator<Triple> it ) {
List<Triple> s = IteratorCollection.iteratorToList( it );
addIteratorWorkerDirect(graph, s.iterator());
}
private static void addIteratorWorkerDirect( Graph graph, Iterator<Triple> it ) {
if ( OldStyle && graph instanceof GraphWithPerform ) {
GraphWithPerform g = (GraphWithPerform)graph;
it.forEachRemaining(g::performAdd);
} else {
it.forEachRemaining(graph::add);
}
}
private static boolean requireEvents(Graph graph) {
return graph.getEventManager().listening() ;
}
public static void delete(Graph graph, Triple[] triples) {
if ( OldStyle && graph instanceof GraphWithPerform ) {
GraphWithPerform g = (GraphWithPerform)graph ;
for ( Triple t : triples )
g.performDelete(t) ;
graph.getEventManager().notifyDeleteArray(graph, triples) ;
} else {
for ( Triple t : triples )
graph.delete(t) ;
}
}
public static void delete(Graph graph, List<Triple> triples) {
deleteIteratorWorkerDirect(graph, triples.iterator());
if ( OldStyle && graph instanceof GraphWithPerform )
graph.getEventManager().notifyDeleteList(graph, triples) ;
}
public static void delete(Graph graph, Iterator<Triple> it) {
if ( OldStyle && graph instanceof GraphWithPerform ) {
// Materialize for the notify.
List<Triple> s = IteratorCollection.iteratorToList(it) ;
deleteIteratorWorkerDirect(graph, s.iterator());
graph.getEventManager().notifyDeleteIterator(graph, s) ;
} else
deleteIteratorWorker(graph, it);
}
private static final int sliceSize = 1000 ;
/** A safe and cautious remove() function that converts the remove to
* a number of {@link Graph#delete(Triple)} operations.
* <p>
* To avoid any possible ConcurrentModificationExceptions,
* it finds batches of triples, deletes them and tries again until
* no more triples matching the input can be found.
*/
public static void remove(Graph g, Node s, Node p, Node o) {
// Beware of ConcurrentModificationExceptions.
// Delete in batches.
// That way, there is no active iterator when a delete
// from the indexes happens.
Triple[] array = new Triple[sliceSize] ;
while (true) {
// Convert/cache s,p,o?
// The Node Cache will catch these so don't worry unduely.
ExtendedIterator<Triple> iter = g.find(s, p, o) ;
// Get a slice
int len = 0 ;
for ( ; len < sliceSize ; len++ ) {
if ( !iter.hasNext() )
break ;
array[len] = iter.next() ;
}
// Delete them.
for ( int i = 0 ; i < len ; i++ ) {
g.delete(array[i]) ;
array[i] = null ;
}
// Finished?
if ( len < sliceSize )
break ;
}
}
/**
* Delete triples in {@code srcGraph} from {@code dstGraph}
* by looping on {@code srcGraph}.
*
*/
public static void deleteLoopSrc(Graph dstGraph, Graph srcGraph) {
deleteIteratorWorker(dstGraph, findAll(srcGraph)) ;
dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ;
}
/**
* Delete the triple in {@code srcGraph} from {@code dstGraph}
* by checking the contents of {@code dsgGraph} against the {@code srcGraph}.
* This involves calling {@code srcGraph.contains}.
* @implNote
* {@code dstGraph.size()} is used by this method.
*/
public static void deleteLoopDst(Graph dstGraph, Graph srcGraph) {
// Size the list to avoid reallocation on growth.
int dstSize = dstGraph.size();
List<Triple> toBeDeleted = new ArrayList<>(dstSize);
Iterator<Triple> iter = findAll(dstGraph);
for( ; iter.hasNext() ; ) {
Triple t = iter.next();
if ( srcGraph.contains(t) )
toBeDeleted.add(t);
}
deleteIteratorWorkerDirect(dstGraph, toBeDeleted.iterator());
dstGraph.getEventManager().notifyDeleteGraph(dstGraph, srcGraph) ;
}
/**
* Delete the triples supplied by an iterator. This function is "concurrent
* modification" safe - it internally takes a copy of the iterator.
*/
private static void deleteIteratorWorker(Graph graph, Iterator<Triple> it) {
List<Triple> s = IteratorCollection.iteratorToList(it) ;
deleteIteratorWorkerDirect(graph, s.iterator());
}
/**
* Delete the triples supplied by an iterator. This function is not "concurrent
* modification" safe; it assumes it can use the iterator while deleting from the
* graph.
*/
private static void deleteIteratorWorkerDirect(Graph graph, Iterator<Triple> it) {
if ( OldStyle && graph instanceof GraphWithPerform ) {
GraphWithPerform g = (GraphWithPerform)graph ;
it.forEachRemaining(g::performDelete);
} else {
it.forEachRemaining(graph::delete);
}
}
private static int MIN_SRC_SIZE = 1000 ;
// If source and destination are large, limit the search for the best way round to "deleteFrom"
private static int MAX_SRC_SIZE = 1000*1000 ;
private static int DST_SRC_RATIO = 2 ;
/**
* Delete triples in the destination (arg 1) as given in the source (arg 2).
*
* @implNote
* This is designed for the case of {@code dstGraph} being comparable or much larger than
* {@code srcGraph} or {@code srcGraph} having a lot of triples to actually be
* deleted from {@code dstGraph}. This includes teh case of large, persistent {@code dstGraph}.
* <p>
* It is not designed for a large {@code srcGraph} and large {@code dstGraph}
* with only a few triples in common to delete from {@code dstGraph}. It is better to
* calculate the difference in some way, and copy into a small graph to use as the {@code srcGraph}.
* <p>
* To force delete by looping on {@code srcGraph}, use {@link #deleteLoopSrc(Graph, Graph)}.
* <p>
* For large {@code srcGraph} and small {@code dstGraph}, use {@link #deleteLoopDst}.
*
* See discussion on <a href="https://github.com/apache/jena/pull/212">jena/pull/212</a>,
* (archived at <a href="https://issues.apache.org/jira/browse/JENA-1284">JENA-1284</a>).
*/
public static void deleteFrom(Graph dstGraph, Graph srcGraph) {
boolean events = requireEvents(dstGraph);
if ( dstGraph == srcGraph && ! events ) {
dstGraph.clear();
return;
}
boolean loopOnSrc = decideHowtoExecute(dstGraph, srcGraph);
if ( loopOnSrc ) {
// Normal path.
deleteLoopSrc(dstGraph, srcGraph);
return;
}
// Loop on dstGraph, not srcGraph, but need to use srcGraph.contains on this code path.
deleteLoopDst(dstGraph, srcGraph);
}
private static final int CMP_GREATER = 1;
private static final int CMP_EQUAL = 0;
private static final int CMP_LESS = -1;
/**
* Decide whether to loop on dstGraph or srcGraph.
* @param dstGraph
* @param srcGraph
* @return boolean true for "loop on src"
*/
private static boolean decideHowtoExecute(Graph dstGraph, Graph srcGraph) {
//return decideHowtoExecuteBySizeSize(dstGraph, srcGraph);
// Avoid calling dstGraph.size()
return decideHowtoExecuteBySizeStep(dstGraph, srcGraph);
}
/**
* Decide using dstGraph.size() and srcGraph.size()
*/
private static boolean decideHowtoExecuteBySizeSize(Graph dstGraph, Graph srcGraph) {
// Loop on src if:
// size(src) <= MIN_SRC_SIZE : srcGraph is below the threshold MIN_SRC_SIZE (a "Just Do it" number)
// size(src)*DST_SRC_RATIO <= size(dst)
// dstGraph is "much" larger than src where "much" is given by DST_SRC_RATIO
// Assumes dstGraph.size is efficient.
int srcSize = srcGraph.size();
if ( srcSize <= MIN_SRC_SIZE )
return true ;
int dstSize = dstGraph.size();
boolean loopOnSrc = (srcSize <= MIN_SRC_SIZE || dstSize > DST_SRC_RATIO*srcSize) ;
return loopOnSrc;
}
/**
* Avoid dstGraph.size(). Instead step through {@codedstGraph.find} to compare to {@code srcGraph.size()}
*
*/
private static boolean decideHowtoExecuteBySizeStep(Graph dstGraph, Graph srcGraph) {
// loopOnSrc if:
// size(src) <= MIN_SRC_SIZE
// size(src)*DST_SRC_RATIO <= |find(dst)|
int srcSize = srcGraph.size();
if ( srcSize <= MIN_SRC_SIZE )
return true ;
boolean loopOnSrc = (srcSize <= MIN_SRC_SIZE || compareSizeTo(dstGraph, DST_SRC_RATIO*srcSize) == CMP_GREATER) ;
return loopOnSrc;
}
/** Compare the size of a graph to {@code size}, without calling Graph.size
* by iterating on {@code graph.find()} as necessary.
* <p>
* Return -1 , 0, 1 for the comparison.
*/
/*package*/ static int compareSizeTo(Graph graph, int size) {
ExtendedIterator<Triple> it = graph.find();
try {
int stepsTake = Iter.step(it, size);
if ( stepsTake < size )
// Iterator ran out.
return CMP_LESS;
if ( !it.hasNext())
// Finsiehd at the same timne.
return CMP_EQUAL;
// Still more to go
return CMP_GREATER;
} finally {
it.close();
}
}
}