| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Package |
| /////////////// |
| package org.apache.jena.ontology; |
| |
| |
| // Imports |
| /////////////// |
| import java.util.*; |
| import java.util.function.Predicate; |
| |
| import org.apache.jena.rdf.model.* ; |
| import org.apache.jena.shared.JenaException ; |
| |
| |
| /** |
| * <p> |
| * Some general utilities and algorithms to support developers working with the |
| * general classes in the Jena ontology API. <strong>Warning</strong> these |
| * utilities are <strong>experimental</strong>. Extensive testing has not yet |
| * occurred (see {@code org.apache.jena.ontology.impl.TestOntTools} in the |
| * test area for basic unit tests), |
| * and in particular performance testing has not been carried out yet. |
| * Users are advised to exercise caution before relying on these utilities in |
| * production code. Please send any comments or suggestions to the |
| * <a href="http://tech.groups.yahoo.com/group/jena-dev">Jena support email list</a>. |
| * </p> |
| */ |
| public class OntTools |
| { |
| // Constants |
| ////////////////////////////////// |
| |
| // Static variables |
| ////////////////////////////////// |
| |
| // static private Logger log = LoggerFactory.getLogger( OntTools.class ); |
| |
| // Instance variables |
| ////////////////////////////////// |
| |
| // Constructors |
| ////////////////////////////////// |
| |
| // External signature methods |
| ////////////////////////////////// |
| |
| /** |
| * <p>Answer the lowest common ancestor of two classes in a given ontology. This |
| * is the class that is farthest from the root concept (defaulting to |
| * <code>owl:Thing</code> which is a super-class of both <code>u</code> |
| * and <code>v</code>. The algorithm is based on |
| * <a href="http://en.wikipedia.org/wiki/Tarjan's_off-line_least_common_ancestors_algorithm">Tarjan's |
| * off-line LCA</a>. The current implementation expects that the given model: |
| * </p> |
| * <ul> |
| * <li>is transitively closed over the <code>subClassOf</code> relation</li> |
| * <li>can cheaply determine <em>direct sub-class</em> relations</li> |
| * </ul> |
| * <p>Both of these conditions are true of the built-in Jena OWL reasoners, |
| * such as {@link OntModelSpec#OWL_MEM_MICRO_RULE_INF}, and external DL |
| * reasoners such as Pellet.</p> |
| * |
| * @param m The ontology model being queried to find the LCA, which should conform |
| * to the reasoner capabilities described above |
| * @param u An ontology class |
| * @param v An ontology class |
| * @return The LCA of <code>u</code> and <code>v</code> |
| * @exception JenaException if the language profile of the given model does not |
| * define a top concept (e.g. <code>owl:Thing</code>) |
| */ |
| public static OntClass getLCA( OntModel m, OntClass u, OntClass v ) { |
| Resource root = m.getProfile().THING(); |
| if (root == null) { |
| throw new JenaException( "The given OntModel has a language profile that does not define a generic root class (such as owl:Thing)" ); |
| } |
| |
| root = root.inModel( m ); |
| return getLCA( m, root.as( OntClass.class ), u, v ); |
| } |
| |
| /** |
| * Answer the lowest common ancestor of two classes, assuming that the given |
| * class is the root concept to start searching from. See {@link #getLCA(OntModel, OntClass, OntClass)} |
| * for details. |
| * |
| * @param m The ontology model being queried to find the LCA, which should conform |
| * to the reasoner capabilities described above |
| * @param root The root concept, which will be the starting point for the algorithm |
| * @param u An ontology class |
| * @param v An ontology class |
| * @return The LCA of <code>u</code> and <code>v</code> |
| * @exception JenaException if the language profile of the given model does not |
| * define a top concept (e.g. <code>owl:Thing</code>) |
| */ |
| public static OntClass getLCA( OntModel m, OntClass root, OntClass u, OntClass v ) { |
| // check some common cases first |
| if (u.equals( root ) || v.equals( root )) { |
| return root; |
| } |
| |
| if (u.hasSubClass( v )) { |
| return u; |
| } |
| |
| if (v.hasSubClass( u )) { |
| return v; |
| } |
| |
| // not a common case, so apply Tarjan's LCA algorithm |
| LCAIndex index = new LCAIndex(); |
| lca( root, u, v, index ); |
| return (OntClass) index.getLCA( u, v ); |
| } |
| |
| |
| /** |
| * <p>Answer the shortest path from the <code>start</code> resource to the <code>end</code> RDF node, |
| * such that every step on the path is accepted by the given filter. A path is a {@link List} |
| * of RDF {@link Statement}s. The subject of the first statement in the list is <code>start</code>, |
| * and the object of the last statement in the list is <code>end</code>.</p> |
| * <p>The <code>onPath</code> argument is a {@link Predicate}, which accepts a statement and returns |
| * true if the statement should be considered to be on the path. To search for an unconstrained |
| * path, pass {@code ()->true} as an argument. To search for a path whose predicates match a |
| * fixed restricted set of property names, pass an instance of {@link PredicatesFilter}.</p> |
| * <p>If there is more than one path of minimal length from <code>start</code> to <code>end</code>, |
| * this method returns an arbitrary one. The algorithm is blind breadth-first search, |
| * with loop detection.</p> |
| * |
| * @param m The model in which we are seeking a path |
| * @param start The starting resource |
| * @param end The end, or goal, node |
| * @param onPath A filter which determines whether a given statement can be considered part |
| * of the path |
| * @return A path, consisting of a list of statements whose first subject is <code>start</code>, |
| * and whose last object is <code>end</code>, or null if no such path exists. |
| */ |
| public static Path findShortestPath( Model m, Resource start, RDFNode end, Predicate<Statement> onPath ) { |
| List<Path> bfs = new LinkedList<>(); |
| Set<Resource> seen = new HashSet<>(); |
| |
| // initialise the paths |
| for (Iterator<Statement> i = m.listStatements( start, null, (RDFNode) null ).filterKeep( onPath ); i.hasNext(); ) { |
| bfs.add( new Path().append( i.next() ) ); |
| } |
| |
| // search |
| Path solution = null; |
| while (solution == null && !bfs.isEmpty()) { |
| Path candidate = bfs.remove( 0 ); |
| |
| if (candidate.hasTerminus( end )) { |
| solution = candidate; |
| } |
| else { |
| Resource terminus = candidate.getTerminalResource(); |
| if (terminus != null) { |
| seen.add( terminus ); |
| |
| // breadth-first expansion |
| for (Iterator<Statement> i = terminus.listProperties().filterKeep( onPath ); i.hasNext(); ) { |
| Statement link = i.next(); |
| |
| // no looping allowed, so we skip this link if it takes us to a node we've seen |
| if (!seen.contains( link.getObject() )) { |
| bfs.add( candidate.append( link ) ); |
| } |
| } |
| } |
| } |
| } |
| |
| return solution; |
| } |
| |
| |
| /** |
| * Answer a list of the named hierarchy roots of a given {@link OntModel}. This |
| * will be similar to the results of {@link OntModel#listHierarchyRootClasses()}, |
| * with the added constraint that every member of the returned iterator will be a |
| * named class, not an anonymous class expression. The named root classes are |
| * calculated from the root classes, by recursively replacing every anonymous class |
| * with its direct sub-classes. Thus it can be seen that the values in the list |
| * consists of the shallowest fringe of named classes in the hierarchy. |
| * @param m An ontology model |
| * @return A list of classes whose members are the named root classes of the |
| * class hierarchy in <code>m</code> |
| */ |
| public static List<OntClass> namedHierarchyRoots( OntModel m ) { |
| List<OntClass> nhr = new ArrayList<>(); // named roots |
| List<OntClass> ahr = new ArrayList<>(); // anon roots |
| |
| // do the initial partition of the root classes |
| partitionByNamed( m.listHierarchyRootClasses(), nhr, ahr ); |
| |
| // now push the fringe down until we have only named classes |
| while (!ahr.isEmpty()) { |
| OntClass c = ahr.remove( 0 ); |
| partitionByNamed( c.listSubClasses( true ), nhr, ahr ); |
| } |
| |
| return nhr; |
| } |
| |
| |
| // Internal implementation methods |
| ////////////////////////////////// |
| |
| /** |
| * Compute the LCA disjoint set at <code>cls</code>, noting that we are |
| * searching for the LCA of <code>uCls</code> and <code>vCls</code>. |
| * @param cls The class we are testing (this is 'u' in the Wiki article) |
| * @param uCls One of the two classes we are searching for the LCA of. We |
| * have simplified the set P of pairs to the unity set {uCls,vCls} |
| * @param vCls One of the two classes we are searching for the LCA of. We |
| * have simplified the set P of pairs to the unity set {uCls,vCls} |
| * @param index A data structure mapping resources to disjoint sets (since |
| * we can't side-effect Jena resources), and which is used to record the |
| * LCA pairs |
| */ |
| protected static DisjointSet lca( OntClass cls, OntClass uCls, OntClass vCls, LCAIndex index ) { |
| // log.debug( "Entering lca(), cls = " + cls ); |
| DisjointSet clsSet = index.getSet( cls ); |
| if (clsSet.isBlack()) { |
| // already visited |
| return clsSet; |
| } |
| |
| // not visited yet |
| clsSet.setAncestor( clsSet ); |
| |
| // for each child of cls |
| for (Iterator<OntClass> i = cls.listSubClasses( true ); i.hasNext(); ) { |
| OntClass child = i.next(); |
| |
| if (child.equals( cls ) || child.equals( cls.getProfile().NOTHING() )) { |
| // we ignore the reflexive case and bottom |
| continue; |
| } |
| |
| // compute the LCA of the sub-tree |
| DisjointSet v = lca( child, uCls, vCls, index ); |
| |
| // union the two disjoint sets together |
| clsSet.union( v ); |
| |
| // propagate the distinguished member |
| clsSet.find().setAncestor( clsSet ); |
| } |
| |
| // this node is done |
| clsSet.setBlack(); |
| |
| // are we inspecting one of the elements we're interested in? |
| if (cls.equals( uCls )) { |
| checkSolution( uCls, vCls, index ); |
| } |
| else if (cls.equals( vCls )) { |
| checkSolution( vCls, uCls, index ); |
| } |
| |
| return clsSet; |
| } |
| |
| /** |
| * Check to see if we have found a solution to the problem. |
| * TODO: we could throw an exception to simulate a non-local exit |
| * here, since we've assumed that P is the unity set. |
| * @param uCls |
| * @param vCls |
| * @param index |
| */ |
| protected static void checkSolution( OntClass uCls, OntClass vCls, LCAIndex index ) { |
| DisjointSet vSet = index.getSet( vCls ); |
| DisjointSet uSet = index.getSet( uCls ); |
| |
| if (vSet != null && vSet.isBlack() && !vSet.used() && |
| uSet != null && uSet.isBlack() && !uSet.used()) { |
| vSet.setUsed(); |
| uSet.setUsed(); |
| // log.debug( "Found LCA: u = " + uCls + ", v = " + vCls ); |
| OntClass lca = (OntClass) vSet.find().getAncestor().getNode(); |
| // log.debug( "Found LCA: lca = " + lca ); |
| index.setLCA( uCls, vCls, lca ); |
| } |
| |
| } |
| |
| /** |
| * Partition the members of an iterator into two lists, according to whether |
| * they are named or anonymous classes |
| * @param i An iterator to partition |
| * @param named A list of named classes |
| * @param anon A list of anonymous classes |
| */ |
| protected static void partitionByNamed( Iterator<? extends OntClass> i, List<OntClass> named, List<OntClass> anon ) { |
| while (i.hasNext()) { |
| OntClass c = i.next(); |
| boolean ignore = false; |
| |
| // duplicate check: we ignore this class if we've already got it |
| if (named.contains( c )) { |
| ignore = true; |
| } |
| |
| // subsumption check: c must have only anon classes or Thing |
| // as super-classes to still qualify as a root class |
| Resource thing = c.getProfile().THING(); |
| for (Iterator<OntClass> j = c.listSuperClasses(); !ignore && j.hasNext(); ) { |
| OntClass sup = j.next(); |
| if (!((thing != null && sup.equals( thing )) || |
| sup.isAnon() || |
| sup.equals( c ))) |
| { |
| ignore = true; |
| } |
| } |
| |
| if (!ignore) { |
| // place the class in the appropriate partition |
| (c.isAnon() ? anon : named).add( c ); |
| } |
| } |
| } |
| |
| |
| //============================================================================== |
| // Inner class definitions |
| //============================================================================== |
| |
| /** |
| * A simple representation of disjoint sets |
| */ |
| public static class DisjointSet |
| { |
| /** The resource this set represents */ |
| private Resource m_node; |
| |
| /** The parent set in a union */ |
| private DisjointSet m_parent; |
| |
| /** Heuristic used to build balanced unions */ |
| private int m_rank; |
| |
| /** The link to the distinguished member set */ |
| private DisjointSet m_ancestor; |
| |
| /** Set to true when the node has been processed */ |
| private boolean m_black = false; |
| |
| /** Set to true when we've inspected a black set, since the result is only |
| * correct just after both of the sets for u and v have been marked black */ |
| private boolean m_used = false; |
| |
| public DisjointSet( Resource node ) { |
| m_node = node; |
| m_rank = 0; |
| m_parent = this; |
| } |
| |
| public Resource getNode() { |
| return m_node; |
| } |
| |
| public DisjointSet getParent() { |
| return m_parent; |
| } |
| |
| public void setParent( DisjointSet parent ) { |
| m_parent = parent; |
| } |
| |
| public int getRank() { |
| return m_rank; |
| } |
| |
| public void incrementRank() { |
| m_rank++; |
| } |
| |
| public DisjointSet getAncestor() { |
| return m_ancestor; |
| } |
| |
| public void setAncestor( DisjointSet anc ) { |
| m_ancestor = anc; |
| } |
| |
| public void setBlack() { |
| m_black = true; |
| } |
| |
| public boolean isBlack() { |
| return m_black; |
| } |
| |
| public boolean used() { |
| return m_used; |
| } |
| |
| public void setUsed() { |
| m_used = true; |
| } |
| |
| /** |
| * The find operation collapses the pointer to the root parent, which is |
| * one of Tarjan's standard optimisations. |
| * @return The representative of the union containing this set |
| */ |
| public DisjointSet find() { |
| DisjointSet root; |
| if (getParent() == this) { |
| // the representative of the set |
| root = this; |
| } |
| else { |
| // otherwise, seek the representative of my parent and save it |
| root = getParent().find(); |
| setParent( root ); |
| } |
| |
| return root; |
| } |
| |
| /** |
| * The union of two sets |
| * @param y |
| */ |
| public void union( DisjointSet y ) { |
| DisjointSet xRoot = find(); |
| DisjointSet yRoot = y.find(); |
| |
| if (xRoot.getRank() > yRoot.getRank()) { |
| yRoot.setParent( xRoot ); |
| } |
| else if (yRoot.getRank() > xRoot.getRank()) { |
| xRoot.setParent( yRoot ); |
| } |
| else if (xRoot != yRoot) { |
| yRoot.setParent( xRoot ); |
| xRoot.incrementRank(); |
| } |
| } |
| |
| /** |
| * @see java.lang.Object#toString() |
| * @return A string representation of this set for debugging |
| */ |
| @Override |
| public String toString() { |
| StringBuilder buf = new StringBuilder(); |
| buf.append( "DisjointSet{node=" ); |
| buf.append( m_node ); |
| buf.append( ",anc=" ); |
| buf.append( (getAncestor() == this) ? "self" : (getAncestor() == null ? "null" : getAncestor().toShortString()) ); |
| buf.append( ",parent=" ); |
| buf.append( (getParent() == this) ? "self" : (getParent() == null ? "null" : getParent().toShortString()) ); |
| buf.append( ",rank=" ); |
| buf.append( getRank() ); |
| buf.append( m_black ? ",black" : ",white" ); |
| buf.append( "}"); |
| |
| return buf.toString(); |
| } |
| |
| public String toShortString() { |
| StringBuilder buf = new StringBuilder(); |
| buf.append( "DisjointSet{node=" ); |
| buf.append( m_node ); |
| buf.append( ",parent=" ); |
| buf.append( (getParent() == this) ? "self" : (getParent() == null ? "null" : getParent().toShortString()) ); |
| buf.append( "...}" ); |
| |
| return buf.toString(); |
| } |
| } |
| |
| /** |
| * Simple data structure mapping RDF nodes to disjoint sets, and |
| * pairs of resources to their LCA. |
| */ |
| public static class LCAIndex |
| { |
| private Map<Resource, DisjointSet> m_setIndex = new HashMap<>(); |
| private Map<Resource, Map<Resource, Resource>> m_lcaIndex = new HashMap<>(); |
| |
| public Resource getLCA( Resource u, Resource v ) { |
| Map<Resource, Resource> map = m_lcaIndex.get( u ); |
| Resource lca = (map == null) ? null : (Resource) map.get( v ); |
| |
| if (lca == null) { |
| map = m_lcaIndex.get( v ); |
| lca = (map == null) ? null : (Resource) map.get( u ); |
| } |
| |
| return lca; |
| } |
| |
| public void setLCA( Resource u, Resource v, Resource lca ) { |
| Map<Resource, Resource> uMap = m_lcaIndex.get( u ); |
| if (uMap == null) { |
| uMap = new HashMap<>(); |
| m_lcaIndex.put( u, uMap ); |
| } |
| uMap.put( v, lca ); |
| } |
| |
| public DisjointSet getSet( Resource r ) { |
| DisjointSet s = m_setIndex.get( r ); |
| if (s == null) { |
| // log.debug( "Generating new set for " + r ); |
| s = new DisjointSet( r ); |
| m_setIndex.put( r, s ); |
| } |
| else { |
| // log.debug( "Retrieving old set for " + r ); |
| |
| } |
| return s; |
| } |
| } |
| |
| /** |
| * A path is an application of {@link java.util.List} containing only {@link Statement} |
| * objects, and in which for all adjacent elements <code>S<sub>i-1</sub></code> |
| * and <code>S<sub>i</sub></code>, where <code>i > 0</code>, it is true that: |
| * <code><pre>S<sub>i-1</sub>.getObject().equals( S<sub>i</sub>.getSubject() )</pre></code> |
| */ |
| public static class Path extends ArrayList<Statement> |
| { |
| public Path() { |
| super(); |
| } |
| |
| public Path( Path basePath ) { |
| super( basePath ); |
| } |
| |
| public Statement getStatement( int i ) { |
| return get( i ); |
| } |
| |
| /** Answer a new Path whose elements are this Path with <code>s</code> added at the end */ |
| public Path append( Statement s ) { |
| Path newPath = new Path( this ); |
| newPath.add( s ); |
| return newPath; |
| } |
| |
| /** Answer true if the last link on the path has object equal to <code>n</code> */ |
| public boolean hasTerminus( RDFNode n ) { |
| return n != null && n.equals( getTerminal() ); |
| } |
| |
| /** Answer the RDF node at the end of the path, if defined, or null */ |
| public RDFNode getTerminal() { |
| return size() > 0 ? get( size() - 1 ).getObject() : null; |
| } |
| |
| /** Answer the resource at the end of the path, if defined, or null */ |
| public Resource getTerminalResource() { |
| RDFNode n = getTerminal(); |
| return (n != null && n.isResource()) ? (Resource) n : null; |
| } |
| } |
| |
| /** |
| * A filter which accepts statements whose predicate matches one of a collection |
| * of predicates held by the filter object. |
| */ |
| public static class PredicatesFilter implements Predicate<Statement> |
| { |
| public Collection<Property> m_preds; |
| |
| /** Accept statements with any predicate from <code>preds</code> */ |
| public PredicatesFilter( Collection<Property> preds ) { |
| m_preds = preds; |
| } |
| |
| /** Accept statements with any predicate from <code>preds</code> */ |
| public PredicatesFilter( Property[] preds ) { |
| m_preds = new HashSet<>(); |
| for ( Property pred : preds ) |
| { |
| m_preds.add( pred ); |
| } |
| } |
| |
| /** Accept statements with predicate <code>pred</code> */ |
| public PredicatesFilter( Property pred ) { |
| m_preds = new HashSet<>(); |
| m_preds.add( pred ); |
| } |
| |
| @Override public boolean test( Statement s ) { |
| return m_preds.contains( s.getPredicate() ); |
| } |
| } |
| } |