blob: fbe5fd29efd54c1dbd886eb3b0ef3bfefc4f13ff [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.commons.indexedgraph;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.clerezza.rdf.core.BNode;
import org.apache.clerezza.rdf.core.Language;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.AbstractTripleCollection;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* {@link TripleCollection} implementation that uses indexes for <ul>
* <li> subject, predicate, object [SPO]
* <li> predicate, object, subject [POS]
* <li> object, subject, predicate [OSP]
* </ul>
* Indexes are maintained in {@link TreeSet}s with according {@link Comparator}
* instances ({@link #spoComparator}, {@link #posComparator} ,
* {@link #ospComparator}). {@link Resource}s are compared first using the
* {@link Resource#hashCode()} and only if this matches by using
* {@link Resource}{@link #toString()}.<p>
* The {@link #filter(NonLiteral, UriRef, Resource)} implementation is based
* on {@link TreeSet#subSet(Object, Object)}. All Iterators returned directly
* operate on top of one of the internal indexes.
* <p>
* This class is not public, implementations should use {@link IndexedGraph} or
* {@link IndexedMGraph}.
*
* @author rwesten
*/
class IndexedTripleCollection extends AbstractTripleCollection implements TripleCollection {
private static final Logger log = LoggerFactory.getLogger(IndexedTripleCollection.class);
/**
* This map is used to ensure constant ordering for {@link BNode} that do
* have the same hashcode (and therefore result to have the same
* {@link BNode#toString()} value.
*/
private final Map<Integer,List<Resource>> hashCodeConflictMap = new HashMap<Integer,List<Resource>>();
/**
* Compares Triples based on Subject, Predicate, Object
*/
private final Comparator<Triple> spoComparator = new Comparator<Triple>() {
@Override
public int compare(Triple a, Triple b) {
int c = IndexedTripleCollection.compare(a.getSubject(), b.getSubject(), hashCodeConflictMap);
if(c == 0){
c = IndexedTripleCollection.compare(a.getPredicate(), b.getPredicate(), hashCodeConflictMap);
if(c == 0){
c = IndexedTripleCollection.compare(a.getObject(), b.getObject(), hashCodeConflictMap);
}
}
return c;
}
};
/**
* The SPO index
*/
private final NavigableSet<Triple> spo = new TreeSet<Triple>(spoComparator);
/**
* Compares Triples based on Predicate, Object, Subject
*/
private final Comparator<Triple> posComparator = new Comparator<Triple>() {
@Override
public int compare(Triple a, Triple b) {
int c = IndexedTripleCollection.compare(a.getPredicate(), b.getPredicate(), hashCodeConflictMap);
if(c == 0){
c = IndexedTripleCollection.compare(a.getObject(), b.getObject(), hashCodeConflictMap);
if(c == 0){
c = IndexedTripleCollection.compare(a.getSubject(), b.getSubject(), hashCodeConflictMap);
}
}
return c;
}
};
/**
* The POS index
*/
private final NavigableSet<Triple> pos = new TreeSet<Triple>(posComparator);
/**
* Compares Triples based on Object, Subject, Predicate
*/
private final Comparator<Triple> ospComparator = new Comparator<Triple>() {
@Override
public int compare(Triple a, Triple b) {
int c = IndexedTripleCollection.compare(a.getObject(), b.getObject(), hashCodeConflictMap);
if(c == 0){
c = IndexedTripleCollection.compare(a.getSubject(), b.getSubject(), hashCodeConflictMap);
if(c == 0){
c = IndexedTripleCollection.compare(a.getPredicate(), b.getPredicate(), hashCodeConflictMap);
}
}
return c;
}
};
/**
* The OSP index
*/
private final NavigableSet<Triple> osp = new TreeSet<Triple>(ospComparator);
/**
* Creates an empty {@link IndexedTripleCollection}
*/
public IndexedTripleCollection() {
super();
}
/**
* Creates a {@link IndexedTripleCollection} using the passed iterator, the iterator
* is consumed before the constructor returns
*
* @param iterator
*/
public IndexedTripleCollection(Iterator<Triple> iterator) {
super();
while (iterator.hasNext()) {
Triple triple = iterator.next();
performAdd(triple);
}
}
/**
* Creates a {@link IndexedTripleCollection} for the specified collection of triples,
* subsequent modification of baseSet do not affect the created instance.
*
* @param iterable over triples
*/
public IndexedTripleCollection(Collection<Triple> baseCollection) {
super();
spo.addAll(baseCollection);
//use internal index to fill the other indexes, because the parsed
//collection might be slow
pos.addAll(spo);
osp.addAll(spo);
}
@Override
protected Iterator<Triple> performFilter(final NonLiteral subject, final UriRef predicate, final Resource object) {
if(subject == null && predicate == null && object == null){ //[n,n,n]
return createIterator(spo, spo.iterator());
}
final Triple low = new TripleImpl(
subject == null ? MIN : subject,
predicate == null ? MIN : predicate,
object == null ? MIN : object);
final Triple high = new TripleImpl(
subject == null ? MAX : subject,
predicate == null ? MAX : predicate,
object == null ? MAX : object);
if(subject != null && predicate != null && object != null){ // [S,P,O]
//NOTE: low.equals(high) in that case!
return createIterator(spo, spo.subSet(low, true, low, true).iterator());
} else if(subject != null && object == null){ //[S,n,n], [S,P,n]
return createIterator(spo, spo.subSet(low, high).iterator());
} else if (predicate != null) { //[n,P,n], [n,P,O]
return createIterator(pos,pos.subSet(low, high).iterator());
} else { //[n,n,O] , [S,n,O]
return createIterator(osp,osp.subSet(low, high).iterator());
}
}
@Override
protected boolean performAdd(Triple triple) {
if(spo.add(triple)){
osp.add(triple);
return pos.add(triple);
}
return false;
}
@Override
protected boolean performRemove(Triple triple) {
if(spo.remove(triple)){
osp.remove(triple);
return pos.remove(triple);
}
return false;
}
@Override
public int size() {
return spo.size();
}
// @Override
// public Iterator<Triple> iterator() {
// return createIterator(spo, spo.iterator());
// }
/**
* Returns an Iterator that ensures that calls to {@link Iterator#remove()}
* remove items from all three indexes
* @param index
* @param base
* @return
*/
private Iterator<Triple> createIterator(final SortedSet<Triple> index,final Iterator<Triple> base){
return new Iterator<Triple>() {
Triple current = null;
@Override
public boolean hasNext() {
return base.hasNext();
}
@Override
public Triple next() {
current = base.next();
return current;
}
@Override
public void remove() {
base.remove();
if(current != null){
if(!(index == spo)){
spo.remove(current);
}
if(!(index == pos)){
pos.remove(current);
}
if(!(index == osp)){
osp.remove(current);
}
}
}
};
}
protected static UriRef MIN = new UriRef("") {
@Override
public int hashCode() {
return Integer.MIN_VALUE;
};
};
protected static UriRef MAX = new UriRef("") {
@Override
public int hashCode() {
return Integer.MAX_VALUE;
};
};
// /**
// * Compares two resources with special support for {@link #MIN} and
// * {@link #MAX} to allow building {@link SortedSet#subSet(Object, Object)}
// * for <code>null</code> values parsed to
// * {@link #filter(NonLiteral, UriRef, Resource)}
// * @param a
// * @param b
// * @return
// */
// protected static int compareHash(Resource a, Resource b, Map<Integer,List<Resource>> confictsMap) {
// int hashA = a.hashCode();
// int hashB = b.hashCode();
// if (hashA != hashB) {
// return hashA > hashB ? 1 : -1;
// }
// //those resources might be equals
// //(1) Check for MIN, MAX (used to build sub-sets). Other resources might
// // have a similar hasCode
// int state = a == MIN || b == MAX ? -1 :
// a == MAX || b == MIN ? 1 : 0;
// if(state == 0){
// if(a.equals(b)){ //check of the resources are equals
// return 0; //return zero
// } else if(//we need to care about HashCode conflicts
// a instanceof BNode && b instanceof BNode){ // of BNodes
// log.info("HashCode conflict for {} and {}",a,b); //we have a conflict
// return resolveBNodeHashConflict(a, b, confictsMap);
// } else { //same hashCode but not equals
// //use the String representation of the Resources to sort them
// String as = resource2String(a);
// String bs = resource2String(b);
// log.info("same hash code {} - compare Strings a: {}, b: {}",
// new Object[]{a.hashCode(),as,bs});
// return as.compareTo(bs);
// }
// }
// return state;
// }
/**
* Resolved BNode hasConflics, by storing the correct order for the affected
* {@link Integer} in a {@link List} of Resource instances.
* @param a the first {@link BNode}
* @param b the second {@link BNode}
* @param confictsMap the Map used to store the order of BNodes with conflicts
* @return the decision taken based on the confictsMap.
*/
private static int resolveBNodeHashConflict(Resource a, Resource b,
Map<Integer,List<Resource>> confictsMap) {
//This is not a bad thing. We need just to ensure constant ordering
//and as there is nothing we can use to distinguish we need to keep
//this information in a list.
Integer hash = Integer.valueOf(a.hashCode());
List<Resource> resources = confictsMap.get(hash);
if(resources == null){ //new conflict ... just add and return
resources = new ArrayList<Resource>(2);
confictsMap.put(hash, resources);
resources.add(a);
resources.add(b);
return -1;
}
//already conflicting resource for this hash present
int aIndex=-1;
int bIndex=-1;
for(int i = 0; i<resources.size() && (aIndex < 0 || bIndex < 0);i++){
Resource r = resources.get(i);
if(aIndex < 0 && r.equals(a)){
aIndex = i;
}
if(bIndex < 0 && r.equals(b)){
bIndex = i;
}
}
if(aIndex < 0){ //a not found
aIndex = resources.size();
resources.add(a);
}
if(bIndex < 0){ //b not found
bIndex = resources.size();
resources.add(b);
}
return aIndex < bIndex ? -1 : 1;
}
/**
* Compares Resources to correctly sort them within the index.<p>
* Sort criteria are:<ol>
* <li> URIs are sorted by the {@link UriRef#getUnicodeString()} unicode string)
* <li> Literals
* <ol>
* <li> sort by the {@link Literal#getLexicalForm() lixical form}
* <li> sort by {@link PlainLiteral#getLanguage() language} (<code>null</code> value first)
* <li> sort by {@link TypedLiteral#getDataType() type} (<code>null</code> value fist
* </ol>
* <li> BNode
* <ol>
* <li> sorted by their {@link System#identityHashCode(Object) Object hasCode}
* <li> on hasCode conflicts (same hasCode but not equals) a random order is chosen
* and kept in the parsed conflictsMap
* </ol>
* </ol>
* <b>NOTEs</b><ul>
* <li> parsed {@link Resource} are not required to correctly implement
* {@link Object#hashCode() hashCode} and {@link Object#equals(Object) equals}
* <li> parsed {@link UriRef} and {@link BNode} and {@link Literal} MUST NOT
* extend/implement any of the other classes/interfaces. This means that an
* {@link UriRef} MUST NOT implement {@link BNode} nor {@link Literal}
* <li> parsed {@link Literal}s MAY implement {@link PlainLiteral} AND
* {@link TypedLiteral}. This allows wrappers over frameworks that do not
* distinguish between those two literal types to be used with the
* {@link IndexedTripleCollection}.
* </ul>
*
* @param a the first resource to compare
* @param b the second resource to compare
* @param confictsMap the map used to resolve BNodes with hasCode conflicts
* @return
*/
protected static int compare(Resource a, Resource b, Map<Integer,List<Resource>> confictsMap){
//Handle special cases for MAX and MIN values
if(a == MIN || b == MAX) {
return -1 ;
} else if(a == MAX || b == MIN){
return 1;
}
//sort (0) UriRefs < (1) Literals (PlainLiterals & TypedLiterals) < (3) BNodes
int at = a instanceof UriRef ? 0 : a instanceof Literal ? 1 : 2;
int bt = b instanceof UriRef ? 0 : b instanceof Literal ? 1 : 2;
if(at == bt){ //same type sort the different types
if(at < 2){ //no BNode
//sort in alphabetic order of the string representation
String as = at == 0 ? ((UriRef)a).getUnicodeString() :
((Literal)a).getLexicalForm();
String bs = bt == 0 ? ((UriRef)b).getUnicodeString() :
((Literal)b).getLexicalForm();
int sc = as.compareTo(bs);
if(sc == 0 && at == 1){ //same string value and Literals
//check if the language and types are the same
Language al = a instanceof PlainLiteral ? ((PlainLiteral)a).getLanguage() : null;
Language bl = b instanceof PlainLiteral ? ((PlainLiteral)b).getLanguage() : null;
//first try to sort by language
if(al == null){
sc = bl == null ? 0 : -1;
} else if(bl == null){
sc = 1;
} else {
sc = al.toString().compareTo(bl.toString());
}
if(sc == 0){
//if still equals look at the dataType
UriRef adt = a instanceof TypedLiteral ? ((TypedLiteral)a).getDataType() : null;
UriRef bdt = b instanceof TypedLiteral ? ((TypedLiteral)b).getDataType() : null;
if(adt == null){
sc = bdt == null ? 0 : -1;
} else if(bdt == null){
sc = 1;
} else {
sc = adt.getUnicodeString().compareTo(bdt.getUnicodeString());
}
}
return sc;
} else { //for UriRefs return the string compare
return sc;
}
} else { //handle BNodes
//sort BNodes based on hashCode
int ah = a.hashCode();
int bh = b.hashCode();
if(ah == bh){
if(!a.equals(b)){
//if implementations hash is the same, but the instances
//are not equals, try to sort them by identity hash code
int ash = System.identityHashCode(a);
int bsh = System.identityHashCode(b);
if(ash == bsh){ //if those are still the same, we need
//to resolve the hashCode conflict by memorise the
//decision in a confilctMap
return resolveBNodeHashConflict(a, b, confictsMap);
} else {
return ash < bsh ? -1 : 1;
}
} else { //same hash and equals
return 0;
}
} else { //sort by hash
return ah < bh ? -1 : 1;
}
}
} else {
return at < bt ? -1 : 1;
}
}
}