blob: 3a6cad8f4ede5b1eda1f61dd798c7f98a29cd0b2 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.commons.indexedgraph;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.clerezza.commons.rdf.BlankNode;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.test.GraphTest;
import org.apache.clerezza.rdf.ontologies.FOAF;
import org.apache.clerezza.rdf.ontologies.RDF;
import org.apache.clerezza.rdf.ontologies.RDFS;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class IndexedGraphTest extends GraphTest {
protected static final Logger log = LoggerFactory.getLogger(IndexedGraphTest.class);
private IRI uriRef1 = new IRI("http://example.org/foo");
private IRI uriRef2 = new IRI("http://example.org/bar");
private IRI uriRef3 = new IRI("http://example.org/test");
private Triple triple1 = new TripleImpl(uriRef1, uriRef2, uriRef3);
private Triple triple2 = new TripleImpl(uriRef2, uriRef2, uriRef1);
private Triple triple3 = new TripleImpl(uriRef3, uriRef1, uriRef3);
private Triple triple4 = new TripleImpl(uriRef1, uriRef3, uriRef2);
private Triple triple5 = new TripleImpl(uriRef2, uriRef3, uriRef2);
@Override
protected Graph getEmptyGraph() {
return new IndexedGraph();
}
@Test
public void bNodeConsitency() {
Graph mGraph = getEmptyGraph();
final BlankNode bNode = new BlankNode() {
@Override
public int hashCode() {
return -1;
}
@Override
public boolean equals(Object o) {
return o instanceof BlankNode;
}
};
final BlankNode bNodeClone = new BlankNode() {
@Override
public int hashCode() {
return -1;
}
@Override
public boolean equals(Object o) {
return o instanceof BlankNode;
}
};
mGraph.add(new TripleImpl(bNode, uriRef1, uriRef2));
mGraph.add(new TripleImpl(bNodeClone, uriRef2, uriRef3));
BlankNodeOrIRI bNodeBack = mGraph.filter(null, uriRef1, uriRef2).next().getSubject();
Assert.assertEquals("The bnode we get back is not equals to the one we added", bNode, bNodeBack);
BlankNodeOrIRI bNodeBack2 = mGraph.filter(null, uriRef2, uriRef3).next().getSubject();
Assert.assertEquals("The returnned bnodes are no longer equals", bNodeBack, bNodeBack2);
Assert.assertTrue("Not finding a triple when searching with equal bNode", mGraph.filter(bNodeBack, uriRef2, null).hasNext());
}
@Test
public void iteratorRemove() {
Graph itc = new IndexedGraph();
itc.add(triple1);
itc.add(triple2);
itc.add(triple3);
itc.add(triple4);
itc.add(triple5);
Iterator<Triple> iter = itc.iterator();
while (iter.hasNext()) {
iter.next();
iter.remove();
}
Assert.assertEquals(0, itc.size());
}
@Test
public void removeAll() {
Graph itc = new IndexedGraph();
itc.add(triple1);
itc.add(triple2);
itc.add(triple3);
itc.add(triple4);
itc.add(triple5);
Graph itc2 = new IndexedGraph();
itc2.add(triple1);
itc2.add(triple3);
itc2.add(triple5);
itc.removeAll(itc2);
Assert.assertEquals(2, itc.size());
}
@Test
public void filterIteratorRemove() {
Graph itc = new IndexedGraph();
itc.add(triple1);
itc.add(triple2);
itc.add(triple3);
itc.add(triple4);
itc.add(triple5);
Iterator<Triple> iter = itc.filter(uriRef1, null, null);
while (iter.hasNext()) {
iter.next();
iter.remove();
}
Assert.assertEquals(3, itc.size());
}
@Test(expected=ConcurrentModificationException.class)
public void remove() {
Graph itc = new IndexedGraph();
itc.add(triple1);
itc.add(triple2);
itc.add(triple3);
itc.add(triple4);
itc.add(triple5);
Iterator<Triple> iter = itc.filter(uriRef1, null, null);
while (iter.hasNext()) {
Triple triple = iter.next();
itc.remove(triple);
}
Assert.assertEquals(3, itc.size());
}
/**
* Holds the test data to perform
* {@link Graph#filter(BlankNodeOrIRI, IRI, RDFTerm)}
* tests on {@link Graph} implementations
* @author rwesten
*/
public static final class TestCase {
public final List<BlankNodeOrIRI> subjects;
public final List<RDFTerm> objects;
public final List<IRI> predicates;
/**
* Create a new Test with a maximum number of subjects, predicates and
* objects based on data in the parsed triple collection
* @param tc the data
* @param sNum the maximum number of subjects
* @param pNum the maximum number of predicates
* @param oNum the maximum number of objects
*/
public TestCase(Graph tc,int sNum, int pNum, int oNum){
Set<BlankNodeOrIRI> subjects = new LinkedHashSet<BlankNodeOrIRI>();
Set<RDFTerm> objects = new LinkedHashSet<RDFTerm>();
Set<IRI> predicates = new LinkedHashSet<IRI>();
for(Iterator<Triple> it = tc.iterator();it.hasNext();){
Triple t = it.next();
if(subjects.size() < 100){
subjects.add(t.getSubject());
}
if(predicates.size() < 5){
predicates.add(t.getPredicate());
}
if(objects.size() < 100){
objects.add(t.getObject());
}
}
this.subjects = Collections.unmodifiableList(
new ArrayList<BlankNodeOrIRI>(subjects));
this.predicates = Collections.unmodifiableList(
new ArrayList<IRI>(predicates));
this.objects = Collections.unmodifiableList(
new ArrayList<RDFTerm>(objects));
}
}
@Test
public void testPerformance(){
//Reduced values to fix STANBOL-
Set<Triple> graph = new HashSet<Triple>();
int iterations = 100; //reduced from 1000
int graphsize = 100000;
Long seed = System.currentTimeMillis();
log.info("Test Seed: {}",seed);
createGraph(graph, graphsize, seed);
log.info("Load Time ({} triples)", graph.size());
long start = System.currentTimeMillis();
Graph sg = new SimpleGraph(graph);
log.info(" ... {}: {}",sg.getClass().getSimpleName(), System.currentTimeMillis()-start);
start = System.currentTimeMillis();
Graph ig = new IndexedGraph(graph);
log.info(" ... {}: {}",ig.getClass().getSimpleName(), System.currentTimeMillis()-start);
//Simple ImmutableGraph reference test
TestCase testCase = new TestCase(sg, 20, 5, 20); //reduced form 100,5,100
log.info("Filter Performance Test (graph size {} triples, iterations {})",graphsize,iterations);
log.info(" --- TEST {} with {} triples ---",sg.getClass().getSimpleName(),sg.size());
start = System.currentTimeMillis();
List<Long> sgr = executeTest(sg, testCase, iterations);
log.info(" --- TEST completed in {}ms",System.currentTimeMillis()-start);
log.info(" --- TEST {} {} triples ---",ig.getClass().getSimpleName(),sg.size());
start = System.currentTimeMillis();
List<Long> igr = executeTest(ig, testCase, iterations);
log.info(" --- TEST completed in {}ms",System.currentTimeMillis()-start);
Assert.assertEquals(sgr, igr); //validate filter implementation
}
public List<Long> executeTest(Graph graph, TestCase test, int testCount){
List<Long> testResults = new ArrayList<Long>();
long start;
long resultCount;
//[S,P,O]
start = System.currentTimeMillis();
resultCount = testSPO(graph, test, testCount);
testResults.add(new Long(resultCount));
log.info("... run [S,P,O] in {}ms with {} results",System.currentTimeMillis()-start,resultCount);
//[S,P,n]
start = System.currentTimeMillis();
resultCount = testSPn(graph, test, testCount);
testResults.add(new Long(resultCount));
log.info("... run [S,P,n] in {}ms with {} results",System.currentTimeMillis()-start,resultCount);
//[S,n,O]
start = System.currentTimeMillis();
resultCount = testSnO(graph, test, testCount);
testResults.add(new Long(resultCount));
log.info("... run [S,n,O] in {}ms with {} results",System.currentTimeMillis()-start,resultCount);
//[n,P,O]
start = System.currentTimeMillis();
resultCount = testnPO(graph, test, testCount);
testResults.add(new Long(resultCount));
log.info("... run [n,P,O] in {}ms with {} results",System.currentTimeMillis()-start,resultCount);
//[S,n,n]
start = System.currentTimeMillis();
resultCount = testSnn(graph, test, testCount);
testResults.add(new Long(resultCount));
log.info("... run [S,n,n] in {}ms with {} results",System.currentTimeMillis()-start,resultCount);
//[n,P,n]
start = System.currentTimeMillis();
resultCount = testnPn(graph, test, testCount);
testResults.add(new Long(resultCount));
log.info("... run [n,P,n] in {}ms with {} results",System.currentTimeMillis()-start,resultCount);
//[n,n,O]
start = System.currentTimeMillis();
resultCount = testnnO(graph, test, testCount);
testResults.add(new Long(resultCount));
log.info("... run [n,n,O] in {}ms with {} results",System.currentTimeMillis()-start,resultCount);
return testResults;
}
private long testSPO(Graph graph, TestCase test, int testCount) {
Iterator<Triple> it;
long count = 0;
int si = -1;
int pi = -1;
int oi;
for(int num = 0;num < testCount;num++){
oi = num%test.objects.size();
if(oi == 0) {
pi++;
pi = pi%test.predicates.size();
}
if(pi == 0) {
si++;
si = si%test.subjects.size();
}
it = graph.filter(test.subjects.get(si), test.predicates.get(pi), test.objects.get(oi));
while(it.hasNext()){
it.next();
count++;
}
}
return count;
}
private long testSPn(Graph graph, TestCase test, int testCount) {
Iterator<Triple> it;
long count = 0;
int si = -1;
int pi = 0;
for(int num = 0;num < testCount;num++){
pi = num%test.predicates.size();
if(pi == 0) {
si++;
si = si%test.subjects.size();
}
it = graph.filter(test.subjects.get(si), test.predicates.get(pi), null);
while(it.hasNext()){
it.next();
count++;
}
}
return count;
}
private long testSnO(Graph graph, TestCase test, int testCount) {
Iterator<Triple> it;
long count = 0;
int si = -1;
int oi;
for(int num = 0;num < testCount;num++){
oi = num%test.objects.size();
if(oi == 0) {
si++;
si = si%test.subjects.size();
}
it = graph.filter(test.subjects.get(si), null, test.objects.get(oi));
while(it.hasNext()){
it.next();
count++;
}
}
return count;
}
private long testnPO(Graph graph, TestCase test, int testCount) {
Iterator<Triple> it;
long count = 0;
int pi = -1;
int oi;
for(int num = 0;num < testCount;num++){
oi = num%test.objects.size();
if(oi == 0) {
pi++;
pi = pi%test.predicates.size();
}
it = graph.filter(null, test.predicates.get(pi), test.objects.get(oi));
while(it.hasNext()){
it.next();
count++;
}
}
return count;
}
private long testSnn(Graph graph, TestCase test, int testCount) {
Iterator<Triple> it;
long count = 0;
int si = 0;
for(int num = 0;num < testCount;num++){
si = num%test.subjects.size();
it = graph.filter(test.subjects.get(si), null, null);
while(it.hasNext()){
it.next();
count++;
}
}
return count;
}
private long testnPn(Graph graph, TestCase test, int testCount) {
Iterator<Triple> it;
long count = 0;
int pi;
for(int num = 0;num < testCount;num++){
pi = num%test.predicates.size();
it = graph.filter(null, test.predicates.get(pi), null);
while(it.hasNext()){
it.next();
count++;
}
}
return count;
}
private long testnnO(Graph graph, TestCase test, int testCount) {
Iterator<Triple> it;
long count = 0;
int oi;
for(int num = 0;num < testCount;num++){
oi = num%test.objects.size();
it = graph.filter(null, null, test.objects.get(oi));
while(it.hasNext()){
it.next();
count++;
}
}
return count;
}
private static void createGraph(Collection<Triple> tc, int triples, Long seed){
Random rnd = new Random();
if(seed != null){
rnd.setSeed(seed);
}
LiteralFactory lf = LiteralFactory.getInstance();
//randoms are in the range [0..3]
double l = 1.0; //literal
double i = l / 3; //int
double d = l * 2 / 3;//double
double b = 2.0;//bNode
double nb = b - (l * 2 / 3); //create new bNode
double random;
BlankNodeOrIRI subject = null;
IRI predicate = null;
List<IRI> predicateList = new ArrayList<IRI>();
predicateList.add(RDF.first);
predicateList.add(RDF.rest);
predicateList.add(RDF.type);
predicateList.add(RDFS.label);
predicateList.add(RDFS.comment);
predicateList.add(RDFS.range);
predicateList.add(RDFS.domain);
predicateList.add(FOAF.name);
predicateList.add(FOAF.nick);
predicateList.add(FOAF.homepage);
predicateList.add(FOAF.age);
predicateList.add(FOAF.depiction);
String URI_PREFIX = "http://www.test.org/bigGraph/ref";
Language DE = new Language("de");
Language EN = new Language("en");
Iterator<IRI> predicates = predicateList.iterator();
List<BlankNode> bNodes = new ArrayList<BlankNode>();
bNodes.add(new BlankNode());
for (int count = 0; tc.size() < triples; count++) {
random = rnd.nextDouble() * 3;
if (random >= 2.5 || count == 0) {
if (random <= 2.75) {
subject = new IRI(URI_PREFIX + count);
} else {
int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
subject = bNodes.get(rndIndex);
}
}
if (random > 2.0 || count == 0) {
if (!predicates.hasNext()) {
Collections.shuffle(predicateList,rnd);
predicates = predicateList.iterator();
}
predicate = predicates.next();
}
if (random <= l) { //literal
if (random <= i) {
tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
} else if (random <= d) {
tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
} else {
Literal text;
if (random <= i) {
text = new PlainLiteralImpl("Literal for " + count);
} else if (random <= d) {
text = new PlainLiteralImpl("An English literal for " + count, EN);
} else {
text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
}
tc.add(new TripleImpl(subject, predicate, text));
}
} else if (random <= b) { //bnode
BlankNode bnode;
if (random <= nb) {
bnode = new BlankNode();
bNodes.add(bnode);
} else { //>nb <b
int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
bnode = bNodes.get(rndIndex);
}
tc.add(new TripleImpl(subject, predicate, bnode));
} else { //IRI
tc.add(new TripleImpl(subject, predicate,
new IRI(URI_PREFIX + count * random)));
}
}
}
}