blob: 28eb28ef916f504d0c2d5f0008aed9055366ce81 [file] [log] [blame]
/*
* Copyright 2015 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.clerezza.commons.rdf.impl.sparql;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.logging.Logger;
import org.apache.clerezza.commons.rdf.BlankNode;
import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.ImmutableGraph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.AbstractGraph;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
/**
*
* @author reto
*/
public class SparqlGraph extends AbstractGraph {
private static final int MAX_ISOMORPHIC_BNODES = 1000;
private static final Logger log = Logger.getLogger(SparqlGraph.class.getName());
final SparqlClient sparqlClient;
/**
* Constructs a Graph representing the default graph at the specified
* endpoint
*/
public SparqlGraph(final String endpoint) {
sparqlClient = new SparqlClient(endpoint);
}
@Override
protected Iterator<Triple> performFilter(final BlankNodeOrIRI filterSubject,
final IRI filterPredicate, final RDFTerm filterObject) {
try {
String query = createQuery(filterSubject, filterPredicate, filterObject);
final List<Map<String, RDFTerm>> sparqlResults = sparqlClient.queryResultSet(query);
//first to triples without bnode-conversion
//rawTriples contains the triples with the BNodes from the result set
final Collection<Triple> rawTriples = new ArrayList<>();
for (Map<String, RDFTerm> result : sparqlResults) {
rawTriples.add(new TripleImpl(filterSubject != null ? filterSubject : (BlankNodeOrIRI) result.get("s"),
filterPredicate != null ? filterPredicate : (IRI) result.get("p"),
filterObject != null ? filterObject : result.get("o")));
}
//then bnode conversion
final Iterator<Triple> rawTriplesIter = rawTriples.iterator();
//this is basically just wokring around the lack of (named) nested functions
return (new Callable<Iterator<Triple>>() {
final Map<BlankNode, SparqlBNode> nodeMap = new HashMap<>();
final Set<ImmutableGraph> usedContext = new HashSet<>();
private RDFTerm useSparqlNode(RDFTerm node) throws IOException {
if (node instanceof BlankNodeOrIRI) {
return useSparqlNode((BlankNodeOrIRI) node);
}
return node;
}
private BlankNodeOrIRI useSparqlNode(BlankNodeOrIRI node) throws IOException {
if (node instanceof BlankNode) {
if (!nodeMap.containsKey(node)) {
createBlankNodesForcontext((BlankNode) node);
}
if (!nodeMap.containsKey(node)) {
throw new RuntimeException("no Bnode created");
}
return nodeMap.get(node);
} else {
return node;
}
}
private void createBlankNodesForcontext(final BlankNode node) throws IOException {
final Collection<Triple> context = getContext(node);
final Set<BlankNode> rawNodes = new HashSet<>();
for (Triple triple : context) {
{
final BlankNodeOrIRI subject = triple.getSubject();
if (subject instanceof BlankNode) {
rawNodes.add((BlankNode) subject);
}
}
{
final RDFTerm object = triple.getObject();
if (object instanceof BlankNode) {
rawNodes.add((BlankNode) object);
}
}
}
final Set<SparqlBNode> createdSparqlNodes = new HashSet<>();
//final Map<BlankNode, SparqlBNode> preliminaryNodes = new HashMap<>();
for (BlankNode rawNode : rawNodes) {
for (int i = 0; i < MAX_ISOMORPHIC_BNODES; i++) {
SparqlBNode sparqlBNode = new SparqlBNode(rawNode, context, i);
if (!createdSparqlNodes.contains(sparqlBNode)) {
nodeMap.put(rawNode, sparqlBNode);
createdSparqlNodes.add(sparqlBNode);
break;
}
}
}
}
private ImmutableGraph getContext(final BlankNode node) throws IOException {
//we need to get the cntext of the BNode
//if the filter was for (null, null, null) we have the whole
//bnode context in the reuslt set, otherwise we need to get
//more triples from the endpoint,
//let's first handle the easy case
if ((filterSubject == null) && (filterPredicate == null)
&& (filterObject == null)) {
return getContextInRaw(node);
} else {
final ImmutableGraph startContext = getContextInRaw(node);
final Set<ImmutableGraph> expandedContexts = expandContext(startContext);
//expand bnode context
//note that there might be different contexts for
//a bnode as present in the current result set
//in this case we just haveto make sure we don't
//pick the same context for different bnodes in the resultset
ImmutableGraph result = null;
for (ImmutableGraph expandedContext : expandedContexts) {
if (!usedContext.contains(expandedContext)) {
result = expandedContext;
break;
}
}
if (result == null) {
log.warning("he underlying sparql graph seems to contain redundant triples, this might cause unexpected results");
result = expandedContexts.iterator().next();
} else {
usedContext.add(result);
}
return result;
}
}
private ImmutableGraph getContextInRaw(BlankNode node) {
final Graph contextBuilder = new SimpleGraph();
for (Triple rawTriple : rawTriples) {
BlankNodeOrIRI rawSubject = rawTriple.getSubject();
RDFTerm rawObject = rawTriple.getObject();
if (rawSubject.equals(node) || rawObject.equals(node)) {
contextBuilder.add(rawTriple);
}
}
return contextBuilder.getImmutableGraph();
}
@Override
public Iterator<Triple> call() throws Exception {
return new Iterator<Triple>() {
@Override
public boolean hasNext() {
return rawTriplesIter.hasNext();
}
@Override
public Triple next() {
try {
Triple rawTriple = rawTriplesIter.next();
return new TripleImpl(useSparqlNode(rawTriple.getSubject()),
rawTriple.getPredicate(),
useSparqlNode(rawTriple.getObject()));
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
};
}
/**
* returns all MSGs that are supergraphs of startContext
*
* @param startContext
* @return
*/
private Set<ImmutableGraph> expandContext(Collection<Triple> startContext) throws IOException {
final StringBuilder queryBuilder = new StringBuilder();
queryBuilder.append("SELECT * WHERE {\n ");
Map<BlankNode, String> bNodeVarNameMap = writeTriplePattern(queryBuilder, startContext);
Set<BlankNode> bNodesInContext = bNodeVarNameMap.keySet();
for (BlankNode bNode : bNodesInContext) {
final String bNodeVarLabel = bNodeVarNameMap.get(bNode);
//looking for outgoing properties of the bnode
queryBuilder.append("OPTIONAL { ");
queryBuilder.append('?');
queryBuilder.append(bNodeVarLabel);
queryBuilder.append(' ');
queryBuilder.append("?po");
queryBuilder.append(bNodeVarLabel);
queryBuilder.append(" ?o");
queryBuilder.append(bNodeVarLabel);
queryBuilder.append(" } .\n");
//looking for incoming properties of the bnode
queryBuilder.append("OPTIONAL { ");
queryBuilder.append("?s");
queryBuilder.append(bNodeVarLabel);
queryBuilder.append(' ');
queryBuilder.append("?pi");
queryBuilder.append(bNodeVarLabel);
queryBuilder.append(" ?");
queryBuilder.append(bNodeVarLabel);
queryBuilder.append(" } .\n");
}
queryBuilder.append(" }");
final List<Map<String, RDFTerm>> expansionQueryResults = sparqlClient.queryResultSet(queryBuilder.toString());
Set<ImmutableGraph> expandedContexts = new HashSet<>();
//the query results may or may be from disjoint supergraphs
//we expand them all as if they are different which may lead
//us to the same MSG multiple times
RESULTS:
for (Map<String, RDFTerm> expansionQueryResult : expansionQueryResults) {
Collection<Triple> expandedContext = new HashSet<>();
Map<BlankNode, BlankNode> newBNodesToOldBNodes = new HashMap<>();
for (BlankNode oldBNode : bNodesInContext) {
final String bNodeVarLabel = bNodeVarNameMap.get(oldBNode);
final RDFTerm newNode = expansionQueryResult.get(bNodeVarLabel);
if (!(newNode instanceof BlankNode)) {
//this subgraph is't a match
continue RESULTS;
}
newBNodesToOldBNodes.put((BlankNode) newNode, oldBNode);
}
expandedContext.addAll(startContext);
boolean newBNodeIntroduced = false;
boolean newTripleAdded = false;
for (BlankNode oldBNode : bNodesInContext) {
final String bNodeVarLabel = bNodeVarNameMap.get(oldBNode);
{
final IRI newPredicate = (IRI) expansionQueryResult.get("po" + bNodeVarLabel);
if (newPredicate != null) {
RDFTerm newObject = expansionQueryResult.get("o" + bNodeVarLabel);
if (newObject instanceof BlankNode) {
if (newBNodesToOldBNodes.containsKey(newObject)) {
//point back to BNode in startContext
newObject = newBNodesToOldBNodes.get(newObject);
} else {
newBNodeIntroduced = true;
}
}
if (expandedContext.add(new TripleImpl(oldBNode, newPredicate, newObject))) {
newTripleAdded = true;
}
}
}
{
final IRI newPredicate = (IRI) expansionQueryResult.get("pi" + bNodeVarLabel);
if (newPredicate != null) {
RDFTerm newSubject = expansionQueryResult.get("s" + bNodeVarLabel);
if (newSubject instanceof BlankNode) {
if (newBNodesToOldBNodes.containsKey(newSubject)) {
//point back to BNode in startContext
newSubject = newBNodesToOldBNodes.get(newSubject);
} else {
newBNodeIntroduced = true;
}
}
if (expandedContext.add(new TripleImpl((BlankNodeOrIRI) newSubject, newPredicate, oldBNode))) {
newTripleAdded = true;
}
}
}
}
if (newBNodeIntroduced) {
//we could be more efficient than this ans just expand the newly introduced bnodes
expandedContexts.addAll(expandContext(expandedContext));
} else {
if (newTripleAdded) {
//look for more results
expandedContexts.addAll(expandContext(expandedContext));
//continued iteration obsoleted by recursion
break;
}
}
}
if (expandedContexts.isEmpty()) {
expandedContexts.add(new SimpleGraph(startContext).getImmutableGraph());
}
return expandedContexts;
}
}).call();
} catch (AlienBNodeException e) {
return new Iterator<Triple>() {
@Override
public boolean hasNext() {
return false;
}
@Override
public Triple next() {
throw new NoSuchElementException();
}
};
} catch (IOException ex) {
throw new RuntimeException(ex);
} catch (Exception ex) {
throw new RuntimeException(ex);
}
}
private String createQuery(final BlankNodeOrIRI filterSubject, final IRI filterPredicate, final RDFTerm filterObject) {
final StringBuilder selectBuilder = new StringBuilder();
selectBuilder.append("SELECT ");
final StringBuilder whereBuilder = new StringBuilder();
whereBuilder.append("WHERE { ");
if (filterSubject == null) {
whereBuilder.append("?s");
selectBuilder.append("?s ");
} else {
if (filterSubject instanceof SparqlBNode) {
whereBuilder.append("?sn");
} else {
whereBuilder.append(asSparqlTerm(filterSubject));
}
}
whereBuilder.append(' ');
if (filterPredicate == null) {
whereBuilder.append("?p");
selectBuilder.append("?p ");
} else {
whereBuilder.append(asSparqlTerm(filterPredicate));
}
whereBuilder.append(' ');
if (filterObject == null) {
whereBuilder.append("?o");
selectBuilder.append("?o ");
} else {
if (filterObject instanceof SparqlBNode) {
whereBuilder.append("?on");
} else {
whereBuilder.append(asSparqlTerm(filterObject));
}
}
whereBuilder.append(" .\n");
if (filterSubject instanceof SparqlBNode) {
//expand bnode context
writeTriplePattern(whereBuilder, ((SparqlBNode) filterSubject).context, "sn");
}
if (filterObject instanceof SparqlBNode) {
//expand bnode context
writeTriplePattern(whereBuilder, ((SparqlBNode) filterObject).context, "on");
}
whereBuilder.append(" }");
return selectBuilder.append(whereBuilder).toString();
}
@Override
protected int performSize() {
try {
//TODO replace this with count
return sparqlClient.queryResultSet("SELECT * WHERE { ?s ?p ?o}").size();
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
private String asSparqlTerm(IRI iri) {
return "<" + iri.getUnicodeString() + ">";
}
private String asSparqlTerm(Literal literal) {
//TODO langauge and datatype
return "\"" + literal.getLexicalForm().replace("\n", "\\n").replace("\"", "\\\"") + "\"";
}
private String asSparqlTerm(BlankNode bnode) {
if (!(bnode instanceof SparqlBNode)) {
throw new AlienBNodeException();
}
//this requires adding additional clauses to the graph pattern
throw new RuntimeException("SparqlBNodes should have been handled earlier");
}
private String asSparqlTerm(BlankNodeOrIRI term) {
if (term instanceof IRI) {
return asSparqlTerm((IRI) term);
} else {
return asSparqlTerm((BlankNode) term);
}
}
private String asSparqlTerm(RDFTerm term) {
if (term instanceof BlankNodeOrIRI) {
return asSparqlTerm((BlankNodeOrIRI) term);
} else {
return asSparqlTerm((Literal) term);
}
}
private Map<BlankNode, String> writeTriplePattern(StringBuilder queryBuilder, Collection<Triple> triples) {
return writeTriplePattern(queryBuilder, triples, null);
}
private Map<BlankNode, String> writeTriplePattern(StringBuilder queryBuilder, Collection<Triple> triples, String varLabelForInternalBNodeId) {
final Collection<String> triplePatterns = new ArrayList<>();
int varCounter = 0;
final Map<BlankNode, String> bNodeVarNameMap = new HashMap<>();
for (Triple t : triples) {
final StringBuilder builder = new StringBuilder();
{
final BlankNodeOrIRI s = t.getSubject();
String varName;
if (s instanceof BlankNode) {
if (bNodeVarNameMap.containsKey(s)) {
varName = bNodeVarNameMap.get(s);
} else {
varName = "v" + (varCounter++);
bNodeVarNameMap.put((BlankNode) s, varName);
}
builder.append('?');
builder.append(varName);
} else {
if (s.equals(SparqlBNode.internalBNodeId)) {
builder.append('?');
builder.append(varLabelForInternalBNodeId);
} else {
builder.append(asSparqlTerm(s));
}
}
}
builder.append(' ');
builder.append(asSparqlTerm(t.getPredicate()));
builder.append(' ');
{
final RDFTerm o = t.getObject();
String varName;
if (o instanceof BlankNode) {
if (bNodeVarNameMap.containsKey(o)) {
varName = bNodeVarNameMap.get(o);
} else {
varName = "v" + (varCounter++);
bNodeVarNameMap.put((BlankNode) o, varName);
}
builder.append('?');
builder.append(varName);
} else {
if (o.equals(SparqlBNode.internalBNodeId)) {
builder.append('?');
builder.append(varLabelForInternalBNodeId);
} else {
builder.append(asSparqlTerm(o));
}
}
}
builder.append('.');
triplePatterns.add(builder.toString());
}
for (String triplePattern : triplePatterns) {
queryBuilder.append(triplePattern);
queryBuilder.append('\n');
}
return bNodeVarNameMap;
}
private static class AlienBNodeException extends RuntimeException {
public AlienBNodeException() {
}
}
}