JENA-1899: TDB1 Storage and query
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/core/VarAlloc.java b/jena-arq/src/main/java/org/apache/jena/sparql/core/VarAlloc.java
index 17d8653..2c3f4c8 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/core/VarAlloc.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/core/VarAlloc.java
@@ -47,8 +47,6 @@
this.baseMarker = baseMarker ;
}
-
-
public Var allocVar()
{ return alloc(baseMarker, counter ++) ; }
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/ExecutionContext.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/ExecutionContext.java
index ff357db..7006317 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/ExecutionContext.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/ExecutionContext.java
@@ -18,114 +18,97 @@
package org.apache.jena.sparql.engine;
-import java.util.ArrayList ;
-import java.util.Collection ;
-import java.util.Iterator ;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
-import org.apache.jena.graph.Graph ;
-import org.apache.jena.sparql.core.DatasetGraph ;
-import org.apache.jena.sparql.engine.main.OpExecutorFactory ;
-import org.apache.jena.sparql.function.FunctionEnv ;
-import org.apache.jena.sparql.util.Context ;
+import org.apache.jena.graph.Graph;
+import org.apache.jena.sparql.core.DatasetGraph;
+import org.apache.jena.sparql.engine.main.OpExecutorFactory;
+import org.apache.jena.sparql.function.FunctionEnv;
+import org.apache.jena.sparql.util.Context;
public class ExecutionContext implements FunctionEnv
{
- private static boolean TrackAllIterators = false ;
+ private static boolean TrackAllIterators = false;
- private Context context = null ;
- private DatasetGraph dataset = null ;
+ private Context context = null;
+ private DatasetGraph dataset = null;
// Iterator tracking
- private Collection<QueryIterator> openIterators = null ;
+ private Collection<QueryIterator> openIterators = null;
// Tracking all iterators leads to a build up of state,
- private Collection<QueryIterator> allIterators = null ;
- private Graph activeGraph = null ;
- private OpExecutorFactory executor = null ;
+ private Collection<QueryIterator> allIterators = null;
+ private Graph activeGraph = null;
+ private OpExecutorFactory executor = null;
/** Clone */
public ExecutionContext(ExecutionContext other)
{
- this.context = other.context ;
- this.dataset = other.dataset ;
- this.openIterators = other.openIterators ;
- this.allIterators = other.allIterators ;
- this.activeGraph = other.activeGraph ;
- this.executor = other.executor ;
+ this.context = other.context;
+ this.dataset = other.dataset;
+ this.openIterators = other.openIterators;
+ this.allIterators = other.allIterators;
+ this.activeGraph = other.activeGraph;
+ this.executor = other.executor;
}
/** Clone and change active graph - shares tracking */
public ExecutionContext(ExecutionContext other, Graph activeGraph)
{
- this(other) ;
- this.activeGraph = activeGraph ;
+ this(other);
+ this.activeGraph = activeGraph;
}
- public ExecutionContext(Context params, Graph activeGraph, DatasetGraph dataset, OpExecutorFactory factory)
- {
- this.context = params ;
- this.dataset = dataset ;
- this.openIterators = new ArrayList<>() ;
+ public ExecutionContext(Context params, Graph activeGraph, DatasetGraph dataset, OpExecutorFactory factory) {
+ this.context = params;
+ this.dataset = dataset;
+ this.openIterators = new ArrayList<>();
if ( TrackAllIterators )
- this.allIterators = new ArrayList<>() ;
- this.activeGraph = activeGraph ;
- this.executor = factory ;
+ this.allIterators = new ArrayList<>();
+ this.activeGraph = activeGraph;
+ this.executor = factory;
}
@Override
- public Context getContext() { return context ; }
+ public Context getContext() { return context; }
-// public ExecutionContext getExecutionContext() { return this ; }
-
-
- public void openIterator(QueryIterator qIter)
- {
- openIterators.add(qIter) ;
+ public void openIterator(QueryIterator qIter) {
+ openIterators.add(qIter);
if ( allIterators != null )
- allIterators.add(qIter) ;
+ allIterators.add(qIter);
}
- public void closedIterator(QueryIterator qIter)
- {
- openIterators.remove(qIter) ;
+ public void closedIterator(QueryIterator qIter) {
+ openIterators.remove(qIter);
}
- public Iterator<QueryIterator> listOpenIterators() { return openIterators.iterator() ; }
- public Iterator<QueryIterator> listAllIterators()
- {
- if ( allIterators == null ) return null ;
- return allIterators.iterator() ;
+ public Iterator<QueryIterator> listOpenIterators() {
+ return openIterators.iterator();
}
-
- public OpExecutorFactory getExecutor()
- {
- return executor ;
+
+ public Iterator<QueryIterator> listAllIterators() {
+ if ( allIterators == null )
+ return null;
+ return allIterators.iterator();
+ }
+
+ public OpExecutorFactory getExecutor() {
+ return executor;
}
/** Setter for the policy for algebra expression evaluation - use with care */
- public void setExecutor(OpExecutorFactory executor)
- {
- this.executor = executor ;
+ public void setExecutor(OpExecutorFactory executor) {
+ this.executor = executor;
}
@Override
- public DatasetGraph getDataset() { return dataset ; }
-
-// /** Setter for the dataset - use with care */
-// public void setDataset(DatasetGraph dataset)
-// {
-// this.dataset = dataset ;
-// }
+ public DatasetGraph getDataset() { return dataset; }
/** Return the active graph (the one matching is against at this point in the query.
* May be null if unknown or not applicable - for example, doing quad store access or
* when sorting
*/
@Override
- public Graph getActiveGraph() { return activeGraph ; }
-
-// /** Setter for the active graph - use with care */
-// public void setActiveGraph(Graph activeGraph)
-// {
-// this.activeGraph = activeGraph ;
-// }
+ public Graph getActiveGraph() { return activeGraph; }
}
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterAddTripleTerm.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterAddTripleTerm.java
index 0b36285..df3f490 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterAddTripleTerm.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/QueryIterAddTripleTerm.java
@@ -37,20 +37,18 @@
* with terms from the current binding. It is an error not to have substitutions for
* all variables and results in the original binding unchanged.
*/
-public class QueryIterAddTripleTerm extends QueryIterTriplePattern {
+public class QueryIterAddTripleTerm extends QueryIterConvert {
private final Triple triple;
private final Var var;
public QueryIterAddTripleTerm(QueryIterator chain, Var var, Triple triple, ExecutionContext execContext) {
- super(chain, triple, execContext);
+ super(chain, b->convert(var, triple, b), execContext);
this.triple = triple;
this.var = var;
}
- @Override
- protected Binding moveToNextBinding() {
- Binding binding = super.moveToNextBinding();
- Triple matchedTriple = Substitute.substitute(triple, binding);
+ private static Binding convert(Var var, Triple triple, Binding binding) {
+ Triple matchedTriple = Substitute.substitute(triple, binding);
if ( ! matchedTriple.isConcrete() )
// Not all concrete terms.
return binding;
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/RX.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/RX.java
index e00ea78..9f1a661 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/RX.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/iterator/RX.java
@@ -46,27 +46,6 @@
static String allocTripleTerms = "*";
static VarAlloc varAlloc = new VarAlloc(allocTripleTerms) ;
- // QueryIterator - acceptable as API as universal.
- // Internal TDB is a delayed binding iterator.
- // <Triple, Node> vs <Tuple, NodeId>
- // TDB: Iterator<BindingNodeId>
-
- // TDB: StageMatchTuple ~~ QueryIterTriplePattern
-
- // TDB: SolverLib.solve
- // The two solver libs are identical except for imports.
- /*
- public static Iterator<BindingNodeId> solve(NodeTupleTable nodeTupleTable,
- Tuple<Node> tuple,
- boolean anyGraph,
- Iterator<BindingNodeId> chain, Predicate<Tuple<NodeId>> filter,
- ExecutionContext execCxt)
- {
- return new StageMatchTuple(nodeTupleTable, chain, tuple, anyGraph, filter, execCxt) ;
- }
- */
-
-
/**
* Match a single triple pattern that may involve RDF* terms.
* This is the top level function for matching triples.
@@ -78,9 +57,8 @@
* @implNote
* Without RDF*, this would be a plain call of {@link #matchData} which is simply:
* <pre>
- * new QueryIterTriplePattern(chain, triple, execContext)}
+ * new QueryIterTriplePattern(chain, triple, execCxt)
* </pre>
- *
*/
public static QueryIterator rdfStarTriple(QueryIterator chain, Triple triple, ExecutionContext execCxt) {
// Should all work without this trap for plain RDF but for now,
@@ -98,9 +76,9 @@
* variable, and put allocated variable in to main triple pattern. Do for subject
* and object positions, and also any nested triple pattern terms.
*/
- private static QueryIterator rdfStarTripleSub(QueryIterator chain, Triple triple, ExecutionContext execContext) {
- Pair<QueryIterator, Triple> pair = preprocess(chain, triple, execContext);
- QueryIterator chain2 = matchData(pair.getLeft(), pair.getRight(), execContext);
+ private static QueryIterator rdfStarTripleSub(QueryIterator chain, Triple triple, ExecutionContext execCxt) {
+ Pair<QueryIterator, Triple> pair = preprocessForTripleTerms(chain, triple, execCxt);
+ QueryIterator chain2 = matchData(pair.getLeft(), pair.getRight(), execCxt);
return chain2;
}
@@ -108,30 +86,23 @@
* Match a triple pattern (which may have nested triple terms in it).
* Any matched triples are added as triple terms bound to the supplied variable.
*/
- public static QueryIterator matchTripleStar(QueryIterator chain, Var var, Triple triple, ExecutionContext execContext) {
+ public static QueryIterator matchTripleStar(QueryIterator chain, Var var, Triple triple, ExecutionContext execCxt) {
if ( tripleHasNodeTriple(triple) ) {
- Pair<QueryIterator, Triple> pair = preprocess(chain, triple, execContext);
+ Pair<QueryIterator, Triple> pair = preprocessForTripleTerms(chain, triple, execCxt);
chain = pair.getLeft();
triple = pair.getRight();
}
- // Assign to var in each binding, based on the triple pattern grounded by the match.
- QueryIterator qIter = bindTripleTerm(chain, var, triple, execContext);
+ // Match to data and assign to var in each binding, based on the triple pattern grounded by the match.
+ QueryIterator qIter = bindTripleTerm(chain, var, triple, execCxt);
return qIter;
}
- // If we assume the data is correct (in PG mode), no need to test for the triple
- // of a concrete Node_Triple because we are able to test for it in the triple
- // pattern itself. This should be "false".
- // XXX To be removed.
- private static final boolean TEST_FOR_CONCRETE_TRIPLE_TERM = false;
-
/**
* Process a triple for triple terms.
* <p>
* This creates additional matchers for triple terms in the pattern triple recursively.
*/
- private static Pair<QueryIterator, Triple> preprocess(QueryIterator chain, Triple patternTriple, ExecutionContext execContext) {
- Triple triple2 = patternTriple;
+ private static Pair<QueryIterator, Triple> preprocessForTripleTerms(QueryIterator chain, Triple patternTriple, ExecutionContext execCxt) {
Node s = patternTriple.getSubject();
Node p = patternTriple.getPredicate();
Node o = patternTriple.getObject();
@@ -139,25 +110,24 @@
Node o1 = null;
// Recurse.
- if ( s.isNodeTriple() ) {
- if ( TEST_FOR_CONCRETE_TRIPLE_TERM || ! s.isConcrete() ) {
- Triple t2 = triple(s);
- Var var = varAlloc.allocVar();
- Triple tripleTerm = Triple.create(t2.getSubject(), t2.getPredicate(), t2.getObject());
- chain = matchTripleStar(chain, var, tripleTerm, execContext);
- s1 = var;
- }
+ if ( s.isNodeTriple() && ! s.isConcrete() ) {
+ Triple t2 = triple(s);
+ Var var = varAlloc.allocVar();
+ Triple tripleTerm = Triple.create(t2.getSubject(), t2.getPredicate(), t2.getObject());
+ chain = matchTripleStar(chain, var, tripleTerm, execCxt);
+ s1 = var;
}
- if ( o.isNodeTriple() ) {
- if ( TEST_FOR_CONCRETE_TRIPLE_TERM || ! o.isConcrete() ) {
- Triple t2 = triple(o);
- Var var = varAlloc.allocVar();
- Triple tripleTerm = Triple.create(t2.getSubject(), t2.getPredicate(), t2.getObject());
- chain = matchTripleStar(chain, var, tripleTerm, execContext);
- o1 = var;
- }
+ if ( o.isNodeTriple() && ! o.isConcrete() ) {
+ Triple t2 = triple(o);
+ Var var = varAlloc.allocVar();
+ Triple tripleTerm = Triple.create(t2.getSubject(), t2.getPredicate(), t2.getObject());
+ chain = matchTripleStar(chain, var, tripleTerm, execCxt);
+ o1 = var;
}
+ // Because of the test in rdfStarTriple,
+ // this code only runs when there is a triple term.
+
// No triple term in this triple.
if ( s1 == null && o1 == null )
return Pair.create(chain, patternTriple);
@@ -172,21 +142,22 @@
}
/**
- * Match the graph with a triple pattern.
- * This is the accessor the graph.
- * It assumes any triple terms have been dealt with.
- */
- private static QueryIterator matchData(QueryIterator chain, Triple triple, ExecutionContext execContext) {
- QueryIterator qIter = new QueryIterTriplePattern(chain, triple, execContext);
- return qIter;
- }
-
- /**
* Add a binding to each row with triple grounded by the current row.
* If the triple isn't concrete, then just return the row as-is.
*/
- private static QueryIterator bindTripleTerm(QueryIterator chain, Var var, Triple triple, ExecutionContext execContext) {
- return new QueryIterAddTripleTerm(chain, var, triple, execContext);
+ private static QueryIterator bindTripleTerm(QueryIterator chain, Var var, Triple pattern, ExecutionContext execCxt) {
+ QueryIterator qIter = matchData(chain, pattern, execCxt);
+ QueryIterator qIter2 = new QueryIterAddTripleTerm(qIter, var, pattern, execCxt);
+ return qIter2;
+ }
+
+ /**
+ * Match the graph with a triple pattern.
+ * This is the accessor to the graph.
+ * It assumes any triple terms have been dealt with.
+ */
+ private static QueryIterator matchData(QueryIterator chain, Triple pattern, ExecutionContext execCxt) {
+ return new QueryIterTriplePattern(chain, pattern, execCxt);
}
/**
diff --git a/jena-core/src/main/java/org/apache/jena/graph/Node_Triple.java b/jena-core/src/main/java/org/apache/jena/graph/Node_Triple.java
index e3a215e..89fd08c 100644
--- a/jena-core/src/main/java/org/apache/jena/graph/Node_Triple.java
+++ b/jena-core/src/main/java/org/apache/jena/graph/Node_Triple.java
@@ -71,7 +71,7 @@
public boolean isNodeTriple() {
return true;
}
-
+
@Override
public String toString(PrefixMapping pm, boolean quoting) {
return "<< " + label.toString() + " >>";
diff --git a/jena-tdb/src/main/java/org/apache/jena/tdb/base/objectfile/StringFile.java b/jena-tdb/src/main/java/org/apache/jena/tdb/base/objectfile/StringFile.java
index bdd3b28..f7a0373 100644
--- a/jena-tdb/src/main/java/org/apache/jena/tdb/base/objectfile/StringFile.java
+++ b/jena-tdb/src/main/java/org/apache/jena/tdb/base/objectfile/StringFile.java
@@ -103,7 +103,6 @@
}
} ;
// ----
-
// URI compression can be effective but literals are more of a problem. More variety.
public final static boolean compression = false ;
diff --git a/jena-tdb/src/main/java/org/apache/jena/tdb/lib/NodeLib.java b/jena-tdb/src/main/java/org/apache/jena/tdb/lib/NodeLib.java
index 284f286..90f2e33 100644
--- a/jena-tdb/src/main/java/org/apache/jena/tdb/lib/NodeLib.java
+++ b/jena-tdb/src/main/java/org/apache/jena/tdb/lib/NodeLib.java
@@ -27,12 +27,10 @@
import java.util.Iterator ;
import org.apache.jena.atlas.iterator.Iter ;
-import org.apache.jena.atlas.lib.Bytes ;
-import org.apache.jena.atlas.lib.Pool ;
-import org.apache.jena.atlas.lib.PoolBase ;
-import org.apache.jena.atlas.lib.PoolSync ;
+import org.apache.jena.atlas.lib.*;
import org.apache.jena.atlas.logging.Log ;
import org.apache.jena.graph.Node ;
+import org.apache.jena.riot.out.NodeFmtLib;
import org.apache.jena.sparql.util.NodeUtils ;
import org.apache.jena.tdb.TDBException ;
import org.apache.jena.tdb.base.objectfile.ObjectFile ;
@@ -146,6 +144,11 @@
}
hash(h, n.getLiteralLexicalForm(), n.getLiteralLanguage(), dt, nt);
return;
+ case TRIPLETERM: {
+ String lex = NodeFmtLib.str(n);
+ hash(h, lex, null, null, nt);
+ return;
+ }
case OTHER :
throw new TDBException("Attempt to hash something strange: " + n);
}
diff --git a/jena-tdb/src/main/java/org/apache/jena/tdb/solver/SolverRX.java b/jena-tdb/src/main/java/org/apache/jena/tdb/solver/SolverRX.java
new file mode 100644
index 0000000..76bd603
--- /dev/null
+++ b/jena-tdb/src/main/java/org/apache/jena/tdb/solver/SolverRX.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.tdb.solver;
+
+import static org.apache.jena.graph.Node_Triple.triple;
+
+import java.util.Iterator;
+import java.util.function.Predicate;
+
+import org.apache.jena.atlas.iterator.Iter;
+import org.apache.jena.atlas.lib.Pair;
+import org.apache.jena.atlas.lib.tuple.Tuple;
+import org.apache.jena.atlas.lib.tuple.TupleFactory;
+import org.apache.jena.atlas.logging.FmtLog;
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.graph.Triple;
+import org.apache.jena.sparql.ARQException;
+import org.apache.jena.sparql.core.Quad;
+import org.apache.jena.sparql.core.Var;
+import org.apache.jena.sparql.core.VarAlloc;
+import org.apache.jena.sparql.engine.ExecutionContext;
+import org.apache.jena.sparql.engine.iterator.RX;
+import org.apache.jena.tdb.TDBException;
+import org.apache.jena.tdb.store.NodeId;
+import org.apache.jena.tdb.store.nodetable.NodeTable;
+import org.apache.jena.tdb.store.nodetupletable.NodeTupleTable;
+
+/**
+ * See {@link RX} which is the same algorithm for Triple/Node space.
+ */
+public class SolverRX {
+
+    /**
+     * Bundle of the arguments that stay the same for one solver call.
+     * These arguments get passed around a lot; grouping them keeps the argument lists short.
+     */
+    private static class SolverCxt {
+        final NodeTupleTable nodeTupleTable;
+        final boolean anyGraphs;
+        final Predicate<Tuple<NodeId>> filter;
+        final ExecutionContext execCxt;
+
+        SolverCxt(NodeTupleTable nodeTupleTable, boolean anyGraphs, Predicate<Tuple<NodeId>> filter, ExecutionContext execCxt) {
+            // The implicit superclass constructor call is all that is needed.
+            this.execCxt = execCxt;
+            this.filter = filter;
+            this.anyGraphs = anyGraphs;
+            this.nodeTupleTable = nodeTupleTable;
+        }
+    }
+
+
+    /**
+     * Call point for SolverLib.execute: solve one tuple pattern that may contain RDF* triple terms.
+     * The arguments that do not change during solving are wrapped in a {@code SolverCxt}.
+     *
+     * @return an iterator of the bindings produced for {@code tuple} on top of {@code chain}
+     */
+    public static Iterator<BindingNodeId> solveRX(NodeTupleTable nodeTupleTable, Tuple<Node> tuple, boolean anyGraph,
+                                                  Iterator<BindingNodeId> chain, Predicate<Tuple<NodeId>> filter,
+                                                  ExecutionContext execCxt) {
+        return rdfStarTriple(chain, tuple, new SolverCxt(nodeTupleTable, anyGraph, filter, execCxt));
+    }
+
+
+    /**
+     * Match a single pattern: go through the triple term processing only
+     * when the pattern has a triple term, otherwise match the data directly.
+     */
+    private static Iterator<BindingNodeId> rdfStarTriple(Iterator<BindingNodeId> input, Tuple<Node> pattern, SolverCxt sCxt) {
+        if ( tripleHasNodeTriple(pattern) )
+            return rdfStarTripleSub(input, pattern, sCxt);
+        return matchData(input, pattern, sCxt);
+    }
+
+    /**
+     * Preprocess the pattern for triple terms, then match the resulting pattern
+     * against the data using the resulting chain.
+     */
+    private static Iterator<BindingNodeId> rdfStarTripleSub(Iterator<BindingNodeId> input,
+                                                           Tuple<Node> pattern, SolverCxt sCxt) {
+        Pair<Iterator<BindingNodeId>, Tuple<Node>> processed = preprocessForTripleTerms(input, pattern, sCxt);
+        Iterator<BindingNodeId> processedChain = processed.getLeft();
+        Tuple<Node> processedPattern = processed.getRight();
+        return matchData(processedChain, processedPattern, sCxt);
+    }
+
+    /**
+     * Match a triple pattern, which may itself have nested triple terms.
+     * Each matched triple is added as a triple term bound to the supplied variable.
+     */
+    private static Iterator<BindingNodeId> matchTripleStar(Iterator<BindingNodeId> chain, Var var, Tuple<Node> pattern, SolverCxt sCxt) {
+        Iterator<BindingNodeId> input = chain;
+        Tuple<Node> flatPattern = pattern;
+        if ( tripleHasNodeTriple(pattern) ) {
+            // Nested triple terms: deal with them first.
+            Pair<Iterator<BindingNodeId>, Tuple<Node>> processed = preprocessForTripleTerms(chain, pattern, sCxt);
+            input = processed.getLeft();
+            flatPattern = processed.getRight();
+        }
+        // Match to data and assign to var in each binding, based on the triple pattern grounded by the match.
+        return bindTripleTerm(input, var, flatPattern, sCxt);
+    }
+
+    // Allocator for the internal variables that stand in for triple terms of a pattern.
+    // It is never reassigned, hence final.
+    // XXX RX: uses its own marker "*1*" rather than the allocTripleTerms marker of RX.
+    private static final VarAlloc varAlloc = new VarAlloc("*1*"/*allocTripleTerms*/) ;
+
+    /**
+     * Process a pattern tuple for triple terms.
+     * <p>
+     * For the subject and then the object position: if the term is a triple term that is
+     * not concrete, allocate a variable, put the variable into the pattern in place of the
+     * term, and extend the chain with a matcher that binds the variable to the triple term.
+     *
+     * @return the possibly extended chain, paired with the possibly rewritten pattern
+     */
+    private static Pair<Iterator<BindingNodeId>, Tuple<Node>>
+            preprocessForTripleTerms(Iterator<BindingNodeId> chain, Tuple<Node> patternTuple, SolverCxt sCxt) {
+        int[] positions = { subjectIdx(patternTuple), objectIdx(patternTuple) };
+        // Read both terms before the pattern is rewritten.
+        Node[] terms = { patternTuple.get(positions[0]), patternTuple.get(positions[1]) };
+
+        for ( int i = 0 ; i < positions.length ; i++ ) {
+            Node term = terms[i];
+            if ( ! term.isNodeTriple() || term.isConcrete() )
+                continue;
+            Triple tripleTerm = triple(term);
+            Var var = varAlloc.allocVar();
+            patternTuple = createTuple(patternTuple, var, positions[i]);
+            Tuple<Node> termPattern = tuple(tripleTerm);
+            chain = matchTripleStar(chain, var, termPattern, sCxt);
+        }
+
+        // XXX Optimize for no change. But we caught that earlier?
+        return Pair.create(chain, patternTuple);
+    }
+
+    /**
+     * Match the pattern against the data, then add a binding of {@code var} to the
+     * triple term grounded by each resulting row.
+     * Rows for which no triple term binding can be made (bindVarTripleTerm returns null)
+     * are removed from the result.
+     */
+    private static Iterator<BindingNodeId> bindTripleTerm(Iterator<BindingNodeId> chain, Var var, Tuple<Node> pattern, SolverCxt sCxt) {
+        NodeTable nodeTable = sCxt.nodeTupleTable.getNodeTable();
+        Iterator<BindingNodeId> matches = matchData(chain, pattern, sCxt);
+        // Add (var, triple term), filter no matches.
+        return Iter.iter(matches)
+                   .map(row -> bindVarTripleTerm(var, pattern, row, nodeTable))
+                   .removeNulls();
+    }
+
+    /**
+     * Extend a binding with {@code var} bound to the NodeId of the triple term that
+     * the pattern denotes under that binding.
+     * <p>
+     * We need to reconstruct the reason the pattern matched to find the NodeId for the
+     * Node_Triple. This involves creating a Node_Triple and looking it up in the node table.
+     * This isn't ideal but, without triple ids in the database, there isn't much we can do.
+     *
+     * @return a new binding including {@code var}, or null if a term of the pattern does not
+     *         resolve to a concrete node or the triple term is not in the node table
+     */
+    private static BindingNodeId bindVarTripleTerm(Var var, Tuple<Node> pattern, BindingNodeId binding, NodeTable nodeTable) {
+        // Get triple out of tuple of length 3 or 4: skip the first slot of a 4-tuple.
+        int offset = (pattern.len()==4) ? 1 : 0;
+
+        // Resolve subject, predicate, object in that order; give up on the first failure.
+        Node[] spo = new Node[3];
+        for ( int i = 0 ; i < spo.length ; i++ ) {
+            Node resolved = substitute(pattern.get(offset+i), binding, nodeTable);
+            if ( resolved == null || ! resolved.isConcrete() )
+                return null;
+            spo[i] = resolved;
+        }
+
+        // Does it exist?
+        Node tripleTerm = NodeFactory.createTripleNode(spo[0], spo[1], spo[2]);
+        NodeId termId = nodeTable.getNodeIdForNode(tripleTerm);
+        // Should not happen.
+        if ( NodeId.isDoesNotExist(termId) )
+            return null;
+        BindingNodeId extended = new BindingNodeId(binding);
+        extended.put(var, termId);
+        return extended;
+    }
+
+    /**
+     * Resolve a pattern node against a binding.
+     * A node that is not a variable is returned unchanged. A variable is looked up in the
+     * binding and its NodeId is converted back to a Node using the node table.
+     *
+     * @return the node, or null if the variable is unbound, its id is the "does not exist" id,
+     *         or the lookup failed (the failure is logged)
+     */
+    private static Node substitute(Node node, BindingNodeId binding, NodeTable nodeTable) {
+        if ( ! Var.isVar(node) )
+            return node;
+        Var var = Var.alloc(node);
+        try {
+            NodeId id = binding.get(var);
+            if ( id == null || NodeId.isDoesNotExist(id) )
+                return null;
+            Node resolved = nodeTable.getNodeForNodeId(id);
+            if ( resolved != null )
+                return resolved;
+            // The id came out of the binding, so a missing node is treated as an error.
+            // The exception is caught just below, so it ends up logged, not propagated.
+            throw new TDBException("No node in NodeTable for NodeId "+id);
+        } catch (Exception ex) {
+            FmtLog.error(SolverRX.class, ex, "SolverRX: substitute(%s) %s", node, binding);
+            return null;
+        }
+    }
+
+    /**
+     * Match the pattern against the stored tuples by delegating to SolverLib.solve.
+     * Triple terms in the pattern are not treated specially here.
+     */
+    private static Iterator<BindingNodeId> matchData(Iterator<BindingNodeId> chain, Tuple<Node> pattern, SolverCxt sCxt) {
+        Iterator<BindingNodeId> matches =
+            SolverLib.solve(sCxt.nodeTupleTable, pattern, sCxt.anyGraphs, chain, sCxt.filter, sCxt.execCxt);
+        return matches;
+    }
+
+    /**
+     * Create a tuple like the given one but with {@code var} in slot {@code idx}.
+     * Index 0 and 2 produce a tuple of length 3; index 1 and 3 produce a tuple of length 4.
+     *
+     * @throws ARQException if idx is not 0, 1, 2 or 3
+     */
+    private static Tuple<Node> createTuple(Tuple<Node> tuple, Var var, int idx) {
+        if ( idx == 0 )
+            return TupleFactory.create3(var, tuple.get(1), tuple.get(2));
+        if ( idx == 1 )
+            return TupleFactory.create4(tuple.get(0), var, tuple.get(2), tuple.get(3));
+        if ( idx == 2 )
+            return TupleFactory.create3(tuple.get(0), tuple.get(1), var);
+        if ( idx == 3 )
+            return TupleFactory.create4(tuple.get(0), tuple.get(1), tuple.get(2), var);
+        throw new ARQException("Index is not recognized: "+idx);
+    }
+
+    /**
+     * Index of the subject slot: 0 for a tuple of length 3, 1 for a tuple of length 4.
+     *
+     * @throws ARQException for any other tuple length
+     */
+    private static int subjectIdx(Tuple<Node> pattern) {
+        int len = pattern.len();
+        if ( len == 3 )
+            return 0;
+        if ( len == 4 )
+            return 1;
+        throw new ARQException("Tuple not of length 3 or 4");
+    }
+
+    /**
+     * Index of the object slot: 2 for a tuple of length 3, 3 for a tuple of length 4.
+     *
+     * @throws ARQException for any other tuple length
+     */
+    private static int objectIdx(Tuple<Node> pattern) {
+        int len = pattern.len();
+        if ( len == 3 )
+            return 2;
+        if ( len == 4 )
+            return 3;
+        throw new ARQException("Tuple not of length 3 or 4");
+    }
+
+    /**
+     * Get the NodeId for a constant by looking it up in the node table.
+     *
+     * @return null for a variable, otherwise whatever the node table returns for the node
+     */
+    private static NodeId idFor(NodeTable nodeTable, Node node) {
+        return Var.isVar(node) ? null : nodeTable.getNodeIdForNode(node);
+    }
+
+    /**
+     * Test whether the pattern has a triple term in its subject or its object slot.
+     *
+     * @param pattern a tuple of length 3 or 4
+     * @return true if the subject or the object is a triple term
+     * @throws ARQException if the tuple is not of length 3 or 4
+     */
+    private static boolean tripleHasNodeTriple(Tuple<Node> pattern) {
+        int sIdx = subjectIdx(pattern);
+        if ( pattern.get(sIdx).isNodeTriple() )
+            return true;
+        // The object slot must be located with objectIdx; using subjectIdx here
+        // would test the subject twice and miss a triple term in the object.
+        int oIdx = objectIdx(pattern);
+        if ( pattern.get(oIdx).isNodeTriple() )
+            return true;
+        return false;
+    }
+
+    // XXX Somewhere
+    /** Convert a triple to a tuple of length 3: subject, predicate, object. */
+    private static Tuple<Node> tuple(Triple triple) {
+        Node s = triple.getSubject();
+        Node p = triple.getPredicate();
+        Node o = triple.getObject();
+        return TupleFactory.create3(s, p, o);
+    }
+
+    /** Convert a quad to a tuple of length 4: graph, subject, predicate, object. */
+    private static Tuple<Node> tuple(Quad quad) {
+        Node g = quad.getGraph();
+        Node s = quad.getSubject();
+        Node p = quad.getPredicate();
+        Node o = quad.getObject();
+        return TupleFactory.create4(g, s, p, o);
+    }
+}
+
diff --git a/jena-tdb/src/main/java/org/apache/jena/tdb/solver/StageMatchTuple.java b/jena-tdb/src/main/java/org/apache/jena/tdb/solver/StageMatchTuple.java
index 8054343..fad38df 100644
--- a/jena-tdb/src/main/java/org/apache/jena/tdb/solver/StageMatchTuple.java
+++ b/jena-tdb/src/main/java/org/apache/jena/tdb/solver/StageMatchTuple.java
@@ -46,9 +46,9 @@
private Predicate<Tuple<NodeId>> filter ;
public StageMatchTuple(NodeTupleTable nodeTupleTable, Iterator<BindingNodeId> input,
- Tuple<Node> tuple, boolean anyGraphs,
- Predicate<Tuple<NodeId>> filter,
- ExecutionContext execCxt)
+ Tuple<Node> tuple, boolean anyGraphs,
+ Predicate<Tuple<NodeId>> filter,
+ ExecutionContext execCxt)
{
super(input) ;
this.filter = filter ;
@@ -63,7 +63,7 @@
* A variable that is not bound by the binding is placed in the var array.
* Return false if preparation detects the pattern can not match.
*/
- public static boolean prepare(NodeTable nodeTable, Tuple<Node> patternTuple, BindingNodeId input, NodeId ids[], Var[] var)
+ private static boolean prepare(NodeTable nodeTable, Tuple<Node> patternTuple, BindingNodeId input, NodeId ids[], Var[] var)
{
// Process the Node to NodeId conversion ourselves because
// we wish to abort if an unknown node is seen.
diff --git a/jena-tdb/src/main/java/org/apache/jena/tdb/store/NodeType.java b/jena-tdb/src/main/java/org/apache/jena/tdb/store/NodeType.java
index 40b99cd..9de3a64 100644
--- a/jena-tdb/src/main/java/org/apache/jena/tdb/store/NodeType.java
+++ b/jena-tdb/src/main/java/org/apache/jena/tdb/store/NodeType.java
@@ -24,6 +24,7 @@
import org.apache.jena.tdb.TDBException ;
// Currently unused.
+// Placeholder for inline encoding.
public enum NodeType implements Named
{
// Do not allocate id 0
@@ -54,11 +55,17 @@
@Override public int getTypeId() { return 3 ; }
@Override public String getName() { return "Literal" ; }
} ,
+
+ TRIPLETERM {
+ @Override public XSDDatatype getDatatype() { return null ; }
+ @Override public int getTypeId() { return 4 ; }
+ @Override public String getName() { return "TripleTerm" ; }
+ } ,
// STRING
// {
// @Override public XSDDatatype getDatatype() { return null ; }
-// @Override public int getTypeId() { return 4 ; }
+// @Override public int getTypeId() { return 100 ; }
// @Override public String getName() { return "String" ; }
// } ,
//
@@ -160,8 +167,7 @@
{
if ( n.isURI() ) return URI ;
if ( n.isBlank() ) return BNODE ;
- if ( n.isLiteral() )
- {
+ if ( n.isLiteral() ) {
return LITERAL ;
// if ( n.getLiteralDatatypeURI() == null )
// // String - plain literal
@@ -175,6 +181,7 @@
// if ( n.getLiteralDatatype() == XSDDatatype.XSDdateTime )
// return DATETIME ;
}
+ if ( n.isNodeTriple() ) return TRIPLETERM;
return OTHER ;
}
@@ -184,6 +191,7 @@
if ( type == BNODE.getTypeId() ) return BNODE ;
if ( type == URI.getTypeId() ) return URI ;
if ( type == LITERAL.getTypeId() ) return LITERAL ;
+ if ( type == TRIPLETERM.getTypeId() ) return TRIPLETERM;
// if ( type == STRING.getTypeId() ) return STRING ;
// if ( type == XSDSTRING.getTypeId() ) return XSDSTRING ;
// if ( type == INTEGER.getTypeId() ) return INTEGER ;
diff --git a/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java b/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java
index 3a3284e..7752a52 100644
--- a/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java
+++ b/jena-tdb/src/main/java/org/apache/jena/tdb/store/nodetable/NodecSSE.java
@@ -18,88 +18,98 @@
package org.apache.jena.tdb.store.nodetable;
-import java.nio.ByteBuffer ;
+import java.nio.ByteBuffer;
-import org.apache.jena.atlas.io.BlockUTF8 ;
-import org.apache.jena.atlas.lib.StrUtils ;
-import org.apache.jena.graph.Node ;
-import org.apache.jena.graph.NodeFactory ;
-import org.apache.jena.riot.RiotException ;
-import org.apache.jena.riot.out.NodeFmtLib ;
-import org.apache.jena.riot.system.PrefixMap ;
-import org.apache.jena.riot.system.PrefixMapNull ;
-import org.apache.jena.riot.tokens.Token ;
-import org.apache.jena.riot.tokens.Tokenizer ;
-import org.apache.jena.riot.tokens.TokenizerFactory ;
-import org.apache.jena.riot.web.LangTag ;
-import org.apache.jena.shared.PrefixMapping ;
-import org.apache.jena.sparql.util.NodeUtils ;
-import org.apache.jena.tdb.TDBException ;
-import org.apache.jena.tdb.lib.StringAbbrev ;
+import org.apache.jena.atlas.io.BlockUTF8;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.atlas.logging.FmtLog;
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.NodeFactory;
+import org.apache.jena.graph.Node_Triple;
+import org.apache.jena.graph.Triple;
+import org.apache.jena.riot.RiotException;
+import org.apache.jena.riot.out.NodeFmtLib;
+import org.apache.jena.riot.system.PrefixMap;
+import org.apache.jena.riot.system.PrefixMapNull;
+import org.apache.jena.riot.tokens.Token;
+import org.apache.jena.riot.tokens.Tokenizer;
+import org.apache.jena.riot.tokens.TokenizerFactory;
+import org.apache.jena.riot.web.LangTag;
+import org.apache.jena.shared.PrefixMapping;
+import org.apache.jena.sparql.sse.SSE;
+import org.apache.jena.sparql.util.NodeUtils;
+import org.apache.jena.tdb.TDB;
+import org.apache.jena.tdb.TDBException;
/** Simple encoder/decoder for nodes that uses Turtle term string encoding. */
public class NodecSSE implements Nodec
{
// Characters in IRIs that are illegal and cause SSE problems, but we wish to keep.
- private final static char MarkerChar = '_' ;
- private final static char[] invalidIRIChars = { MarkerChar , ' ' } ;
+ private final static char MarkerChar = '_';
+ private final static char[] invalidIRIChars = { MarkerChar , ' ' };
public NodecSSE() {}
@Override
public int maxSize(Node node)
{
- return maxLength(node) ;
+ return maxLength(node);
}
- private static final PrefixMap pmap0 = PrefixMapNull.empty ;
- private static final boolean onlySafeBNodeLabels = false ;
+ private static final PrefixMap pmap0 = PrefixMapNull.empty;
+ private static final boolean onlySafeBNodeLabels = false;
@Override
public int encode(Node node, ByteBuffer bb, PrefixMapping pmap)
{
- String str = null ;
+ // Warn only (encoding still proceeds) on a non-concrete node. The node is passed as a
+ // format argument, not concatenated into the format string, so '%' in its text is literal.
+ if ( ! node.isConcrete() )
+ FmtLog.warn(TDB.logInfo, "Attempt to encode non-concrete node: %s", node);
+
+ String str = null;
if ( node.isURI() )
{
// Pesky spaces etc
- String x = StrUtils.encodeHex(node.getURI(), MarkerChar, invalidIRIChars) ;
+ String x = StrUtils.encodeHex(node.getURI(), MarkerChar, invalidIRIChars);
if ( x != node.getURI() )
- node = NodeFactory.createURI(x) ;
+ node = NodeFactory.createURI(x);
}
if ( node.isLiteral() && NodeUtils.isLangString(node) )
{
// Check syntactically valid.
- String lang = node.getLiteralLanguage() ;
+ String lang = node.getLiteralLanguage();
if ( ! LangTag.check(lang) )
- throw new TDBException("bad language tag: "+node) ;
+ throw new TDBException("bad language tag: "+node);
}
if ( node.isBlank() && ! onlySafeBNodeLabels ) {
// Special case.
- str = "_:"+node.getBlankNodeLabel() ;
+ str = "_:"+node.getBlankNodeLabel();
}
- // Node->String
+ if ( node.isNodeTriple() ) {
+ str = NodeFmtLib.str(node);
+ }
+
+ // Catch-all: Node->String
if ( str == null )
- str = NodeFmtLib.str(node, (String)null, pmap0) ;
- // String -> bytes ;
- BlockUTF8.fromChars(str, bb) ;
- bb.flip() ;
- return bb.limit() ;
+ str = NodeFmtLib.str(node);
+ // String -> bytes
+ BlockUTF8.fromChars(str, bb);
+ bb.flip();
+ return bb.limit();
}
-
+
@Override
- public Node decode(ByteBuffer bb, PrefixMapping pmap)
- {
+ public Node decode(ByteBuffer bb, PrefixMapping pmap) {
// Ideally, this would be straight from the byte buffer.
// But currently we go bytes -> string -> node
// Byte -> String
- String str = BlockUTF8.toString(bb) ;
- //OLD
- //String str = Bytes.fromByteBuffer(bb) ;
+ String str = BlockUTF8.toString(bb);
// String -> Node
// Easy cases.
@@ -108,32 +118,37 @@
// Must be done this way.
// In particular, bnode labels can contain ":" from Jena
// TokenizerText does not recognize these.
- str = str.substring(2) ;
- return NodeFactory.createBlankNode(str) ;
+ str = str.substring(2);
+ return NodeFactory.createBlankNode(str);
}
- if ( str.startsWith("<") )
+ if ( str.startsWith("<<") ) {
+ // Complex - not a single token so use full machinery.
+ return SSE.parseNode(str);
+ }
+
+ if ( str.startsWith("<") )
{
// Do directly.
// (is it quicker?)
- str = str.substring(1,str.length()-1) ;
- str = StrUtils.unescapeString(str) ;
- str = StrUtils.decodeHex(str, MarkerChar) ;
- return NodeFactory.createURI(str) ;
+ str = str.substring(1,str.length()-1);
+ str = StrUtils.unescapeString(str);
+ str = StrUtils.decodeHex(str, MarkerChar);
+ return NodeFactory.createURI(str);
}
- Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(str) ;
+ Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(str);
if ( ! tokenizer.hasNext() )
- throw new TDBException("Failed to tokenise: "+str) ;
- Token t = tokenizer.next() ;
+ throw new TDBException("Failed to tokenize: "+str);
+ Token t = tokenizer.next();
try {
- Node n = t.asNode() ;
- if ( n == null ) throw new TDBException("Not a node: "+str) ;
- return n ;
+ Node n = t.asNode();
+ if ( n == null ) throw new TDBException("Not a node: "+str);
+ return n;
} catch (RiotException ex)
{
- throw new TDBException("Bad string for node: "+str) ;
+ throw new TDBException("Bad string for node: "+str);
}
}
@@ -142,25 +157,31 @@
{
if ( node.isBlank() )
// "_:"
- return 2+maxLength(node.getBlankNodeLabel()) ;
+ return 2+maxLength(node.getBlankNodeLabel());
if ( node.isURI() )
// "<>"
- return 2+maxLength(node.getURI()) ;
+ return 2+maxLength(node.getURI());
if ( node.isLiteral() )
{
- int len = 2+maxLength(node.getLiteralLexicalForm()) ;
+ int len = 2+maxLength(node.getLiteralLexicalForm());
if ( NodeUtils.isLangString(node) )
// Space for @ (language tag is ASCII)
- len = len + 3 + node.getLiteralLanguage().length() ;
+ len = len + 3 + node.getLiteralLanguage().length();
else if ( ! NodeUtils.isSimpleString(node) )
// The quotes and also space for ^^<>
- len = len + 4 + maxLength(node.getLiteralDatatypeURI()) ;
- return len ;
+ len = len + 4 + maxLength(node.getLiteralDatatypeURI());
+ return len;
}
if ( node.isVariable() )
// "?"
- return 1+maxLength(node.getName()) ;
- throw new TDBException("Unrecognized node type: "+node) ;
+ return 1+maxLength(node.getName());
+ if ( node.isNodeTriple() ) {
+ Triple t = Node_Triple.triple(node);
+ // Leading and trailing <<>>, 4 spaces
+ return (2+4+2)+maxLength(t.getSubject())+maxLength(t.getPredicate())+maxLength(t.getObject());
+ }
+
+ throw new TDBException("Unrecognized node type: "+node);
}
private static int maxLength(String string)
@@ -168,40 +189,40 @@
// Very worse case for UTF-8 - and then some.
// Encoding every character as _XX or bad UTF-8 conversion (3 bytes)
// Max 3 bytes UTF-8 for up to 10FFFF (NB Java treats above 16bites as surrogate pairs only).
- return string.length()*3 ;
+ return string.length()*3;
}
- // URI compression can be effective but literals are more of a problem. More variety.
- public final static boolean compression = false ;
- private static StringAbbrev abbreviations = new StringAbbrev() ;
- static {
- abbreviations.add( "rdf", "<http://www.w3.org/1999/02/22-rdf-syntax-ns#") ;
- abbreviations.add( "rdfs", "<http://www.w3.org/2000/01/rdf-schema#") ;
- abbreviations.add( "xsd", "<http://www.w3.org/2001/XMLSchema#") ;
-
- // MusicBrainz
- abbreviations.add( "mal", "<http://musicbrainz.org/mm-2.1/album/") ;
- abbreviations.add( "mt", "<http://musicbrainz.org/mm-2.1/track/") ;
- abbreviations.add( "mar", "<http://musicbrainz.org/mm-2.1/artist/") ;
- abbreviations.add( "mtr", "<http://musicbrainz.org/mm-2.1/trmid/") ;
- abbreviations.add( "mc", "<http://musicbrainz.org/mm-2.1/cdindex/") ;
-
- abbreviations.add( "m21", "<http://musicbrainz.org/mm/mm-2.1#") ;
- abbreviations.add( "dc", "<http://purl.org/dc/elements/1.1/") ;
- // DBPedia
- abbreviations.add( "r", "<http://dbpedia/resource/") ;
- abbreviations.add( "p", "<http://dbpedia/property/") ;
- }
- private String compress(String str)
- {
- if ( !compression || abbreviations == null ) return str ;
- return abbreviations.abbreviate(str) ;
- }
-
- private String decompress(String x)
- {
- if ( !compression || abbreviations == null ) return x ;
- return abbreviations.expand(x) ;
- }
-
+ // See also StringFile.
+// // URI compression can be effective but literals are more of a problem. More variety.
+// public final static boolean compression = false;
+// private static StringAbbrev abbreviations = new StringAbbrev();
+// static {
+// abbreviations.add( "rdf", "<http://www.w3.org/1999/02/22-rdf-syntax-ns#");
+// abbreviations.add( "rdfs", "<http://www.w3.org/2000/01/rdf-schema#");
+// abbreviations.add( "xsd", "<http://www.w3.org/2001/XMLSchema#");
+//
+// // MusicBrainz
+// abbreviations.add( "mal", "<http://musicbrainz.org/mm-2.1/album/");
+// abbreviations.add( "mt", "<http://musicbrainz.org/mm-2.1/track/");
+// abbreviations.add( "mar", "<http://musicbrainz.org/mm-2.1/artist/");
+// abbreviations.add( "mtr", "<http://musicbrainz.org/mm-2.1/trmid/");
+// abbreviations.add( "mc", "<http://musicbrainz.org/mm-2.1/cdindex/");
+//
+// abbreviations.add( "m21", "<http://musicbrainz.org/mm/mm-2.1#");
+// abbreviations.add( "dc", "<http://purl.org/dc/elements/1.1/");
+// // DBPedia
+// abbreviations.add( "r", "<http://dbpedia/resource/");
+// abbreviations.add( "p", "<http://dbpedia/property/");
+// }
+// private String compress(String str)
+// {
+// if ( !compression || abbreviations == null ) return str;
+// return abbreviations.abbreviate(str);
+// }
+//
+// private String decompress(String x)
+// {
+// if ( !compression || abbreviations == null ) return x;
+// return abbreviations.expand(x);
+// }
}