Merge pull request #649 from afs/fuseki-1794
JENA-1794: Look for rdf:type fuseki:Service if duplicate fuseki:name
diff --git a/README.md b/README.md
index 66f39d2..e1399b4 100644
--- a/README.md
+++ b/README.md
@@ -7,4 +7,4 @@
The codebase for the active modules is in git:
-https://git-wip-us.apache.org/repos/asf?p=jena.git
+https://github.com/apache/jena
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
index d22f5c3..d01ddf2 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
@@ -85,7 +85,7 @@
if ( filename.startsWith("file:") )
{
filename = filename.substring("file:".length());
- filename = IRILib.decode(filename);
+ filename = IRILib.decodeHex(filename);
}
InputStream in = new FileInputStream(filename);
String ext = FilenameUtils.getExtension(filename);
@@ -180,7 +180,7 @@
if ( filename.startsWith("file:") )
{
filename = filename.substring("file:".length());
- filename = IRILib.decode(filename);
+ filename = IRILib.decodeHex(filename);
}
OutputStream out = new FileOutputStream(filename);
String ext = FilenameUtils.getExtension(filename);
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
index ffe05de..8c6dfff 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/lib/IRILib.java
@@ -232,10 +232,6 @@
return uri ;
}
- public static String decode(String string) {
- return StrUtils.decodeHex(string, '%') ;
- }
-
public static String encodeNonASCII(String string) {
if ( ! containsNonASCII(string) )
return string ;
@@ -248,7 +244,7 @@
sw.append( (char) b );
continue;
}
-
+
int hi = ( b & 0xF0 ) >> 4;
int lo = b & 0xF;
sw.append( '%' );
@@ -265,5 +261,24 @@
return true;
}
return false ;
+ }
+
+ /** @deprecated Use {@link #decodeHex} */
+ @Deprecated
+ public static String decode(String string) { return decodeHex(string); }
+
+ /**
+ * Decode a string that may have %-encoded sequences.
+ * <p>
+ * This function will reverse
+ * {@link #encodeNonASCII(String)},
+ * {@link #encodeUriPath(String)},
+ * {@link #encodeFileURL(String)} and
+ * {@link #encodeUriComponent(String)}.
+ *
+ * It will not decode '+' used for space (application/x-www-form-urlencoded).
+ */
+ public static String decodeHex(String string) {
+ return StrUtils.decodeHex(string, '%') ;
}
}
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/lib/StrUtils.java b/jena-base/src/main/java/org/apache/jena/atlas/lib/StrUtils.java
index f179bd3..796a7ca 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/lib/StrUtils.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/lib/StrUtils.java
@@ -22,6 +22,7 @@
import static java.util.Arrays.stream ;
import static java.util.stream.Collectors.toList;
+import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -200,33 +201,38 @@
/**
* Decode a string using marked hex values e.g. %20
*
- * @param str String to decode
- * @param marker The marker charcater
+ * @param str String to decode : characters should be ASCII (<128)
+ * @param marker The marker character
* @return Decoded string (returns input object on no change)
*/
public static String decodeHex(String str, char marker) {
- int idx = str.indexOf(marker);
- if ( idx == -1 )
+ if ( str.indexOf(marker) < 0 )
return str;
- StringBuilder buff = new StringBuilder();
-
- buff.append(str, 0, idx);
- int N = str.length();
-
- for ( ; idx < N ; idx++ ) {
- char ch = str.charAt(idx);
- // First time through this is true, always.
- if ( ch != marker )
- buff.append(ch);
- else {
- char hi = str.charAt(idx + 1);
- char lo = str.charAt(idx + 2);
- char ch2 = (char)(hexDecode(hi) << 4 | hexDecode(lo));
- buff.append(ch2);
- idx += 2;
+ // This function does work if input str is not pure ASCII.
+ // The tricky part is if an %-encoded part is a UTF-8 sequence.
+ // An alternative algorithm is to work in chars from the string, and handle
+ // that case %-encoded when value has the high bit set.
+ byte[] strBytes = StrUtils.asUTF8bytes(str);
+ final int N = strBytes.length;
+ // Max length: decoding never produces more bytes than the input has.
+ byte[] bytes = new byte[strBytes.length];
+ int i = 0;
+ for ( int j = 0 ; j < N ; j++ ) {
+ byte b = strBytes[j];
+ if ( b != marker ) {
+ bytes[i++] = b;
+ continue;
}
+ // Marker: j indexes strBytes (not str), so read the hex digits from the bytes.
+ char hi = (char)(strBytes[j + 1] & 0xFF);
+ char lo = (char)(strBytes[j + 2] & 0xFF);
+ j += 2;
+ int x1 = hexDecode(hi);
+ int x2 = hexDecode(lo);
+ int ch2 = (x1 << 4 | x2);
+ bytes[i++] = (byte)ch2;
}
- return buff.toString();
+ return new String(bytes, 0, i, StandardCharsets.UTF_8);
}
// Encoding is table-driven but for decode, we use code.
diff --git a/jena-base/src/test/java/org/apache/jena/atlas/lib/TS_Lib.java b/jena-base/src/test/java/org/apache/jena/atlas/lib/TS_Lib.java
index b05d47c..87abe6e 100644
--- a/jena-base/src/test/java/org/apache/jena/atlas/lib/TS_Lib.java
+++ b/jena-base/src/test/java/org/apache/jena/atlas/lib/TS_Lib.java
@@ -41,6 +41,7 @@
, TestCache2.class
, TestFileOps.class
, TestStrUtils.class
+ , TestIRILib.class
, TestXMLLib.class
, TestAlarmClock.class
, TestTrie.class
diff --git a/jena-base/src/test/java/org/apache/jena/atlas/lib/TestIRILib.java b/jena-base/src/test/java/org/apache/jena/atlas/lib/TestIRILib.java
new file mode 100644
index 0000000..cd11e22
--- /dev/null
+++ b/jena-base/src/test/java/org/apache/jena/atlas/lib/TestIRILib.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.atlas.lib;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class TestIRILib {
+
+ @Test public void encodeDecode01() { encodeDecode(""); }
+
+ @Test public void encodeDecode02() { encodeDecode("aa"); }
+
+ @Test public void encodeDecode03() { encodeDecode("aa"); }
+
+ @Test public void encodeDecode04() { encodeDecode("Größe"); }
+
+ private void encodeDecode(String testString) {
+ String encoded = IRILib.encodeNonASCII(testString);
+ String decoded = IRILib.decodeHex(encoded);
+ if ( ! testString.equals(decoded) ) {
+ System.out.println(encoded);
+ }
+ assertEquals(testString, decoded);
+ }
+
+}
diff --git a/jena-db/jena-dboe-transaction/src/main/java/org/apache/jena/dboe/transaction/txn/TransactionInfo.java b/jena-db/jena-dboe-transaction/src/main/java/org/apache/jena/dboe/transaction/txn/TransactionInfo.java
index a727f85..d49698a 100644
--- a/jena-db/jena-dboe-transaction/src/main/java/org/apache/jena/dboe/transaction/txn/TransactionInfo.java
+++ b/jena-db/jena-dboe-transaction/src/main/java/org/apache/jena/dboe/transaction/txn/TransactionInfo.java
@@ -63,7 +63,7 @@
*/
public ReadWrite getMode();
- /** Is this currently a READ transaction? Promotion may chnage the mode.
+ /** Is this currently a READ transaction? Promotion may change the mode.
* Convenience operation equivalent to {@code (getMode() == ReadWrite.READ)}
*/
public default boolean isReadTxn() { return getMode() == ReadWrite.READ; }
@@ -81,4 +81,3 @@
}
}
-
diff --git a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/TDB2StorageBuilder.java b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/TDB2StorageBuilder.java
index 18b76cd..5648afe 100644
--- a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/TDB2StorageBuilder.java
+++ b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/TDB2StorageBuilder.java
@@ -62,9 +62,9 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/** Build TDB2 databases based on {@linkplain DatabaseRDF}.
+/** Build TDB2 databases based on {@linkplain DatabaseRDF}.
* This builds the storage database, not the switchable.
- *
+ *
* {@link DatabaseOps#createSwitchable} adds the switching layer
* and is called by {@link DatabaseConnection#make}.
*/
@@ -300,67 +300,15 @@
private NodeTable buildNodeTable(String name) {
NodeTable nodeTable = buildBaseNodeTable(name);
-
+
nodeTable = NodeTableCache.create(nodeTable, params);
-
+
if ( nodeTable instanceof NodeTableCache ) {
NodeTableCache nodeTableCache = (NodeTableCache)nodeTable;
-
- // [1746] A "notification" - better way to do this?
- // Need to go before the storage of the node table commits.
-// TransactionalComponent tc = new TransactionalComponentBase<Object>(ComponentId.allocLocal()) {
-//
-// private Object state = new Object();
-// private TxnId activeWriter = null;
-//
-// @Override
-// protected Object _begin(ReadWrite readWrite, TxnId txnId) {
-// System.out.println("_begin");
-//// // XXX OK?
-//// if ( isWriteTxn() ) {
-//// nodeTableCache.updateBegin(txnId);
-//// activeWriter = txnId;
-//// }
-// return state;
-// }
-//
-// @Override
-// protected Object _promote(TxnId txnId, Object state) {
-// System.out.println("_promote");
-//// if ( isWriteTxn() ) {
-//// nodeTableCache.updateBegin(txnId);
-//// activeWriter = txnId;
-//// }
-// return state;
-// }
-//
-//// @Override
-//// protected void _commit(TxnId txnId, Object state) {}
-//
-// @Override
-// protected void _commitEnd(TxnId txnId, Object state) {
-// System.out.println("_commitEnd");
-//// if ( activeWriter == txnId ) {
-//// nodeTableCache.updateCommit();
-//// activeWriter = null;
-//// }
-// }
-//
-// @Override
-// protected void _abort(TxnId txnId, Object state) {
-// System.out.println("_abort");
-//// if ( activeWriter == txnId ) {
-//// nodeTableCache.updateAbort();
-//// activeWriter = null;
-//// }
-// }
-// };
-// components.add(tc);
- // [1746]
listeners.add(nodeTableCache);
}
-
+
nodeTable = NodeTableInline.create(nodeTable);
return nodeTable;
}
@@ -384,7 +332,7 @@
TransBinaryDataFile transBinFile = new TransBinaryDataFile(binFile, cid, pState);
return transBinFile;
}
-
+
private static boolean warnAboutOptimizer = true ;
public static ReorderTransformation chooseReorderTransformation(Location location) {
if ( location == null )
diff --git a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/NodeTableCache.java b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/NodeTableCache.java
index 0d3d7fd..aa02c25 100644
--- a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/NodeTableCache.java
+++ b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/NodeTableCache.java
@@ -41,25 +41,31 @@
*/
public class NodeTableCache implements NodeTable, TransactionListener {
// These caches are updated together.
- // See synchronization in _retrieveNodeByNodeId and _idForNode
+ // See synchronization in _retrieveNodeByNodeId and _idForNode.
// The cache is assumed to be single operation-thread-safe.
-
- // The buffering is for updates. Only the updating thread will see changes due to new nodes
- // Case 1: Not in main "not-present"
- // Add to local "not-present", flush down.
-
- // Case 2: In main "not-present"
- // May be goes into local cache.
- // Write back updates "not-present"
- // Depends on "not-rpesent" used to protect the underlying "not-present"
-
-
+ // The buffering is for updates so that if it aborts, the changes are not made;
+ // the underlying node table, being transactional, also does not make the changes.
+ //
+ // It does not matter if readers can see nodes from a completed now-finished
+ // writer transaction. Nodes in the node table do not mean triples exist and only triples determine
+ // the state of the data.
+ //
+ // Where there are only readers active the ThreadBufferingCache caches act as
+ // pass-through and the not-present cache can be updated by any reader.
+ //
+ // When there is an active writer, the ThreadBufferingCache caches add a
+ // write-visible-only caching and only the writer can update the "not-present"
+ // cache. Because the node table is append-only (nodes are not deleted), it can
+ // mean a node which was not-present is added and the not-present cache now does
+ // not catch that for a previous version reader. This does not matter, the small
+ // not-present cache is only a speed-up and does not have to be correct
+ // for missing nodes (it can't have entries for nodes that do exist in visible
+ // data).
private ThreadBufferingCache<Node, NodeId> node2id_Cache = null;
private ThreadBufferingCache<NodeId, Node> id2node_Cache = null;
// A small cache of "known unknowns" to speed up searching for impossible things.
- // Cache update needed on NodeTable changes because a node may become "known"
private Cache<Node, Object> notPresent = null;
private NodeTable baseTable;
private final Object lock = new Object();
@@ -297,14 +303,16 @@
notPresent.remove(node);
}
- // A top-level transaction is either
+ // A top-level transaction can update the not-present cache.
+ // It is either
// - a write transaction or
- // - a read transaction with most recent data version given that there's no active write transaction.
+ // - a read transaction and no active writer.
private boolean inTopLevelTxn() {
Thread writer = writingThread;
return (writer == null) || (writer == Thread.currentThread());
}
+ // -- TransactionListener
@Override
public void notifyTxnStart(Transaction transaction) {
if (transaction.isWriteTxn())
@@ -329,18 +337,17 @@
if(transaction.isWriteTxn())
updateAbort();
}
-
- // ----
+ // -- TransactionListener
// The cache is "optimistic" - nodes are added during the transaction.
- // It does not matter if they get added (and visible earlier)
- // because this is nothing more than "preallocation". Triples (Tuple of NodeIds) don't match.
-
- // Underlying file has them "transactionally".
-
+ // The underlying file has them "transactionally".
+ //
// On abort, it does need to be undone because the underlying NodeTable
// being cached will not have them.
-
+ //
+ // We don't "undo" for abort because it would mean keeping a data structure that
+ // is related to the size of the transaction and if in-memory, a limitation of
+ // scale.
private void updateStart() {
node2id_Cache.enableBuffering();
id2node_Cache.enableBuffering();
@@ -383,6 +390,7 @@
id2node_Cache = null;
notPresent = null;
baseTable = null;
+ writingThread = null;
}
@Override
diff --git a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/ThreadBufferingCache.java b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/ThreadBufferingCache.java
index e946e53..b683671 100644
--- a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/ThreadBufferingCache.java
+++ b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/store/nodetable/ThreadBufferingCache.java
@@ -55,7 +55,7 @@
private String label;
// This turns the feature off. Development only. Do not release with this set "false".
private static final boolean BUFFERING = true;
-
+
public ThreadBufferingCache(String label, Cache<Key,Value> mainCache, int size) {
this.localCache = CacheFactory.createCache(size);
this.baseCache = mainCache;
@@ -72,14 +72,13 @@
return bufferingThread.get() == currentThread;
}
- // XXX [1746] Can replace by direct use.
private Cache<Key, Value> localCache() {
return localCache;
}
// ---- Buffer management.
// Only one thread can be using the additional caches.
-
+
public void enableBuffering() {
if ( ! BUFFERING )
return;
@@ -89,7 +88,7 @@
throw new TDBException(Lib.className(this)+": already buffering");
}
}
-
+
/** Write the local cache to the main cache, and reset the local cache. */
public void flushBuffer() {
if ( ! buffering() )
@@ -116,7 +115,7 @@
localCache().clear();
bufferingThread.set(null);
}
-
+
public Cache<Key, Value> getBuffer() {
return localCache();
}
@@ -167,7 +166,7 @@
// ---- Flush changes, reset.
-
+
// ---- Updates to buffering, local cache.