| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.tdb.lib; |
| |
| import static org.apache.jena.tdb.sys.SystemTDB.LenNodeHash ; |
| |
| import java.nio.ByteBuffer ; |
| import java.security.DigestException ; |
| import java.security.MessageDigest ; |
| import java.security.NoSuchAlgorithmException ; |
| import java.util.Iterator ; |
| |
| import org.apache.jena.atlas.iterator.Iter ; |
| import org.apache.jena.atlas.lib.Bytes ; |
| import org.apache.jena.atlas.lib.Pool ; |
| import org.apache.jena.atlas.lib.PoolBase ; |
| import org.apache.jena.atlas.lib.PoolSync ; |
| import org.apache.jena.atlas.logging.Log ; |
| import org.apache.jena.graph.Node ; |
| import org.apache.jena.sparql.util.NodeUtils ; |
| import org.apache.jena.tdb.TDBException ; |
| import org.apache.jena.tdb.base.objectfile.ObjectFile ; |
| import org.apache.jena.tdb.base.record.Record ; |
| import org.apache.jena.tdb.store.Hash ; |
| import org.apache.jena.tdb.store.NodeId ; |
| import org.apache.jena.tdb.store.NodeType ; |
| import org.apache.jena.tdb.store.nodetable.NodeTable ; |
| import org.apache.jena.tdb.store.nodetable.Nodec ; |
| import org.apache.jena.tdb.store.nodetable.NodecSSE ; |
| |
| public class NodeLib |
| { |
| private static Nodec nodec = new NodecSSE() ; |
| |
| /** |
| * Encode and write a {@link Node} to the {@link ObjectFile}. Returns the location, |
| * suitable for use with {@link #fetchDecode}. |
| */ |
| public static long encodeStore(Node node, ObjectFile file) { |
| return encodeStore(node, file, null); |
| } |
| |
| /** |
| * Encode and write a {@link Node} to the {@link ObjectFile}. |
| * Uses the given {@link ByteBuffer} for encoding space if possible. |
| * Returns the location, suitable for use with {@link #fetchDecode}. |
| */ |
| public static long encodeStore(Node node, ObjectFile file, ByteBuffer bb) { |
| int maxSize = nodec.maxSize(node); |
| if ( bb == null ) |
| return allocEncodeWrite(node, file, maxSize); |
| if ( bb.capacity() < maxSize ) |
| // Buffer may not be big enough. |
| return allocEncodeWrite(node, file, maxSize); |
| // Use buffer provided. |
| bb.clear(); |
| return encodeWrite(node, file, bb); |
| } |
| |
| /** Encode and write, allocating space as needed */ |
| private static long allocEncodeWrite(Node node, ObjectFile file, int maxSize) { |
| ByteBuffer bb = ByteBuffer.allocate(maxSize); |
| return encodeWrite(node, file, bb); |
| } |
| |
| /** Encode and write, using the space provided which is assumed to be large enough. */ |
| private static long encodeWrite(Node node, ObjectFile file, ByteBuffer bb) { |
| int len = nodec.encode(node, bb, null); |
| long x = file.write(bb); |
| return x; |
| } |
| |
| /** |
| * Read and decode a {@link Node} from the {@link ObjectFile}. The {@code id} must |
| * have originally been generated by {@link #encodeStore}. |
| */ |
| public static Node fetchDecode(long id, ObjectFile file) { |
| ByteBuffer bb = file.read(id); |
| if ( bb == null ) |
| return null; |
| return decode(bb); |
| } |
| |
| /** |
| * Encode a node - it is better to use encodeStore which may avoid an additional copy |
| * in getting the node into the ObjectFile and may avoid short-term byte buffer |
| * allocation. |
| */ |
| public static ByteBuffer encode(Node node) { |
| int maxSize = nodec.maxSize(node); |
| ByteBuffer bb = ByteBuffer.allocate(maxSize); |
| int len = nodec.encode(node, bb, null); |
| bb.limit(len); |
| bb.position(0); |
| return bb; |
| } |
| |
| /** |
| * Decode a node - it is better to use fetchDecode which may avoid an additional copy |
| * in getting the node from the ObjectFile. |
| */ |
| public static Node decode(ByteBuffer bb) { |
| bb.position(0); |
| Node n = nodec.decode(bb, null); |
| return n; |
| } |
| |
| public static Hash hash(Node n) { |
| Hash h = new Hash(LenNodeHash); |
| setHash(h, n); |
| return h; |
| } |
| |
| public static void setHash(Hash h, Node n) { |
| NodeType nt = NodeType.lookup(n); |
| switch (nt) { |
| case URI : |
| hash(h, n.getURI(), null, null, nt); |
| return; |
| case BNODE : |
| hash(h, n.getBlankNodeLabel(), null, null, nt); |
| return; |
| case LITERAL : |
| String dt = n.getLiteralDatatypeURI(); |
| if ( NodeUtils.isSimpleString(n) || NodeUtils.isLangString(n) ) { |
| // RDF 1.1 : No datatype for: |
| // xsd:String as simple literals |
| // rdf:langString and @ |
| dt = null; |
| } |
| hash(h, n.getLiteralLexicalForm(), n.getLiteralLanguage(), dt, nt); |
| return; |
| case OTHER : |
| throw new TDBException("Attempt to hash something strange: " + n); |
| } |
| throw new TDBException("NodeType broken: " + n); |
| } |
| |
| private static int InitialPoolSize = 5; |
| private static Pool<MessageDigest> digesters = PoolSync.create(new PoolBase<MessageDigest>()); |
| static { |
| try { |
| for ( int i = 0 ; i < InitialPoolSize ; i++ ) |
| digesters.put(MessageDigest.getInstance("MD5")); |
| } |
| catch (NoSuchAlgorithmException e) { |
| e.printStackTrace(); |
| } |
| } |
| |
| private static MessageDigest allocDigest() { |
| try { |
| MessageDigest disgest = digesters.get(); |
| if ( disgest == null ) |
| disgest = MessageDigest.getInstance("MD5"); |
| return disgest; |
| } |
| catch (NoSuchAlgorithmException e) { |
| e.printStackTrace(); |
| return null; |
| } |
| } |
| |
| private static void deallocDigest(MessageDigest digest) { |
| digest.reset(); |
| digesters.put(digest); |
| } |
| |
| private static void hash(Hash h, String lex, String lang, String datatype, NodeType nodeType) { |
| if ( datatype == null ) |
| datatype = ""; |
| if ( lang == null ) |
| lang = ""; |
| String toHash = lex + "|" + lang + "|" + datatype + "|" + nodeType.getName(); |
| MessageDigest digest; |
| try { |
| digest = allocDigest(); |
| digest.update(Bytes.string2bytes(toHash)); |
| if ( h.getLen() == 16 ) |
| // MD5 is 16 bytes. |
| digest.digest(h.getBytes(), 0, 16); |
| else { |
| byte b[] = digest.digest(); // 16 bytes. |
| // Avoid the copy if length is 16? |
| // digest.digest(bytes, 0, length) needs 16 bytes |
| System.arraycopy(b, 0, h.getBytes(), 0, h.getLen()); |
| } |
| deallocDigest(digest); |
| return; |
| } |
| catch (DigestException ex) { |
| Log.error(NodeLib.class, "DigestException", ex); |
| } |
| } |
| |
| public static NodeId getNodeId(Record r, int idx) { |
| return NodeId.create(Bytes.getLong(r.getKey(), idx)); |
| } |
| |
| public static Node termOrAny(Node node) { |
| if ( node == null || node.isVariable() ) |
| return Node.ANY; |
| return node; |
| } |
| |
| public static Iterator<Node> nodes(final NodeTable nodeTable, Iterator<NodeId> iter) { |
| return Iter.map(iter, nodeTable::getNodeForNodeId); |
| } |
| } |