| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.riot.lang; |
| |
| import java.security.MessageDigest; |
| import java.security.NoSuchAlgorithmException; |
| import java.util.UUID; |
| import java.util.concurrent.Callable; |
| |
| import org.apache.jena.atlas.lib.Bytes; |
| import org.apache.jena.atlas.lib.Cache; |
| import org.apache.jena.atlas.lib.CacheFactory; |
| import org.apache.jena.atlas.lib.InternalErrorException; |
| import org.apache.jena.graph.Node; |
| import org.apache.jena.graph.NodeFactory; |
| |
| /** |
| * Allocate bnode labels using a per-run seed and the label presented. |
| * <p> |
| * This is the most scalable, always legal allocator. |
| * <p> |
| * New allocators must be created per parser run, or .reset() called. The seed |
| * and the presented label are fed to a digest (currently MD5, to get a 128-bit |
| * string) whose output is used to form a bNode AnonId of hex digits. |
| * <p> |
| * In addition, there is a cache of label{@literal ->}node allocations, using the natural |
| * tendency to locality in a database dump. (subject bNodes, bNodes in lists |
| * and other data values structures like unit values). |
| * <p> |
| * Not thread safe. |
| */ |
| |
| public class BlankNodeAllocatorHash implements BlankNodeAllocator { |
| private static String DigestAlgorithm = "MD5"; |
| private static int CacheSize = 1000; |
| private MessageDigest mDigest; |
| private byte[] seedBytes; |
| // long+2 bytes to distinguish from UTF-8 bytes. |
| private byte[] counterBytes = new byte[10]; |
| private Cache<String, Node> cache; |
| private long counter = 0; |
| |
| public BlankNodeAllocatorHash() { |
| reset(); |
| try { |
| mDigest = MessageDigest.getInstance(DigestAlgorithm); |
| } catch (NoSuchAlgorithmException e) { |
| throw new InternalErrorException("failed to create message digest", e); |
| } |
| cache = CacheFactory.createCache(CacheSize); |
| } |
| |
| /** |
| * Gets a fresh seed value |
| * <p> |
| * Note that this is called almost immediately by the constructor |
| * and on this initial call you will not yet have access to any |
| * implementation specific information used to select the seed. |
| * </p> |
| * <p> |
| * Implementations <strong>must</strong> return a non-null value |
| * so if you can't decide a seed prior to seeing your derived |
| * implementations constructor inputs you should return a temporary |
| * fake value initially. You can then call {@link #reset()} in your |
| * own constructor after you've taken the necessary steps that allow |
| * you to decide how to generate your own seed. |
| * </p> |
| * @return Seed value |
| */ |
| protected UUID freshSeed() { |
| return UUID.randomUUID(); |
| } |
| |
| @Override |
| public void reset() { |
| UUID seed = this.freshSeed(); |
| seedBytes = new byte[128 / 8]; |
| Bytes.setLong(seed.getMostSignificantBits(), seedBytes, 0); |
| Bytes.setLong(seed.getLeastSignificantBits(), seedBytes, 8); |
| if ( cache != null ) |
| cache.clear(); |
| } |
| |
| @Override |
| public Node alloc(final String label) { |
| Callable<Node> getter = new Callable<Node>() { |
| @Override |
| public Node call() { |
| return alloc(Bytes.string2bytes(label)); |
| } |
| }; |
| Node n = cache.getOrFill(label, getter); |
| return n; |
| } |
| |
| @Override |
| public Node create() { |
| counter++; |
| // Make illegal string bytes so can't clash with alloc(String). |
| // It is different because it has zeros in it. |
| counterBytes[0] = 0; |
| counterBytes[1] = 0; |
| Bytes.setLong(counter, counterBytes, 2); |
| return alloc(counterBytes); |
| } |
| |
| private Node alloc(byte[] labelBytes) { |
| // UUID.nameUUIDFromBytes(seedBytes+labelBytes) uses MD5 but creates the digester |
| // each time. It also stamps in the UUID version/variant bits. |
| mDigest.update(seedBytes); |
| mDigest.update(labelBytes); |
| byte[] bytes = mDigest.digest(); // resets |
| String hexString = Bytes.asHexLC(bytes); |
| return NodeFactory.createBlankNode(hexString); |
| } |
| } |