[MCLEAN-109] Add METRO hash implementation (#58)
diff --git a/pom.xml b/pom.xml
index 5ac96a6..507d6ca 100644
--- a/pom.xml
+++ b/pom.xml
@@ -251,6 +251,18 @@
</exclusion>
</exclusions>
</dependency>
+ <dependency>
+ <groupId>org.openjdk.jmh</groupId>
+ <artifactId>jmh-core</artifactId>
+ <version>1.36</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.openjdk.jmh</groupId>
+ <artifactId>jmh-generator-annprocess</artifactId>
+ <version>1.36</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
diff --git a/src/main/java/org/apache/maven/buildcache/hash/HashFactory.java b/src/main/java/org/apache/maven/buildcache/hash/HashFactory.java
index 73cfc16..731480a 100644
--- a/src/main/java/org/apache/maven/buildcache/hash/HashFactory.java
+++ b/src/main/java/org/apache/maven/buildcache/hash/HashFactory.java
@@ -22,6 +22,8 @@
import java.util.HashMap;
import java.util.Map;
+import net.openhft.hashing.LongHashFunction;
+
/**
* HashFactory
*/
@@ -30,8 +32,10 @@ public enum HashFactory {
SHA256(new SHA("SHA-256")),
SHA384(new SHA("SHA-384")),
SHA512(new SHA("SHA-512")),
- XX(new XX()),
- XXMM(new XXMM());
+ XX(new Zah("XX", LongHashFunction.xx(), Zah.MemoryPolicy.Standard)),
+ XXMM(new Zah("XXMM", LongHashFunction.xx(), Zah.MemoryPolicy.MemoryMappedBuffers)),
+ METRO(new Zah("METRO", LongHashFunction.metro(), Zah.MemoryPolicy.Standard)),
+ METRO_MM(new Zah("METRO+MM", LongHashFunction.metro(), Zah.MemoryPolicy.MemoryMappedBuffers));
private static final Map<String, HashFactory> LOOKUP = new HashMap<>();
diff --git a/src/main/java/org/apache/maven/buildcache/hash/XX.java b/src/main/java/org/apache/maven/buildcache/hash/XX.java
deleted file mode 100644
index 59fc966..0000000
--- a/src/main/java/org/apache/maven/buildcache/hash/XX.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.maven.buildcache.hash;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import net.openhft.hashing.LongHashFunction;
-
-/**
- * XX
- */
-public class XX implements Hash.Factory {
-
- static final LongHashFunction INSTANCE = LongHashFunction.xx();
-
- @Override
- public String getAlgorithm() {
- return "XX";
- }
-
- @Override
- public Hash.Algorithm algorithm() {
- return new XX.Algorithm();
- }
-
- @Override
- public Hash.Checksum checksum(int count) {
- return new XX.Checksum(ByteBuffer.allocate(capacity(count)));
- }
-
- static int capacity(int count) {
- // Java 8: Long.BYTES
- return count * Long.SIZE / Byte.SIZE;
- }
-
- static class Algorithm implements Hash.Algorithm {
-
- @Override
- public byte[] hash(byte[] array) {
- return HexUtils.toByteArray(INSTANCE.hashBytes(array));
- }
-
- @Override
- public byte[] hash(Path path) throws IOException {
- return hash(Files.readAllBytes(path));
- }
- }
-
- static class Checksum implements Hash.Checksum {
-
- private final ByteBuffer buffer;
-
- Checksum(ByteBuffer buffer) {
- this.buffer = buffer;
- }
-
- @Override
- public void update(byte[] hash) {
- buffer.put(hash);
- }
-
- @Override
- public byte[] digest() {
- return HexUtils.toByteArray(INSTANCE.hashBytes(buffer, 0, buffer.position()));
- }
- }
-}
diff --git a/src/main/java/org/apache/maven/buildcache/hash/XXMM.java b/src/main/java/org/apache/maven/buildcache/hash/XXMM.java
deleted file mode 100644
index 9eedd1f..0000000
--- a/src/main/java/org/apache/maven/buildcache/hash/XXMM.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.maven.buildcache.hash;
-
-import java.io.IOException;
-import java.nio.channels.FileChannel;
-import java.nio.file.Path;
-
-import static java.nio.channels.FileChannel.MapMode.READ_ONLY;
-import static java.nio.file.StandardOpenOption.READ;
-
-/**
- * XXMM
- */
-public class XXMM implements Hash.Factory {
-
- private static final ThreadLocal<CloseableBuffer> BUFFER = new ThreadLocal<>();
-
- @Override
- public String getAlgorithm() {
- return "XXMM";
- }
-
- @Override
- public Hash.Algorithm algorithm() {
- return new Algorithm();
- }
-
- @Override
- public Hash.Checksum checksum(int count) {
- return new XX.Checksum(ThreadLocalBuffer.get(BUFFER, XX.capacity(count)));
- }
-
- private static class Algorithm extends XX.Algorithm {
-
- @Override
- public byte[] hash(Path path) throws IOException {
- try (FileChannel channel = FileChannel.open(path, READ);
- CloseableBuffer buffer = CloseableBuffer.mappedBuffer(channel, READ_ONLY)) {
- return HexUtils.toByteArray(XX.INSTANCE.hashBytes(buffer.getBuffer()));
- }
- }
- }
-}
diff --git a/src/main/java/org/apache/maven/buildcache/hash/Zah.java b/src/main/java/org/apache/maven/buildcache/hash/Zah.java
new file mode 100644
index 0000000..befea77
--- /dev/null
+++ b/src/main/java/org/apache/maven/buildcache/hash/Zah.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.maven.buildcache.hash;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import net.openhft.hashing.LongHashFunction;
+
+import static java.nio.channels.FileChannel.MapMode.READ_ONLY;
+import static java.nio.file.StandardOpenOption.READ;
+
+/**
+ * Factory for hashes computed by a Zero-Allocation-Hashing {@link LongHashFunction}.
+ */
+public class Zah implements Hash.Factory {
+
+ public enum MemoryPolicy {
+ Standard,
+ MemoryMappedBuffers
+ }
+
+ private final String name;
+ private final LongHashFunction hash;
+ private final MemoryPolicy memoryPolicy;
+
+ public Zah(String name, LongHashFunction hash, MemoryPolicy memoryPolicy) {
+ this.name = name;
+ this.hash = hash;
+ this.memoryPolicy = memoryPolicy != null ? memoryPolicy : MemoryPolicy.Standard;
+ }
+
+ @Override
+ public String getAlgorithm() {
+ return name;
+ }
+
+ @Override
+ public Hash.Algorithm algorithm() {
+ switch (memoryPolicy) {
+ case MemoryMappedBuffers:
+ return new AlgorithmWithMM();
+ default:
+ return new Algorithm();
+ }
+ }
+
+ @Override
+ public Hash.Checksum checksum(int count) {
+ return new Zah.Checksum(ByteBuffer.allocate(capacity(count)));
+ }
+
+ static int capacity(int count) {
+ // Bytes needed to hold count 64-bit values; equivalent to count * Long.BYTES (Java 8+)
+ return count * Long.SIZE / Byte.SIZE;
+ }
+
+ class Algorithm implements Hash.Algorithm {
+
+ @Override
+ public byte[] hash(byte[] array) {
+ return HexUtils.toByteArray(hash.hashBytes(array));
+ }
+
+ @Override
+ public byte[] hash(Path path) throws IOException {
+ return hash(Files.readAllBytes(path));
+ }
+ }
+
+ class AlgorithmWithMM implements Hash.Algorithm {
+
+ @Override
+ public byte[] hash(byte[] array) {
+ return HexUtils.toByteArray(hash.hashBytes(array));
+ }
+
+ @Override
+ public byte[] hash(Path path) throws IOException {
+ try (FileChannel channel = FileChannel.open(path, READ);
+ CloseableBuffer buffer = CloseableBuffer.mappedBuffer(channel, READ_ONLY)) {
+ return HexUtils.toByteArray(hash.hashBytes(buffer.getBuffer()));
+ }
+ }
+ }
+
+ class Checksum implements Hash.Checksum {
+
+ private final ByteBuffer buffer;
+
+ Checksum(ByteBuffer buffer) {
+ this.buffer = buffer;
+ }
+
+ @Override
+ public void update(byte[] hash) {
+ buffer.put(hash);
+ }
+
+ @Override
+ public byte[] digest() {
+ return HexUtils.toByteArray(hash.hashBytes(buffer, 0, buffer.position()));
+ }
+ }
+}
diff --git a/src/main/mdo/build-cache-config.mdo b/src/main/mdo/build-cache-config.mdo
index e546db6..871d9c0 100644
--- a/src/main/mdo/build-cache-config.mdo
+++ b/src/main/mdo/build-cache-config.mdo
@@ -184,7 +184,7 @@
<name>hashAlgorithm</name>
<type>String</type>
<defaultValue>XX</defaultValue>
- <description>One of XX, XXMM, SHA-1, SHA-256, SHA-384, SHA-512</description>
+ <description>One of XX, XXMM, METRO, METRO+MM, SHA-1, SHA-256, SHA-384, SHA-512</description>
</field>
<field>
<name>validateXml</name>
diff --git a/src/site/markdown/performance.md b/src/site/markdown/performance.md
index 060911a..6031e90 100644
--- a/src/site/markdown/performance.md
+++ b/src/site/markdown/performance.md
@@ -23,19 +23,20 @@
### Hash algorithm selection
-By default, the cache uses the SHA-256 algorithm, which is sufficiently fast and provides a negligible probability of hash
-collisions. In projects with a large codebase, the performance of hash algorithms becomes more critical, and other algorithms like
-[XX](https://cyan4973.github.io/xxHash/) or XXMM (memory-mapped files) could provide better performance.
+By default, the cache uses the [XX](https://cyan4973.github.io/xxHash/) algorithm, which is a very fast hash algorithm and should be enough for most use cases.
+In projects with a large codebase, the performance of hash algorithms becomes more critical, and other algorithms like
+XXMM (XX with memory-mapped files), METRO, or METRO+MM (METRO with memory-mapped files) could provide better performance, depending on the environment.
```xml
<hashAlgorithm>XX</hashAlgorithm>
```
-or
-```xml
-
-<hashAlgorithm>XXMM</hashAlgorithm>
+Also note that using the XXMM or METRO+MM algorithms requires creating a `.mvn/jvm.config` file in the
+top-level directory, containing the following line, in order to run successfully on JDK >= 17.
```
+--add-opens java.base/sun.nio.ch=ALL-UNNAMED
+```
+
### Filter out unnecessary artifacts
diff --git a/src/test/java/org/apache/maven/buildcache/hash/PerfTest.java b/src/test/java/org/apache/maven/buildcache/hash/PerfTest.java
new file mode 100644
index 0000000..3c10f66
--- /dev/null
+++ b/src/test/java/org/apache/maven/buildcache/hash/PerfTest.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.maven.buildcache.hash;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+import org.openjdk.jmh.runner.options.TimeValue;
+
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 3, time = 3, timeUnit = TimeUnit.SECONDS)
+public class PerfTest {
+
+ @State(Scope.Benchmark)
+ public static class HashState {
+ List<Path> paths;
+
+ @Setup(Level.Iteration)
+ public void setUp() throws IOException {
+ try (Stream<Path> stream = Files.walk(Paths.get(System.getProperty("user.dir")))) {
+ paths = stream.filter(p -> p.getFileName().toString().endsWith(".java"))
+ .collect(Collectors.toList());
+ }
+ }
+ }
+
+ String doTest(HashFactory hashFactory, HashState state) throws IOException {
+ HashAlgorithm hash = hashFactory.createAlgorithm();
+ StringBuilder sb = new StringBuilder();
+ for (Path path : state.paths) {
+ if (sb.length() > 0) {
+ sb.append("\n");
+ }
+ sb.append(hash.hash(path));
+ }
+ return sb.toString();
+ }
+
+ @Benchmark
+ public String SHA1(HashState state) throws IOException {
+ return doTest(HashFactory.SHA1, state);
+ }
+
+ @Benchmark
+ public String SHA256(HashState state) throws IOException {
+ return doTest(HashFactory.SHA256, state);
+ }
+
+ @Benchmark
+ public String XX(HashState state) throws IOException {
+ return doTest(HashFactory.XX, state);
+ }
+
+ @Benchmark
+ public String XXMM(HashState state) throws IOException {
+ return doTest(HashFactory.XXMM, state);
+ }
+
+ @Benchmark
+ public String METRO(HashState state) throws IOException {
+ return doTest(HashFactory.METRO, state);
+ }
+
+ @Benchmark
+ public String METRO_MM(HashState state) throws IOException {
+ return doTest(HashFactory.METRO_MM, state);
+ }
+
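+ /**
+ * Runs the JMH benchmarks of this class: 3 measurement iterations of 3 seconds each, in a single fork.
+ *
+ * @param args command-line arguments (not used)
+ * @throws org.openjdk.jmh.runner.RunnerException if the benchmark run fails
+ */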
+ public static void main(String... args) throws RunnerException {
+ Options opts = new OptionsBuilder()
+ .measurementIterations(3)
+ .measurementTime(TimeValue.milliseconds(3000))
+ .forks(1)
+ .build();
+ new Runner(opts).run();
+ }
+}