blob: 6cea13c925e1f3ab6ae03b6c926d022c859fe1f9 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.metron.stellar.dsl.functions;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang.SerializationUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.metron.stellar.common.utils.hashing.tlsh.TLSHHasher;
import org.junit.Assert;
import org.junit.Test;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ForkJoinPool;
import static;
import static org.junit.Assert.*;
public class HashFunctionsTest {
// Hex codec configured with UTF-8; expectedHexString uses it to encode digest bytes.
static final Hex HEX = new Hex(StandardCharsets.UTF_8);
// Function object under test; the tests call apply(...) on it directly with an argument list.
final HashFunctions.ListSupportedHashTypes listSupportedHashTypes = new HashFunctions.ListSupportedHashTypes();
// Function object under test; the tests call apply(...) on it directly with a [value, algorithm] argument list.
final HashFunctions.Hash hash = new HashFunctions.Hash();
// Declared to pass only if an IllegalArgumentException is thrown.
// NOTE(review): the body and closing brace of this method are not present in this copy of the file;
// judging by the name it presumably passes null arguments to the function under test - confirm and restore.
@Test(expected = IllegalArgumentException.class)
public void nullArgumentsShouldFail() throws Exception {
// Declared to pass only if an IllegalArgumentException is thrown.
// NOTE(review): the body and closing brace of this method are not present in this copy of the file;
// judging by the name it presumably calls listSupportedHashTypes with a non-empty argument list - confirm and restore.
@Test(expected = IllegalArgumentException.class)
public void getSupportedHashAlgorithmsCalledWithParametersShouldFail() throws Exception {
public void listSupportedHashTypesReturnsAtMinimumTheHashingAlgorithmsThatMustBeSupported() throws Exception {
final List<String> requiredAlgorithmsByJava = Arrays.asList("MD5", "SHA", "SHA-256"); // These are required for all Java platforms (see Note: SHA is SHA-1
final Collection<String> supportedHashes = listSupportedHashTypes.apply(Collections.emptyList());
requiredAlgorithmsByJava.forEach(a -> assertTrue(supportedHashes.contains(a)));
// Declared to pass only if an IllegalArgumentException is thrown.
// NOTE(review): the body and closing brace of this method are not present in this copy of the file;
// judging by the name it presumably applies the hash function to a null argument list - confirm and restore.
@Test(expected = IllegalArgumentException.class)
public void nullArgumentListShouldThrowException() throws Exception {
// Declared to pass only if an IllegalArgumentException is thrown.
// NOTE(review): the body and closing brace of this method are not present in this copy of the file;
// judging by the name it presumably applies the hash function to an empty argument list - confirm and restore.
@Test(expected = IllegalArgumentException.class)
public void emptyArgumentListShouldThrowException() throws Exception {
@Test(expected = IllegalArgumentException.class)
public void singleArgumentListShouldThrowException() throws Exception {
hash.apply(Collections.singletonList("some value."));
@Test(expected = IllegalArgumentException.class)
public void argumentListWithMoreThanTwoValuesShouldThrowException3() throws Exception {
hash.apply(Arrays.asList("1", "2", "3"));
@Test(expected = IllegalArgumentException.class)
public void argumentListWithMoreThanTwoValuesShouldThrowException4() throws Exception {
hash.apply(Arrays.asList("1", "2", "3", "4"));
@Test(expected = IllegalArgumentException.class)
public void invalidAlgorithmArgumentShouldThrowException() throws Exception {
hash.apply(Arrays.asList("value to hash", "invalidAlgorithm"));
public void invalidNullAlgorithmArgumentShouldReturnNull() throws Exception {
assertNull(hash.apply(Arrays.asList("value to hash", null)));
public void nullInputForValueToHashShouldReturnHashedEncodedValueOf0x00() throws Exception {
assertEquals(StringUtils.repeat('0', 32), hash.apply(Arrays.asList(null, "md5")));
public void nullInputForValueToHashShouldReturnHashedEncodedValueOf0x00InDirectStellarCall() throws Exception {
final String algorithm = "'md5'";
final Map<String, Object> variables = new HashMap<>();
variables.put("toHash", null);
assertEquals(StringUtils.repeat('0', 32), run("HASH(toHash, " + algorithm + ")", variables));
public void allAlgorithmsForMessageDigestShouldBeAbleToHash() throws Exception {
final String valueToHash = "My value to hash";
final Set<String> algorithms = Security.getAlgorithms("MessageDigest");
algorithms.forEach(algorithm -> {
try {
final MessageDigest expected = MessageDigest.getInstance(algorithm);
assertEquals(expectedHexString(expected), hash.apply(Arrays.asList(valueToHash, algorithm)));
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
public void allAlgorithmsForMessageDigestShouldBeAbleToHashDirectStellarCall() throws Exception {
final String valueToHash = "My value to hash";
final Set<String> algorithms = Security.getAlgorithms("MessageDigest");
algorithms.forEach(algorithm -> {
try {
final Object actual = run("HASH('" + valueToHash + "', '" + algorithm + "')", Collections.emptyMap());
final MessageDigest expected = MessageDigest.getInstance(algorithm);
assertEquals(expectedHexString(expected), actual);
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
public void nonStringValueThatIsSerializableHashesSuccessfully() throws Exception {
final String algorithm = "'md5'";
final String valueToHash = "'My value to hash'";
final Serializable input = (Serializable) Collections.singletonList(valueToHash);
final MessageDigest expected = MessageDigest.getInstance(algorithm.replace("'", ""));
final Map<String, Object> variables = new HashMap<>();
variables.put("toHash", input);
assertEquals(expectedHexString(expected), run("HASH(toHash, " + algorithm + ")", variables));
public void callingHashFunctionsWithVariablesAsInputHashesSuccessfully() throws Exception {
final String algorithm = "md5";
final String valueToHash = "'My value to hash'";
final Serializable input = (Serializable) Collections.singletonList(valueToHash);
final MessageDigest expected = MessageDigest.getInstance(algorithm);
final Map<String, Object> variables = new HashMap<>();
variables.put("toHash", input);
variables.put("hashType", algorithm);
assertEquals(expectedHexString(expected), run("HASH(toHash, hashType)", variables));
public void callingHashFunctionWhereOnlyHashTypeIsAVariableHashesSuccessfully() throws Exception {
final String algorithm = "md5";
final String valueToHash = "'My value to hash'";
final MessageDigest expected = MessageDigest.getInstance(algorithm);
expected.update(valueToHash.replace("'", "").getBytes(StandardCharsets.UTF_8));
final Map<String, Object> variables = new HashMap<>();
variables.put("hashType", algorithm);
assertEquals(expectedHexString(expected), run("HASH(" + valueToHash + ", hashType)", variables));
public void aNonNullNonSerializableObjectReturnsAValueOfNull() throws Exception {
final Map<String, Object> variables = new HashMap<>();
variables.put("toHash", new Object());
assertNull(run("HASH(toHash, 'md5')", variables));
// Sample text used as the input for the TLSH tests in this class.
public static String TLSH_DATA = "The best documentation is the UNIX source. After all, this is what the "
+ "system uses for documentation when it decides what to do next! The "
+ "manuals paraphrase the source code, often having been written at "
+ "different times and by different people than who wrote the code. "
+ "Think of them as guidelines. Sometimes they are more like wishes... "
+ "Nonetheless, it is all too common to turn to the source and find "
+ "options and behaviors that are not documented in the manual. Sometimes "
+ "you find options described in the manual that are unimplemented "
+ "and ignored by the source.";
// Value that tlsh_happyPath expects HASH(..., 'tlsh') to produce for TLSH_DATA.
String TLSH_EXPECTED = "6FF02BEF718027B0160B4391212923ED7F1A463D563B1549B86CF62973B197AD2731F8";
public void tlsh_happyPath() throws Exception {
final Map<String, Object> variables = new HashMap<>();
variables.put("toHash", TLSH_DATA);
variables.put("toHashBytes", TLSH_DATA.getBytes(StandardCharsets.UTF_8));
//this value is pulled from a canonical example at
assertEquals(TLSH_EXPECTED, run("HASH(toHash, 'tlsh')", variables));
assertEquals(TLSH_EXPECTED, run("HASH(toHash, 'TLSH')", variables));
assertEquals(TLSH_EXPECTED, run("HASH(toHashBytes, 'tlsh')", variables));
public void tlsh_multiBin() throws Exception {
final Map<String, Object> variables = new HashMap<>();
variables.put("toHash", TLSH_DATA);
Map<String, String> out = (Map<String, String>)run("HASH(toHash, 'tlsh', { 'hashes' : [ 8, 16, 32 ]} )", variables);
for(int h : ImmutableList.of(8, 16, 32)) {
Assert.assertTrue(out.containsKey(TLSHHasher.TLSH_BIN_KEY + "_" + h));
// Builds 20 (data, config) pairs with their TLSH hashes, then re-evaluates the same
// HASH expression for each pair from a task submitted to a ForkJoinPool and compares the results.
// NOTE(review): this copy of the method appears to have lost lines - no @Test annotation is visible,
// the put(...) calls after "new HashMap<String, Object>()" have no enclosing initializer braces,
// the lambda passed to submit(...) has no visible iteration call before "kv ->", and no closing
// braces are present. Restore the method from upstream before relying on it.
public void tlsh_multithread() throws Exception {
//we want to ensure that everything is threadsafe, so we'll spin up some random data
//generate some hashes and then do it all in parallel and make sure it all matches.
Map<Map.Entry<byte[], Map<String, Object>>, String> hashes = new HashMap<>();
// Fixed seed, so the generated configs are the same on every run.
Random r = new Random(0);
for(int i = 0;i < 20;++i) {
// NOTE(review): nothing visible writes into d, so it stays all zeros - presumably a call that
// fills it from r is missing here; confirm.
byte[] d = new byte[256];
Map<String, Object> config = new HashMap<String, Object>()
put(TLSHHasher.Config.BUCKET_SIZE.key, r.nextBoolean() ? 128 : 256);
put(TLSHHasher.Config.CHECKSUM.key, r.nextBoolean() ? 1 : 3);
String hash = (String)run("HASH(data, 'tlsh', config)", ImmutableMap.of("config", config, "data", d));
// Keyed by the (data, config) pair so the parallel pass can recompute from the same inputs.
hashes.put(new AbstractMap.SimpleEntry<>(d, config), hash);
ForkJoinPool forkJoinPool = new ForkJoinPool(5);
// NOTE(review): no get()/join() on the value returned by submit(...) is visible; if none exists,
// assertion failures inside the task would not fail this test - confirm.
forkJoinPool.submit(() ->
kv -> {
Map<String, Object> config = kv.getKey().getValue();
byte[] data = kv.getKey().getKey();
String hash = (String)run("HASH(data, 'tlsh', config)", ImmutableMap.of("config", config, "data", data));
Assert.assertEquals(hash, kv.getValue());
public void tlsh_similarity() throws Exception {
for(Map.Entry<String, String> kv : ImmutableMap.of("been", "ben", "document", "dokumant", "code", "cad").entrySet()) {
Map<String, Object> variables = ImmutableMap.of("toHash", TLSH_DATA, "toHashSimilar", TLSH_DATA.replace(kv.getKey(), kv.getValue()));
Map<String, Object> bin1 = (Map<String, Object>) run("HASH(toHashSimilar, 'tlsh', { 'hashes' : 4, 'bucketSize' : 128 })", variables);
Map<String, Object> bin2 = (Map<String, Object>) run("HASH(toHash, 'tlsh', { 'hashes' : [ 4 ], 'bucketSize' : 128 })", variables);
assertEquals(kv.getKey() + " != " + kv.getValue() + " because " + bin1.get("tlsh") + " != " + bin2.get("tlsh"), bin1.get("tlsh_bin"), bin2.get("tlsh_bin"));
assertNotEquals(bin1.get("tlsh"), bin2.get("tlsh"));
Map<String, Object> distVariables = ImmutableMap.of("hash1", bin1.get(TLSHHasher.TLSH_KEY), "hash2", bin2.get(TLSHHasher.TLSH_KEY));
//ensure the diff is minimal
Integer diff = (Integer) run("TLSH_DIST( hash1, hash2)", distVariables);
Integer diffReflexive = (Integer) run("TLSH_DIST( hash2, hash1)", distVariables);
Assert.assertTrue("diff == " + diff, diff < 100);
Assert.assertEquals(diff, diffReflexive);
//ensure that d(x,x) == 0
Integer diff = (Integer) run("TLSH_DIST( hash1, hash1)", distVariables);
Assert.assertEquals((int)0, (int)diff);
// Evaluates TLSH_DIST with hash1 bound to the integer 1 rather than a TLSH string.
// NOTE(review): no @Test annotation, assertion or closing brace is visible in this copy; since the
// body asserts nothing, the test presumably expects an exception (e.g. via @Test(expected = ...)) -
// confirm the expected type against upstream before enabling it.
public void tlshDist_invalidInput() throws Exception {
final Map<String, Object> variables = new HashMap<>();
variables.put("hash1", 1);
// hash2 is bound here but the expression below only references hash1.
variables.put("hash2", TLSH_EXPECTED);
run("TLSH_DIST( hash1, hash1)", variables);
public void tlsh_insufficientComplexity() throws Exception {
final Map<String, Object> variables = new HashMap<>();
String data = "Metron is the best";
variables.put("toHash", data);
assertNull(run("HASH(toHash, 'tlsh')", variables));
public void tlsh_nullInput() throws Exception {
final Map<String, Object> variables = new HashMap<>();
String data = null;
variables.put("toHash", data);
assertNull(run("HASH(toHash, 'tlsh')", variables));
private String expectedHexString(MessageDigest expected) {
return new String(HEX.encode(expected.digest()), StandardCharsets.UTF_8);