blob: 08863bb6d27ee5c3a792f8e905e9e203c52cb316 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;
import static org.junit.Assert.*;
import java.util.regex.Pattern;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.filter.RegexStringComparator.EngineType;
import org.apache.hadoop.hbase.testclassification.FilterTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({FilterTests.class, SmallTests.class})
public class TestRegexComparator {
public static final HBaseClassTestRule CLASS_RULE =
public void testSerialization() throws Exception {
// Verifies that a comparator rebuilt via parseFrom(toByteArray()) reports the same
// regex engine implementation as the comparator it was serialized from.
// Default engine is the Java engine
RegexStringComparator a = new RegexStringComparator("a|b");
// b is the copy rebuilt from a's serialized bytes.
RegexStringComparator b = RegexStringComparator.parseFrom(a.toByteArray());
assertTrue(b.getEngine() instanceof RegexStringComparator.JavaRegexEngine);
// Same round trip, this time with the JONI engine requested explicitly.
a = new RegexStringComparator("a|b", EngineType.JONI);
b = RegexStringComparator.parseFrom(a.toByteArray());
assertTrue(b.getEngine() instanceof RegexStringComparator.JoniRegexEngine);
public void testJavaEngine() throws Exception {
// Runs every TEST_CASES entry through a comparator built with EngineType.JAVA.
for (TestCase t: TEST_CASES) {
// result is true exactly when compareTo returns 0 for the haystack bytes
// (presumably the comparator's "matched" signal -- confirm against RegexStringComparator).
boolean result = new RegexStringComparator(t.regex, t.flags, EngineType.JAVA)
.compareTo(Bytes.toBytes(t.haystack)) == 0;
// The failure message names the regex and the haystack of the offending case.
assertEquals("Regex '" + t.regex + "' failed test '" + t.haystack + "'", result,
public void testJoniEngine() throws Exception {
// Runs every TEST_CASES entry through a comparator built with EngineType.JONI,
// using the same table as the JAVA-engine test.
for (TestCase t: TEST_CASES) {
// result is true exactly when compareTo returns 0 for the haystack bytes
// (presumably the comparator's "matched" signal -- confirm against RegexStringComparator).
boolean result = new RegexStringComparator(t.regex, t.flags, EngineType.JONI)
.compareTo(Bytes.toBytes(t.haystack)) == 0;
// The failure message names the regex and the haystack of the offending case.
assertEquals("Regex '" + t.regex + "' failed test '" + t.haystack + "'", result,
private static class TestCase {
// One row of the regex test table; the engine tests read these fields directly.
// Pattern handed to the RegexStringComparator constructor.
String regex;
// Input text; the tests convert it to bytes before calling compareTo.
String haystack;
// Flags handed to the RegexStringComparator constructor (java.util.regex.Pattern constants).
int flags;
// Outcome recorded for this case.
boolean expected;
// Convenience form: delegates to the four-argument constructor with flags = Pattern.DOTALL.
public TestCase(String regex, String haystack, boolean expected) {
this(regex, Pattern.DOTALL, haystack, expected);
// Full form: stores each argument in the field of the same name.
public TestCase(String regex, int flags, String haystack, boolean expected) {
this.regex = regex;
this.flags = flags;
this.haystack = haystack;
this.expected = expected;
// These are a subset of the regex tests from OpenJDK 7
private static TestCase TEST_CASES[] = {
new TestCase("a|b", "a", true),
new TestCase("a|b", "b", true),
new TestCase("a|b", Pattern.CASE_INSENSITIVE, "A", true),
new TestCase("a|b", Pattern.CASE_INSENSITIVE, "B", true),
new TestCase("a|b", "z", false),
new TestCase("a|b|cd", "cd", true),
new TestCase("z(a|ac)b", "zacb", true),
new TestCase("[abc]+", "ababab", true),
new TestCase("[abc]+", "defg", false),
new TestCase("[abc]+[def]+[ghi]+", "zzzaaddggzzz", true),
new TestCase("[a-\\u4444]+", "za-9z", true),
new TestCase("[^abc]+", "ababab", false),
new TestCase("[^abc]+", "aaabbbcccdefg", true),
new TestCase("[abc^b]", "b", true),
new TestCase("[abc[def]]", "b", true),
new TestCase("[abc[def]]", "e", true),
new TestCase("[a-c[d-f[g-i]]]", "h", true),
new TestCase("[a-c[d-f[g-i]]m]", "m", true),
new TestCase("[a-c&&[d-f]]", "a", false),
new TestCase("[a-c&&[d-f]]", "z", false),
new TestCase("[a-m&&m-z&&a-c]", "m", false),
new TestCase("[a-m&&m-z&&a-z]", "m", true),
new TestCase("[[a-m]&&[^a-c]]", "a", false),
new TestCase("[[a-m]&&[^a-c]]", "d", true),
new TestCase("[[a-c][d-f]&&abc[def]]", "e", true),
new TestCase("[[a-c]&&[b-d]&&[c-e]]", "c", true),
new TestCase("[[a-c]&&[b-d][c-e]&&[u-z]]", "c", false),
new TestCase("[[a]&&[b][c][a]&&[^d]]", "a", true),
new TestCase("[[a]&&[b][c][a]&&[^d]]", "d", false),
new TestCase("[[[a-d]&&[c-f]]&&[c]&&c&&[cde]]", "c", true),
new TestCase("[x[[wz]abc&&bcd[z]]&&[u-z]]", "z", true),
new TestCase("a.c.+", "a#c%&", true),
new TestCase("ab.", "ab\n", true),
new TestCase("(?s)ab.", "ab\n", true),
new TestCase("ab\\wc", "abcc", true),
new TestCase("\\W\\w\\W", "#r#", true),
new TestCase("\\W\\w\\W", "rrrr#ggg", false),
new TestCase("abc[\\sdef]*", "abc def", true),
new TestCase("abc[\\sy-z]*", "abc y z", true),
new TestCase("abc[a-d\\sm-p]*", "abcaa mn p", true),
new TestCase("\\s\\s\\s", "blah err", false),
new TestCase("\\S\\S\\s", "blah err", true),
new TestCase("ab\\dc", "ab9c", true),
new TestCase("\\d\\d\\d", "blah45", false),
new TestCase("^abc", "abcdef", true),
new TestCase("^abc", "bcdabc", false),
new TestCase("^(a)?a", "a", true),
new TestCase("^(aa(bb)?)+$", "aabbaa", true),
new TestCase("((a|b)?b)+", "b", true),
new TestCase("^(a(b)?)+$", "aba", true),
new TestCase("^(a(b(c)?)?)?abc", "abc", true),
new TestCase("^(a(b(c))).*", "abc", true),
new TestCase("a?b", "aaaab", true),
new TestCase("a?b", "aaacc", false),
new TestCase("a??b", "aaaab", true),
new TestCase("a??b", "aaacc", false),
new TestCase("a?+b", "aaaab", true),
new TestCase("a?+b", "aaacc", false),
new TestCase("a+b", "aaaab", true),
new TestCase("a+b", "aaacc", false),
new TestCase("a+?b", "aaaab", true),
new TestCase("a+?b", "aaacc", false),
new TestCase("a++b", "aaaab", true),
new TestCase("a++b", "aaacc", false),
new TestCase("a{2,3}", "a", false),
new TestCase("a{2,3}", "aa", true),
new TestCase("a{2,3}", "aaa", true),
new TestCase("a{3,}", "zzzaaaazzz", true),
new TestCase("a{3,}", "zzzaazzz", false),
new TestCase("abc(?=d)", "zzzabcd", true),
new TestCase("abc(?=d)", "zzzabced", false),
new TestCase("abc(?!d)", "zzabcd", false),
new TestCase("abc(?!d)", "zzabced", true),
new TestCase("\\w(?<=a)", "###abc###", true),
new TestCase("\\w(?<=a)", "###ert###", false),
new TestCase("(?<!a)c", "bc", true),
new TestCase("(?<!a)c", "ac", false),
new TestCase("(a+b)+", "ababab", true),
new TestCase("(a+b)+", "accccd", false),
new TestCase("(ab)+", "ababab", true),
new TestCase("(ab)+", "accccd", false),
new TestCase("(ab)(cd*)", "zzzabczzz", true),
new TestCase("abc(d)*abc", "abcdddddabc", true),
new TestCase("a*b", "aaaab", true),
new TestCase("a*b", "b", true),
new TestCase("a*b", "aaaac", false),
new TestCase(".*?b", "aaaab", true),
new TestCase("a*+b", "aaaab", true),
new TestCase("a*+b", "b", true),
new TestCase("a*+b", "aaaac", false),
new TestCase("(?i)foobar", "fOobAr", true),
new TestCase("f(?i)oobar", "fOobAr", true),
new TestCase("f(?i)oobar", "FOobAr", false),
new TestCase("foo(?i)bar", "fOobAr", false),
new TestCase("(?i)foo[bar]+", "foObAr", true),
new TestCase("(?i)foo[a-r]+", "foObAr", true),
new TestCase("abc(?x)blah", "abcblah", true),
new TestCase("abc(?x) blah", "abcblah", true),
new TestCase("abc(?x) blah blech", "abcblahblech", true),
new TestCase("[\\n-#]", "!", true),
new TestCase("[\\n-#]", "-", false),
new TestCase("[\\043]+", "blahblah#blech", true),
new TestCase("[\\042-\\044]+", "blahblah#blech", true),
new TestCase("[\\u1234-\\u1236]", "blahblah\u1235blech", true),
new TestCase("[^\043]*", "blahblah#blech", true),
new TestCase("(|f)?+", "foo", true),