blob: 3f5238f6c6c2443c4d71b0e2a46da8cb3901f0ba [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.util.*;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.SuppressForbidden;
public class TestCharArraySet extends LuceneTestCase {
static final String[] TEST_STOP_WORDS = {
"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it",
"no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they",
"this", "to", "was", "will", "with"
};
public void testRehash() throws Exception {
CharArraySet cas = new CharArraySet(0, true);
for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
cas.add(TEST_STOP_WORDS[i]);
}
assertEquals(TEST_STOP_WORDS.length, cas.size());
for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
assertTrue(cas.contains(TEST_STOP_WORDS[i]));
}
}
public void testNonZeroOffset() {
String[] words = {"Hello", "World", "this", "is", "a", "test"};
char[] findme = "xthisy".toCharArray();
CharArraySet set = new CharArraySet(10, true);
set.addAll(Arrays.asList(words));
assertTrue(set.contains(findme, 1, 4));
assertTrue(set.contains(new String(findme, 1, 4)));
// test unmodifiable
set = CharArraySet.unmodifiableSet(set);
assertTrue(set.contains(findme, 1, 4));
assertTrue(set.contains(new String(findme, 1, 4)));
}
@SuppressForbidden(reason = "Explicitly checking new Integers")
public void testObjectContains() {
CharArraySet set = new CharArraySet(10, true);
Integer val = Integer.valueOf(1);
@SuppressWarnings("deprecation")
Integer val1 = new Integer(1);
// Verify explicitly the case of different Integer instances
assertNotSame(val, val1);
set.add(val);
assertTrue(set.contains(val));
assertTrue(set.contains(val1)); // another integer
assertTrue(set.contains("1"));
assertTrue(set.contains(new char[] {'1'}));
// test unmodifiable
set = CharArraySet.unmodifiableSet(set);
assertTrue(set.contains(val));
assertTrue(set.contains(val1)); // another integer
assertTrue(set.contains("1"));
assertTrue(set.contains(new char[] {'1'}));
}
public void testClear() {
CharArraySet set = new CharArraySet(10, true);
set.addAll(Arrays.asList(TEST_STOP_WORDS));
assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
set.clear();
assertEquals("not empty", 0, set.size());
for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
assertFalse(set.contains(TEST_STOP_WORDS[i]));
}
set.addAll(Arrays.asList(TEST_STOP_WORDS));
assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
assertTrue(set.contains(TEST_STOP_WORDS[i]));
}
}
// TODO: break this up into simpler test methods, vs "telling a story"
public void testModifyOnUnmodifiable() {
CharArraySet set = new CharArraySet(10, true);
set.addAll(Arrays.asList(TEST_STOP_WORDS));
final int size = set.size();
CharArraySet unmodifiableSet = CharArraySet.unmodifiableSet(set);
assertEquals("Set size changed due to unmodifiableSet call", size, unmodifiableSet.size());
String NOT_IN_SET = "SirGallahad";
assertFalse("Test String already exists in set", unmodifiableSet.contains(NOT_IN_SET));
expectThrows(
UnsupportedOperationException.class, () -> unmodifiableSet.add(NOT_IN_SET.toCharArray()));
assertFalse(
"Test String has been added to unmodifiable set", unmodifiableSet.contains(NOT_IN_SET));
assertEquals("Size of unmodifiable set has changed", size, unmodifiableSet.size());
expectThrows(UnsupportedOperationException.class, () -> unmodifiableSet.add(NOT_IN_SET));
assertFalse(
"Test String has been added to unmodifiable set", unmodifiableSet.contains(NOT_IN_SET));
assertEquals("Size of unmodifiable set has changed", size, unmodifiableSet.size());
expectThrows(
UnsupportedOperationException.class,
() -> unmodifiableSet.add(new StringBuilder(NOT_IN_SET)));
assertFalse(
"Test String has been added to unmodifiable set", unmodifiableSet.contains(NOT_IN_SET));
assertEquals("Size of unmodifiable set has changed", size, unmodifiableSet.size());
expectThrows(UnsupportedOperationException.class, () -> unmodifiableSet.clear());
assertFalse("Changed unmodifiable set", unmodifiableSet.contains(NOT_IN_SET));
assertEquals("Size of unmodifiable set has changed", size, unmodifiableSet.size());
expectThrows(
UnsupportedOperationException.class, () -> unmodifiableSet.add((Object) NOT_IN_SET));
assertFalse(
"Test String has been added to unmodifiable set", unmodifiableSet.contains(NOT_IN_SET));
assertEquals("Size of unmodifiable set has changed", size, unmodifiableSet.size());
// This test was changed in 3.1, as a contains() call on the given Collection using the
// "correct" iterator's
// current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor
// never call
// remove() on the iterator
expectThrows(
UnsupportedOperationException.class,
() -> unmodifiableSet.removeAll(new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true)));
assertEquals("Size of unmodifiable set has changed", size, unmodifiableSet.size());
expectThrows(
UnsupportedOperationException.class,
() -> unmodifiableSet.retainAll(new CharArraySet(Arrays.asList(NOT_IN_SET), true)));
assertEquals("Size of unmodifiable set has changed", size, unmodifiableSet.size());
expectThrows(
UnsupportedOperationException.class,
() -> unmodifiableSet.addAll(Arrays.asList(NOT_IN_SET)));
assertFalse(
"Test String has been added to unmodifiable set", unmodifiableSet.contains(NOT_IN_SET));
for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
assertTrue(set.contains(TEST_STOP_WORDS[i]));
assertTrue(unmodifiableSet.contains(TEST_STOP_WORDS[i]));
}
}
public void testUnmodifiableSet() {
CharArraySet set = new CharArraySet(10, true);
set.addAll(Arrays.asList(TEST_STOP_WORDS));
set.add(Integer.valueOf(1));
final int size = set.size();
set = CharArraySet.unmodifiableSet(set);
assertEquals("Set size changed due to unmodifiableSet call", size, set.size());
for (String stopword : TEST_STOP_WORDS) {
assertTrue(set.contains(stopword));
}
assertTrue(set.contains(Integer.valueOf(1)));
assertTrue(set.contains("1"));
assertTrue(set.contains(new char[] {'1'}));
expectThrows(
NullPointerException.class,
() -> {
CharArraySet.unmodifiableSet(null);
});
}
public void testSupplementaryChars() {
String missing = "Term %s is missing in the set";
String falsePos = "Term %s is in the set but shouldn't";
// for reference see
// http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
String[] upperArr =
new String[] {"Abc\ud801\udc1c", "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
String[] lowerArr =
new String[] {"abc\ud801\udc44", "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
for (String upper : upperArr) {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
}
set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
for (String upper : upperArr) {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
}
}
public void testSingleHighSurrogate() {
String missing = "Term %s is missing in the set";
String falsePos = "Term %s is in the set but shouldn't";
String[] upperArr =
new String[] {"ABC\uD800", "ABC\uD800EfG", "\uD800EfG", "\uD800\ud801\udc1cB"};
String[] lowerArr =
new String[] {"abc\uD800", "abc\uD800efg", "\uD800efg", "\uD800\ud801\udc44b"};
CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
for (String upper : upperArr) {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
}
set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
for (String upper : upperArr) {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set.contains(lowerArr[i]));
}
}
public void testCopyCharArraySetBWCompat() {
CharArraySet setIgnoreCase = new CharArraySet(10, true);
CharArraySet setCaseSensitive = new CharArraySet(10, false);
List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
List<String> stopwordsUpper = new ArrayList<>();
for (String string : stopwords) {
stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
}
setIgnoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
setIgnoreCase.add(Integer.valueOf(1));
setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
setCaseSensitive.add(Integer.valueOf(1));
CharArraySet copy = CharArraySet.copy(setIgnoreCase);
CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
assertEquals(setIgnoreCase.size(), copy.size());
assertEquals(setCaseSensitive.size(), copy.size());
assertTrue(copy.containsAll(stopwords));
assertTrue(copy.containsAll(stopwordsUpper));
assertTrue(copyCaseSens.containsAll(stopwords));
for (String string : stopwordsUpper) {
assertFalse(copyCaseSens.contains(string));
}
// test adding terms to the copy
List<String> newWords = new ArrayList<>();
for (String string : stopwords) {
newWords.add(string + "_1");
}
copy.addAll(newWords);
assertTrue(copy.containsAll(stopwords));
assertTrue(copy.containsAll(stopwordsUpper));
assertTrue(copy.containsAll(newWords));
// new added terms are not in the source set
for (String string : newWords) {
assertFalse(setIgnoreCase.contains(string));
assertFalse(setCaseSensitive.contains(string));
}
}
/** Test the static #copy() function with a CharArraySet as a source */
public void testCopyCharArraySet() {
CharArraySet setIngoreCase = new CharArraySet(10, true);
CharArraySet setCaseSensitive = new CharArraySet(10, false);
List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
List<String> stopwordsUpper = new ArrayList<>();
for (String string : stopwords) {
stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
}
setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
setIngoreCase.add(Integer.valueOf(1));
setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
setCaseSensitive.add(Integer.valueOf(1));
CharArraySet copy = CharArraySet.copy(setIngoreCase);
CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
assertEquals(setIngoreCase.size(), copy.size());
assertEquals(setCaseSensitive.size(), copy.size());
assertTrue(copy.containsAll(stopwords));
assertTrue(copy.containsAll(stopwordsUpper));
assertTrue(copyCaseSens.containsAll(stopwords));
for (String string : stopwordsUpper) {
assertFalse(copyCaseSens.contains(string));
}
// test adding terms to the copy
List<String> newWords = new ArrayList<>();
for (String string : stopwords) {
newWords.add(string + "_1");
}
copy.addAll(newWords);
assertTrue(copy.containsAll(stopwords));
assertTrue(copy.containsAll(stopwordsUpper));
assertTrue(copy.containsAll(newWords));
// new added terms are not in the source set
for (String string : newWords) {
assertFalse(setIngoreCase.contains(string));
assertFalse(setCaseSensitive.contains(string));
}
}
/** Test the static #copy() function with a JDK {@link Set} as a source */
public void testCopyJDKSet() {
Set<String> set = new HashSet<>();
List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
List<String> stopwordsUpper = new ArrayList<>();
for (String string : stopwords) {
stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
}
set.addAll(Arrays.asList(TEST_STOP_WORDS));
CharArraySet copy = CharArraySet.copy(set);
assertEquals(set.size(), copy.size());
assertEquals(set.size(), copy.size());
assertTrue(copy.containsAll(stopwords));
for (String string : stopwordsUpper) {
assertFalse(copy.contains(string));
}
List<String> newWords = new ArrayList<>();
for (String string : stopwords) {
newWords.add(string + "_1");
}
copy.addAll(newWords);
assertTrue(copy.containsAll(stopwords));
assertTrue(copy.containsAll(newWords));
// new added terms are not in the source set
for (String string : newWords) {
assertFalse(set.contains(string));
}
}
/**
* Tests a special case of {@link CharArraySet#copy(Set)} where the set to copy is the {@link
* CharArraySet#EMPTY_SET}
*/
public void testCopyEmptySet() {
assertSame(CharArraySet.EMPTY_SET, CharArraySet.copy(CharArraySet.EMPTY_SET));
}
/** Smoketests the static empty set */
public void testEmptySet() {
assertEquals(0, CharArraySet.EMPTY_SET.size());
assertTrue(CharArraySet.EMPTY_SET.isEmpty());
for (String stopword : TEST_STOP_WORDS) {
assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
}
assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(), 0, 3));
}
/** Test for NPE */
public void testContainsWithNull() {
CharArraySet set = new CharArraySet(1, true);
expectThrows(
NullPointerException.class,
() -> {
set.contains((char[]) null, 0, 10);
});
expectThrows(
NullPointerException.class,
() -> {
set.contains((CharSequence) null);
});
expectThrows(
NullPointerException.class,
() -> {
set.contains((Object) null);
});
}
public void testToString() {
CharArraySet set = CharArraySet.copy(Collections.singleton("test"));
assertEquals("[test]", set.toString());
set.add("test2");
assertTrue(set.toString().contains(", "));
}
}