blob: 4a3fd721be2fb1f5c042fb7ffda969510b206887 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Util;
/** Base class for testing synonym parsers. */
public abstract class BaseSynonymParserTestCase extends BaseTokenStreamTestCase {
/**
* Helper method to validate synonym parsing.
*
* @param synonynMap the generated synonym map after parsing
* @param word word (phrase) we are validating the synonyms for. Should be the value that comes
* out of the analyzer. All spaces will be replaced by word separators.
* @param includeOrig if synonyms should include original
* @param synonyms actual synonyms. All word separators are replaced with a single space.
*/
public static void assertEntryEquals(
SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
BytesRef value =
Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
assertNotNull("No synonyms found for: " + word, value);
ByteArrayDataInput bytesReader =
new ByteArrayDataInput(value.bytes, value.offset, value.length);
final int code = bytesReader.readVInt();
final boolean keepOrig = (code & 0x1) == 0;
assertEquals(
"Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
includeOrig,
keepOrig);
final int count = code >>> 1;
assertEquals(
"Invalid synonym count. Expected " + synonyms.length + " was " + count,
synonyms.length,
count);
Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms));
BytesRef scratchBytes = new BytesRef();
for (int i = 0; i < count; i++) {
synonynMap.words.get(bytesReader.readVInt(), scratchBytes);
String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym));
}
}
/**
* Validates that there are no synonyms for the given word.
*
* @param synonynMap the generated synonym map after parsing
* @param word word (phrase) we are validating the synonyms for. Should be the value that comes
* out of the analyzer. All spaces will be replaced by word separators.
*/
public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException {
word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
BytesRef value =
Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
assertNull("There should be no synonyms for: " + word, value);
}
public static void assertEntryEquals(
SynonymMap synonynMap, String word, boolean includeOrig, String synonym) throws Exception {
assertEntryEquals(synonynMap, word, includeOrig, new String[] {synonym});
}
public static void assertAnalyzesToPositions(
Analyzer a,
String input,
String[] output,
String[] types,
int[] posIncrements,
int[] posLengths)
throws IOException {
assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths);
}
}