/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.lucene.analysis.synonym; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.HashSet; |
| import java.util.Set; |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CharsRef; |
| import org.apache.lucene.util.IntsRefBuilder; |
| import org.apache.lucene.util.fst.Util; |
| |
| /** Base class for testing synonym parsers. */ |
| public abstract class BaseSynonymParserTestCase extends BaseTokenStreamTestCase { |
| /** |
| * Helper method to validate synonym parsing. |
| * |
| * @param synonynMap the generated synonym map after parsing |
| * @param word word (phrase) we are validating the synonyms for. Should be the value that comes |
| * out of the analyzer. All spaces will be replaced by word separators. |
| * @param includeOrig if synonyms should include original |
| * @param synonyms actual synonyms. All word separators are replaced with a single space. |
| */ |
| public static void assertEntryEquals( |
| SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms) throws Exception { |
| word = word.replace(' ', SynonymMap.WORD_SEPARATOR); |
| BytesRef value = |
| Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder())); |
| assertNotNull("No synonyms found for: " + word, value); |
| |
| ByteArrayDataInput bytesReader = |
| new ByteArrayDataInput(value.bytes, value.offset, value.length); |
| final int code = bytesReader.readVInt(); |
| |
| final boolean keepOrig = (code & 0x1) == 0; |
| assertEquals( |
| "Include original different than expected. Expected " + includeOrig + " was " + keepOrig, |
| includeOrig, |
| keepOrig); |
| |
| final int count = code >>> 1; |
| assertEquals( |
| "Invalid synonym count. Expected " + synonyms.length + " was " + count, |
| synonyms.length, |
| count); |
| |
| Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms)); |
| |
| BytesRef scratchBytes = new BytesRef(); |
| for (int i = 0; i < count; i++) { |
| synonynMap.words.get(bytesReader.readVInt(), scratchBytes); |
| String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' '); |
| assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym)); |
| } |
| } |
| |
| /** |
| * Validates that there are no synonyms for the given word. |
| * |
| * @param synonynMap the generated synonym map after parsing |
| * @param word word (phrase) we are validating the synonyms for. Should be the value that comes |
| * out of the analyzer. All spaces will be replaced by word separators. |
| */ |
| public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException { |
| word = word.replace(' ', SynonymMap.WORD_SEPARATOR); |
| BytesRef value = |
| Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder())); |
| assertNull("There should be no synonyms for: " + word, value); |
| } |
| |
| public static void assertEntryEquals( |
| SynonymMap synonynMap, String word, boolean includeOrig, String synonym) throws Exception { |
| assertEntryEquals(synonynMap, word, includeOrig, new String[] {synonym}); |
| } |
| |
| public static void assertAnalyzesToPositions( |
| Analyzer a, |
| String input, |
| String[] output, |
| String[] types, |
| int[] posIncrements, |
| int[] posLengths) |
| throws IOException { |
| assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths); |
| } |
| } |