// lucene/analysis/common/src/test/org/apache/lucene/analysis/synonym/BaseSynonymParserTestCase.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.analysis.synonym;

 import java.io.IOException;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.fst.Util;

 /** Base class for testing synonym parsers. */
 public abstract class BaseSynonymParserTestCase extends BaseTokenStreamTestCase {
   /**
    * Helper method to validate synonym parsing.
    *
    * @param synonynMap the generated synonym map after parsing
    * @param word word (phrase) we are validating the synonyms for. Should be the value that comes
    *     out of the analyzer. All spaces will be replaced by word separators.
    * @param includeOrig if synonyms should include original
    * @param synonyms actual synonyms. All word separators are replaced with a single space.
    */
   public static void assertEntryEquals(
       SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
     word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
     BytesRef value =
         Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
     assertNotNull("No synonyms found for: " + word, value);

     ByteArrayDataInput bytesReader =
         new ByteArrayDataInput(value.bytes, value.offset, value.length);
     final int code = bytesReader.readVInt();

     final boolean keepOrig = (code & 0x1) == 0;
     assertEquals(
         "Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
         includeOrig,
         keepOrig);

     final int count = code >>> 1;
     assertEquals(
         "Invalid synonym count. Expected " + synonyms.length + " was " + count,
         synonyms.length,
         count);

     Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms));

     BytesRef scratchBytes = new BytesRef();
     for (int i = 0; i < count; i++) {
       synonynMap.words.get(bytesReader.readVInt(), scratchBytes);
       String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
       assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym));
     }
   }

   /**
    * Validates that there are no synonyms for the given word.
    *
    * @param synonynMap the generated synonym map after parsing
    * @param word word (phrase) we are validating the synonyms for. Should be the value that comes
    *     out of the analyzer. All spaces will be replaced by word separators.
    */
   public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException {
     word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
     BytesRef value =
         Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
     assertNull("There should be no synonyms for: " + word, value);
   }

   public static void assertEntryEquals(
       SynonymMap synonynMap, String word, boolean includeOrig, String synonym) throws Exception {
     assertEntryEquals(synonynMap, word, includeOrig, new String[] {synonym});
   }

   public static void assertAnalyzesToPositions(
       Analyzer a,
       String input,
       String[] output,
       String[] types,
       int[] posIncrements,
       int[] posLengths)
       throws IOException {
     assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.analysis.synonym;

	import java.io.IOException;
	import java.util.Arrays;
	import java.util.HashSet;
	import java.util.Set;
	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.BaseTokenStreamTestCase;
	import org.apache.lucene.store.ByteArrayDataInput;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.CharsRef;
	import org.apache.lucene.util.IntsRefBuilder;
	import org.apache.lucene.util.fst.Util;

	/** Base class for testing synonym parsers. */
	public abstract class BaseSynonymParserTestCase extends BaseTokenStreamTestCase {
	/**
	* Helper method to validate synonym parsing.
	*
	* @param synonynMap the generated synonym map after parsing
	* @param word word (phrase) we are validating the synonyms for. Should be the value that comes
	* out of the analyzer. All spaces will be replaced by word separators.
	* @param includeOrig if synonyms should include original
	* @param synonyms actual synonyms. All word separators are replaced with a single space.
	*/
	public static void assertEntryEquals(
	SynonymMap synonynMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
	word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
	BytesRef value =
	Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
	assertNotNull("No synonyms found for: " + word, value);

	ByteArrayDataInput bytesReader =
	new ByteArrayDataInput(value.bytes, value.offset, value.length);
	final int code = bytesReader.readVInt();

	final boolean keepOrig = (code & 0x1) == 0;
	assertEquals(
	"Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
	includeOrig,
	keepOrig);

	final int count = code >>> 1;
	assertEquals(
	"Invalid synonym count. Expected " + synonyms.length + " was " + count,
	synonyms.length,
	count);

	Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms));

	BytesRef scratchBytes = new BytesRef();
	for (int i = 0; i < count; i++) {
	synonynMap.words.get(bytesReader.readVInt(), scratchBytes);
	String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
	assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym));
	}
	}

	/**
	* Validates that there are no synonyms for the given word.
	*
	* @param synonynMap the generated synonym map after parsing
	* @param word word (phrase) we are validating the synonyms for. Should be the value that comes
	* out of the analyzer. All spaces will be replaced by word separators.
	*/
	public static void assertEntryAbsent(SynonymMap synonynMap, String word) throws IOException {
	word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
	BytesRef value =
	Util.get(synonynMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
	assertNull("There should be no synonyms for: " + word, value);
	}

	public static void assertEntryEquals(
	SynonymMap synonynMap, String word, boolean includeOrig, String synonym) throws Exception {
	assertEntryEquals(synonynMap, word, includeOrig, new String[] {synonym});
	}

	public static void assertAnalyzesToPositions(
	Analyzer a,
	String input,
	String[] output,
	String[] types,
	int[] posIncrements,
	int[] posLengths)
	throws IOException {
	assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths);
	}
	}