lucene/test-framework/src/java/org/apache/lucene/analysis/VocabularyAssert.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.analysis;

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;

 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.junit.Assert;

 /** Utility class for doing vocabulary-based stemming tests */
 public class VocabularyAssert {
   /** Run a vocabulary test against two data files. */
   public static void assertVocabulary(Analyzer a, InputStream voc, InputStream out)
   throws IOException {
     BufferedReader vocReader = new BufferedReader(
         new InputStreamReader(voc, StandardCharsets.UTF_8));
     BufferedReader outputReader = new BufferedReader(
         new InputStreamReader(out, StandardCharsets.UTF_8));
     String inputWord = null;
     while ((inputWord = vocReader.readLine()) != null) {
       String expectedWord = outputReader.readLine();
       Assert.assertNotNull(expectedWord);
       BaseTokenStreamTestCase.checkOneTerm(a, inputWord, expectedWord);
     }
   }

   /** Run a vocabulary test against one file: tab separated. */
   public static void assertVocabulary(Analyzer a, InputStream vocOut)
   throws IOException {
     BufferedReader vocReader = new BufferedReader(
         new InputStreamReader(vocOut, StandardCharsets.UTF_8));
     String inputLine = null;
     while ((inputLine = vocReader.readLine()) != null) {
       if (inputLine.startsWith("#") || inputLine.trim().length() == 0)
         continue; /* comment */
       String words[] = inputLine.split("\t");
       BaseTokenStreamTestCase.checkOneTerm(a, words[0], words[1]);
     }
   }

   /** Run a vocabulary test against two data files inside a zip file */
   public static void assertVocabulary(Analyzer a, Path zipFile, String voc, String out) throws IOException {
     Path tmp = LuceneTestCase.createTempDir();
     try (InputStream in = Files.newInputStream(zipFile)) {
       TestUtil.unzip(in, tmp);
     }
     try (InputStream v = Files.newInputStream(tmp.resolve(voc));
          InputStream o = Files.newInputStream(tmp.resolve(out))) {
       assertVocabulary(a, v, o);
     }
   }

   /** Run a vocabulary test against a tab-separated data file inside a zip file */
   public static void assertVocabulary(Analyzer a, Path zipFile, String vocOut) throws IOException {
     Path tmp = LuceneTestCase.createTempDir();
     try (InputStream in = Files.newInputStream(zipFile)) {
       TestUtil.unzip(in, tmp);
     }
     try (InputStream in = Files.newInputStream(tmp.resolve(vocOut))) {
       assertVocabulary(a, in);
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.analysis;

	import java.io.BufferedReader;
	import java.io.IOException;
	import java.io.InputStream;
	import java.io.InputStreamReader;
	import java.nio.charset.StandardCharsets;
	import java.nio.file.Files;
	import java.nio.file.Path;

	import org.apache.lucene.util.LuceneTestCase;
	import org.apache.lucene.util.TestUtil;
	import org.junit.Assert;

	/** Utility class for doing vocabulary-based stemming tests */
	public class VocabularyAssert {
	/** Run a vocabulary test against two data files. */
	public static void assertVocabulary(Analyzer a, InputStream voc, InputStream out)
	throws IOException {
	BufferedReader vocReader = new BufferedReader(
	new InputStreamReader(voc, StandardCharsets.UTF_8));
	BufferedReader outputReader = new BufferedReader(
	new InputStreamReader(out, StandardCharsets.UTF_8));
	String inputWord = null;
	while ((inputWord = vocReader.readLine()) != null) {
	String expectedWord = outputReader.readLine();
	Assert.assertNotNull(expectedWord);
	BaseTokenStreamTestCase.checkOneTerm(a, inputWord, expectedWord);
	}
	}

	/** Run a vocabulary test against one file: tab separated. */
	public static void assertVocabulary(Analyzer a, InputStream vocOut)
	throws IOException {
	BufferedReader vocReader = new BufferedReader(
	new InputStreamReader(vocOut, StandardCharsets.UTF_8));
	String inputLine = null;
	while ((inputLine = vocReader.readLine()) != null) {
	if (inputLine.startsWith("#") \|\| inputLine.trim().length() == 0)
	continue; /* comment */
	String words[] = inputLine.split("\t");
	BaseTokenStreamTestCase.checkOneTerm(a, words[0], words[1]);
	}
	}

	/** Run a vocabulary test against two data files inside a zip file */
	public static void assertVocabulary(Analyzer a, Path zipFile, String voc, String out) throws IOException {
	Path tmp = LuceneTestCase.createTempDir();
	try (InputStream in = Files.newInputStream(zipFile)) {
	TestUtil.unzip(in, tmp);
	}
	try (InputStream v = Files.newInputStream(tmp.resolve(voc));
	InputStream o = Files.newInputStream(tmp.resolve(out))) {
	assertVocabulary(a, v, o);
	}
	}

	/** Run a vocabulary test against a tab-separated data file inside a zip file */
	public static void assertVocabulary(Analyzer a, Path zipFile, String vocOut) throws IOException {
	Path tmp = LuceneTestCase.createTempDir();
	try (InputStream in = Files.newInputStream(zipFile)) {
	TestUtil.unzip(in, tmp);
	}
	try (InputStream in = Files.newInputStream(tmp.resolve(vocOut))) {
	assertVocabulary(a, in);
	}
	}
	}