lucene/backwards/src/test/org/apache/lucene/analysis/TestStopAnalyzer.java - manifoldcf-integration-solr-3.x - Git at Google

 package org.apache.lucene.analysis;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Version;

 import java.io.StringReader;
 import java.io.IOException;
 import java.util.Iterator;
 import java.util.Set;
 import java.util.HashSet;

 /**
  * Tests for {@link StopAnalyzer}: the default English stop set, a
  * caller-supplied stop list, and the position increments reported for the
  * tokens that survive stop-word removal.
  */
 public class TestStopAnalyzer extends BaseTokenStreamTestCase {

   /** Analyzer under test, built with the default stop set. */
   private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT);

   /** Snapshot of the default English stop set, filled in {@link #setUp()}. */
   private Set<Object> inValidTokens = new HashSet<Object>();

   /**
    * Copies every element of {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} into
    * {@link #inValidTokens}.
    */
   @Override
   public void setUp() throws Exception {
     super.setUp();
     inValidTokens.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
   }

   /**
    * The default analyzer must not emit any token that is a default English
    * stop word.
    *
    * @throws IOException if the token stream fails while being consumed
    */
   public void testDefaults() throws IOException {
     assertNotNull(stop);
     StringReader reader = new StringReader("This is a test of the english stop analyzer");
     TokenStream stream = stop.tokenStream("test", reader);
     assertNotNull(stream);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);

     int tokenCount = 0;
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse("stop word emitted: " + text, inValidTokens.contains(text));
       // NOTE(review): the elements copied in setUp() may not be String
       // instances, depending on what the stop set's iterator returns
       // (confirm against CharArraySet#iterator); in that case the check
       // above can never fail. Asking the stop set itself does not depend
       // on the element type.
       assertFalse("stop word emitted: " + text,
           StopAnalyzer.ENGLISH_STOP_WORDS_SET.contains(text));
       tokenCount++;
     }
     // Without this the loop above passes vacuously on an empty stream.
     assertTrue("analyzer produced no tokens", tokenCount > 0);
   }

   /**
    * With a custom stop list and {@code Version.LUCENE_24}, none of the listed
    * words may be emitted and every surviving token reports a position
    * increment of 1.
    *
    * @throws IOException if the token stream fails while being consumed
    */
   public void testStopList() throws IOException {
     Set<Object> stopWordsSet = new HashSet<Object>();
     stopWordsSet.add("good");
     stopWordsSet.add("test");
     stopWordsSet.add("analyzer");
     StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
     StringReader reader = new StringReader("This is a good test of the english stop analyzer");
     TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);

     int tokenCount = 0;
     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse("stop word emitted: " + text, stopWordsSet.contains(text));
       assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
       tokenCount++;
     }
     // Without this the loop above passes vacuously on an empty stream.
     assertTrue("analyzer produced no tokens", tokenCount > 0);
   }

   /**
    * With a custom stop list and {@code TEST_VERSION_CURRENT}, the position
    * increment of each surviving token must account for the stop words
    * removed in front of it.
    *
    * @throws IOException if the token stream fails while being consumed
    */
   public void testStopListPositions() throws IOException {
     Set<Object> stopWordsSet = new HashSet<Object>();
     stopWordsSet.add("good");
     stopWordsSet.add("test");
     stopWordsSet.add("analyzer");
     StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
     // Each expected increment is aligned under the surviving word it belongs to.
     StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
     int[] expectedIncr =                  { 1,   1, 1,          3, 1,  1,      1,            2,   1};
     TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
     int i = 0;
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);

     while (stream.incrementToken()) {
       String text = termAtt.toString();
       assertFalse("stop word emitted: " + text, stopWordsSet.contains(text));
       // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
       assertTrue("unexpected extra token: " + text, i < expectedIncr.length);
       assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
     }
     // Every expected increment must have been matched by a token.
     assertEquals(expectedIncr.length, i);
   }

 }
	package org.apache.lucene.analysis;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
	import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
	import org.apache.lucene.util.Version;

	import java.io.StringReader;
	import java.io.IOException;
	import java.util.Iterator;
	import java.util.Set;
	import java.util.HashSet;

	/**
	 * Tests for {@link StopAnalyzer}: the default English stop set, a
	 * caller-supplied stop list, and the position increments reported for the
	 * tokens that survive stop-word removal.
	 */
	public class TestStopAnalyzer extends BaseTokenStreamTestCase {

	  /** Analyzer under test, built with the default stop set. */
	  private StopAnalyzer stop = new StopAnalyzer(TEST_VERSION_CURRENT);

	  /** Snapshot of the default English stop set, filled in {@link #setUp()}. */
	  private Set<Object> inValidTokens = new HashSet<Object>();

	  /**
	   * Copies every element of {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} into
	   * {@link #inValidTokens}.
	   */
	  @Override
	  public void setUp() throws Exception {
	    super.setUp();
	    inValidTokens.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
	  }

	  /**
	   * The default analyzer must not emit any token that is a default English
	   * stop word.
	   *
	   * @throws IOException if the token stream fails while being consumed
	   */
	  public void testDefaults() throws IOException {
	    assertNotNull(stop);
	    StringReader reader = new StringReader("This is a test of the english stop analyzer");
	    TokenStream stream = stop.tokenStream("test", reader);
	    assertNotNull(stream);
	    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);

	    int tokenCount = 0;
	    while (stream.incrementToken()) {
	      String text = termAtt.toString();
	      assertFalse("stop word emitted: " + text, inValidTokens.contains(text));
	      // NOTE(review): the elements copied in setUp() may not be String
	      // instances, depending on what the stop set's iterator returns
	      // (confirm against CharArraySet#iterator); in that case the check
	      // above can never fail. Asking the stop set itself does not depend
	      // on the element type.
	      assertFalse("stop word emitted: " + text,
	          StopAnalyzer.ENGLISH_STOP_WORDS_SET.contains(text));
	      tokenCount++;
	    }
	    // Without this the loop above passes vacuously on an empty stream.
	    assertTrue("analyzer produced no tokens", tokenCount > 0);
	  }

	  /**
	   * With a custom stop list and {@code Version.LUCENE_24}, none of the listed
	   * words may be emitted and every surviving token reports a position
	   * increment of 1.
	   *
	   * @throws IOException if the token stream fails while being consumed
	   */
	  public void testStopList() throws IOException {
	    Set<Object> stopWordsSet = new HashSet<Object>();
	    stopWordsSet.add("good");
	    stopWordsSet.add("test");
	    stopWordsSet.add("analyzer");
	    StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
	    StringReader reader = new StringReader("This is a good test of the english stop analyzer");
	    TokenStream stream = newStop.tokenStream("test", reader);
	    assertNotNull(stream);
	    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
	    PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);

	    int tokenCount = 0;
	    while (stream.incrementToken()) {
	      String text = termAtt.toString();
	      assertFalse("stop word emitted: " + text, stopWordsSet.contains(text));
	      assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
	      tokenCount++;
	    }
	    // Without this the loop above passes vacuously on an empty stream.
	    assertTrue("analyzer produced no tokens", tokenCount > 0);
	  }

	  /**
	   * With a custom stop list and {@code TEST_VERSION_CURRENT}, the position
	   * increment of each surviving token must account for the stop words
	   * removed in front of it.
	   *
	   * @throws IOException if the token stream fails while being consumed
	   */
	  public void testStopListPositions() throws IOException {
	    Set<Object> stopWordsSet = new HashSet<Object>();
	    stopWordsSet.add("good");
	    stopWordsSet.add("test");
	    stopWordsSet.add("analyzer");
	    StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
	    StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
	    // One entry per surviving token, in order: this, is, a, of, the,
	    // english, stop, with, positions.
	    int[] expectedIncr = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
	    TokenStream stream = newStop.tokenStream("test", reader);
	    assertNotNull(stream);
	    int i = 0;
	    CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
	    PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);

	    while (stream.incrementToken()) {
	      String text = termAtt.toString();
	      assertFalse("stop word emitted: " + text, stopWordsSet.contains(text));
	      // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
	      assertTrue("unexpected extra token: " + text, i < expectedIncr.length);
	      assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
	    }
	    // Every expected increment must have been matched by a token.
	    assertEquals(expectedIncr.length, i);
	  }

	}