solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.rest.schema.analysis;
 import java.util.Map;
 import java.util.Set;

 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.rest.ManagedResource;

 /**
  * TokenFilterFactory that uses the ManagedWordSetResource implementation
  * for managing stop words using the REST API.
  * @since 4.8.0
  * @lucene.spi {@value #NAME}
  */
 public class ManagedStopFilterFactory extends BaseManagedTokenFilterFactory {

   /** SPI name */
   public static final String NAME = "managedStop";

   // Assigned in onManagedResourceInitialized(); create() refuses to run
   // while this is still null.
   private CharArraySet stopWords = null;

   /**
    * Initialize the managed "handle".
    *
    * @param args the factory arguments, passed through to the superclass
    */
   public ManagedStopFilterFactory(Map<String,String> args) {
     super(args);
   }

   /**
    * This analysis component knows the most logical "path"
    * for which to manage stop words from.
    *
    * @return the fixed prefix {@code /schema/analysis/stopwords/} followed by
    *         this factory's handle
    */
   @Override
   public String getResourceId() {
     return "/schema/analysis/stopwords/" + handle;
   }

   /**
    * Returns the implementation class for managing stop words.
    *
    * @return {@link ManagedWordSetResource}
    */
   protected Class<? extends ManagedResource> getManagedResourceImplClass() {
     return ManagedWordSetResource.class;
   }

   /**
    * Callback invoked by the {@link ManagedResource} instance to trigger this
    * class to create the CharArraySet used to create the StopFilter using the
    * wordset managed by {@link ManagedWordSetResource}. Keep in mind that
    * a schema.xml may reuse the same {@link ManagedStopFilterFactory} many
    * times for different field types; behind the scenes all instances of this
    * class/handle combination share the same managed data, hence the need for
    * a listener/callback scheme.
    *
    * @param args init args; the optional {@code ignoreCase} entry selects
    *             case-insensitive matching and is treated as {@code false}
    *             when it is missing
    * @param res  the managed resource, which is cast to
    *             {@link ManagedWordSetResource} to obtain the word set
    */
   @Override
   public void onManagedResourceInitialized(NamedList<?> args, ManagedResource res)
       throws SolrException {

     Set<String> managedWords = ((ManagedWordSetResource)res).getWordSet();

     // getBooleanArg hands back a Boolean that is null when the argument is
     // not present; unboxing it directly would throw a NullPointerException,
     // so fall back to case-sensitive matching in that case.
     Boolean ignoreCaseArg = args.getBooleanArg("ignoreCase");
     boolean ignoreCase = ignoreCaseArg != null && ignoreCaseArg;

     // rebuild the Lucene CharArraySet from our managedWords set, which is
     // slightly inefficient to do for every instance of the managed filter;
     // populate a local set first and only then store it in the field
     CharArraySet rebuilt = new CharArraySet(managedWords.size(), ignoreCase);
     rebuilt.addAll(managedWords);
     stopWords = rebuilt;
   }

   /**
    * Returns a StopFilter based on our managed stop word set.
    *
    * @param input the token stream to wrap
    * @return a {@link StopFilter} over {@code input} using the managed stop words
    * @throws IllegalStateException if the stop word set has not been built yet
    */
   @Override
   public TokenStream create(TokenStream input) {
     if (stopWords == null) {
       throw new IllegalStateException("Managed stopwords not initialized correctly!");
     }
     return new StopFilter(input, stopWords);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.rest.schema.analysis;
	import java.util.Map;
	import java.util.Set;

	import org.apache.lucene.analysis.CharArraySet;
	import org.apache.lucene.analysis.core.StopFilter;
	import org.apache.lucene.analysis.TokenStream;
	import org.apache.solr.common.SolrException;
	import org.apache.solr.common.util.NamedList;
	import org.apache.solr.rest.ManagedResource;

	/**
	* TokenFilterFactory that uses the ManagedWordSetResource implementation
	* for managing stop words using the REST API.
	* @since 4.8.0
	* @lucene.spi {@value #NAME}
	*/
	public class ManagedStopFilterFactory extends BaseManagedTokenFilterFactory {

		/** SPI name */
		public static final String NAME = "managedStop";

		// Assigned in onManagedResourceInitialized(); create() refuses to run
		// while this is still null.
		private CharArraySet stopWords = null;

		/**
		 * Initialize the managed "handle".
		 *
		 * @param args the factory arguments, passed through to the superclass
		 */
		public ManagedStopFilterFactory(Map<String,String> args) {
			super(args);
		}

		/**
		 * This analysis component knows the most logical "path"
		 * for which to manage stop words from.
		 *
		 * @return the fixed prefix {@code /schema/analysis/stopwords/} followed by
		 *         this factory's handle
		 */
		@Override
		public String getResourceId() {
			return "/schema/analysis/stopwords/" + handle;
		}

		/**
		 * Returns the implementation class for managing stop words.
		 *
		 * @return {@link ManagedWordSetResource}
		 */
		protected Class<? extends ManagedResource> getManagedResourceImplClass() {
			return ManagedWordSetResource.class;
		}

		/**
		 * Callback invoked by the {@link ManagedResource} instance to trigger this
		 * class to create the CharArraySet used to create the StopFilter using the
		 * wordset managed by {@link ManagedWordSetResource}. Keep in mind that
		 * a schema.xml may reuse the same {@link ManagedStopFilterFactory} many
		 * times for different field types; behind the scenes all instances of this
		 * class/handle combination share the same managed data, hence the need for
		 * a listener/callback scheme.
		 *
		 * @param args init args; the optional {@code ignoreCase} entry selects
		 *             case-insensitive matching and is treated as {@code false}
		 *             when it is missing
		 * @param res  the managed resource, which is cast to
		 *             {@link ManagedWordSetResource} to obtain the word set
		 */
		@Override
		public void onManagedResourceInitialized(NamedList<?> args, ManagedResource res)
				throws SolrException {

			Set<String> managedWords = ((ManagedWordSetResource)res).getWordSet();

			// getBooleanArg hands back a Boolean that is null when the argument is
			// not present; unboxing it directly would throw a NullPointerException,
			// so fall back to case-sensitive matching in that case.
			Boolean ignoreCaseArg = args.getBooleanArg("ignoreCase");
			boolean ignoreCase = ignoreCaseArg != null && ignoreCaseArg;

			// rebuild the Lucene CharArraySet from our managedWords set, which is
			// slightly inefficient to do for every instance of the managed filter;
			// populate a local set first and only then store it in the field
			CharArraySet rebuilt = new CharArraySet(managedWords.size(), ignoreCase);
			rebuilt.addAll(managedWords);
			stopWords = rebuilt;
		}

		/**
		 * Returns a StopFilter based on our managed stop word set.
		 *
		 * @param input the token stream to wrap
		 * @return a {@link StopFilter} over {@code input} using the managed stop words
		 * @throws IllegalStateException if the stop word set has not been built yet
		 */
		@Override
		public TokenStream create(TokenStream input) {
			if (stopWords == null) {
				throw new IllegalStateException("Managed stopwords not initialized correctly!");
			}
			return new StopFilter(input, stopWords);
		}
	}