lucene/core/src/java/org/apache/lucene/search/WildcardQuery.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.search;


 import java.util.ArrayList;
 import java.util.List;

 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;

 /** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
  * matches any character sequence (including the empty one), and <code>?</code>,
  * which matches any single character. '\' is the escape character.
  * <p>
  * Note this query can be slow, as it
  * needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
  * a Wildcard term should not start with the wildcard <code>*</code>
  *
  * <p>This query uses the {@link
  * MultiTermQuery#CONSTANT_SCORE_REWRITE}
  * rewrite method.
  *
  * @see AutomatonQuery
  */
 public class WildcardQuery extends AutomatonQuery {
   /** String equality with support for wildcards */
   public static final char WILDCARD_STRING = '*';

   /** Char equality with support for wildcards */
   public static final char WILDCARD_CHAR = '?';

   /** Escape character */
   public static final char WILDCARD_ESCAPE = '\\';

   /**
    * Constructs a query for terms matching <code>term</code>.
    */
   public WildcardQuery(Term term) {
     super(term, toAutomaton(term));
   }

   /**
    * Constructs a query for terms matching <code>term</code>.
    * @param determinizeWorkLimit maximum effort to spend while compiling the automaton from this
    *        wildcard. Set higher to allow more complex queries and lower to prevent memory exhaustion.
    *        Use {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a decent default if you don't
    *        otherwise know what to specify.
    */
   public WildcardQuery(Term term, int determinizeWorkLimit) {
     super(term, toAutomaton(term), determinizeWorkLimit);
   }

   /**
    * Convert Lucene wildcard syntax into an automaton.
    * @lucene.internal
    */
   @SuppressWarnings("fallthrough")
   public static Automaton toAutomaton(Term wildcardquery) {
     List<Automaton> automata = new ArrayList<>();

     String wildcardText = wildcardquery.text();

     for (int i = 0; i < wildcardText.length();) {
       final int c = wildcardText.codePointAt(i);
       int length = Character.charCount(c);
       switch(c) {
         case WILDCARD_STRING:
           automata.add(Automata.makeAnyString());
           break;
         case WILDCARD_CHAR:
           automata.add(Automata.makeAnyChar());
           break;
         case WILDCARD_ESCAPE:
           // add the next codepoint instead, if it exists
           if (i + length < wildcardText.length()) {
             final int nextChar = wildcardText.codePointAt(i + length);
             length += Character.charCount(nextChar);
             automata.add(Automata.makeChar(nextChar));
             break;
           } // else fallthru, lenient parsing with a trailing \
         default:
           automata.add(Automata.makeChar(c));
       }
       i += length;
     }

     return Operations.concatenate(automata);
   }

   /**
    * Returns the pattern term.
    */
   public Term getTerm() {
     return term;
   }

   /** Prints a user-readable version of this query. */
   @Override
   public String toString(String field) {
     StringBuilder buffer = new StringBuilder();
     if (!getField().equals(field)) {
       buffer.append(getField());
       buffer.append(":");
     }
     buffer.append(term.text());
     return buffer.toString();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.search;


	import java.util.ArrayList;
	import java.util.List;

	import org.apache.lucene.index.Term;
	import org.apache.lucene.util.automaton.Automata;
	import org.apache.lucene.util.automaton.Automaton;
	import org.apache.lucene.util.automaton.Operations;

	/** Implements the wildcard search query. Supported wildcards are <code>*</code>, which
	* matches any character sequence (including the empty one), and <code>?</code>,
	* which matches any single character. '\' is the escape character.
	* <p>
	* Note this query can be slow, as it
	* needs to iterate over many terms. In order to prevent extremely slow WildcardQueries,
	* a Wildcard term should not start with the wildcard <code>*</code>
	*
	* <p>This query uses the {@link
	* MultiTermQuery#CONSTANT_SCORE_REWRITE}
	* rewrite method.
	*
	* @see AutomatonQuery
	*/
	public class WildcardQuery extends AutomatonQuery {
	/** String equality with support for wildcards */
	public static final char WILDCARD_STRING = '*';

	/** Char equality with support for wildcards */
	public static final char WILDCARD_CHAR = '?';

	/** Escape character */
	public static final char WILDCARD_ESCAPE = '\\';

	/**
	* Constructs a query for terms matching <code>term</code>.
	*/
	public WildcardQuery(Term term) {
	super(term, toAutomaton(term));
	}

	/**
	* Constructs a query for terms matching <code>term</code>.
	* @param determinizeWorkLimit maximum effort to spend while compiling the automaton from this
	* wildcard. Set higher to allow more complex queries and lower to prevent memory exhaustion.
	* Use {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a decent default if you don't
	* otherwise know what to specify.
	*/
	public WildcardQuery(Term term, int determinizeWorkLimit) {
	super(term, toAutomaton(term), determinizeWorkLimit);
	}

	/**
	* Convert Lucene wildcard syntax into an automaton.
	* @lucene.internal
	*/
	@SuppressWarnings("fallthrough")
	public static Automaton toAutomaton(Term wildcardquery) {
	List<Automaton> automata = new ArrayList<>();

	String wildcardText = wildcardquery.text();

	for (int i = 0; i < wildcardText.length();) {
	final int c = wildcardText.codePointAt(i);
	int length = Character.charCount(c);
	switch(c) {
	case WILDCARD_STRING:
	automata.add(Automata.makeAnyString());
	break;
	case WILDCARD_CHAR:
	automata.add(Automata.makeAnyChar());
	break;
	case WILDCARD_ESCAPE:
	// add the next codepoint instead, if it exists
	if (i + length < wildcardText.length()) {
	final int nextChar = wildcardText.codePointAt(i + length);
	length += Character.charCount(nextChar);
	automata.add(Automata.makeChar(nextChar));
	break;
	} // else fallthru, lenient parsing with a trailing \
	default:
	automata.add(Automata.makeChar(c));
	}
	i += length;
	}

	return Operations.concatenate(automata);
	}

	/**
	* Returns the pattern term.
	*/
	public Term getTerm() {
	return term;
	}

	/** Prints a user-readable version of this query. */
	@Override
	public String toString(String field) {
	StringBuilder buffer = new StringBuilder();
	if (!getField().equals(field)) {
	buffer.append(getField());
	buffer.append(":");
	}
	buffer.append(term.text());
	return buffer.toString();
	}
	}