blob: 010cfb60a8a664d755ff9c99385bc6f55cab793c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
/**
 * A query that matches terms against a wildcard pattern.
 *
 * <p>Two wildcards are recognised: <code>*</code> stands for any sequence of characters
 * (the empty sequence included) and <code>?</code> stands for exactly one character.
 * A backslash (<code>\</code>) escapes the character that follows it, so that the
 * character is matched literally.
 *
 * <p>Be aware that this query may be slow because it has to iterate over many terms.
 * To avoid extremely slow WildcardQueries, a wildcard term should not begin with the
 * wildcard <code>*</code>.
 *
 * <p>This query uses the {@link MultiTermQuery#CONSTANT_SCORE_REWRITE} rewrite method.
 *
 * @see AutomatonQuery
 */
public class WildcardQuery extends AutomatonQuery {
  /** Wildcard that matches any character sequence, including the empty one. */
  public static final char WILDCARD_STRING = '*';

  /** Wildcard that matches any single character. */
  public static final char WILDCARD_CHAR = '?';

  /** Escape character: the code point following it is taken literally. */
  public static final char WILDCARD_ESCAPE = '\\';

  /**
   * Creates a query for terms matching the wildcard pattern held by <code>term</code>.
   *
   * @param term the field and wildcard pattern to match
   */
  public WildcardQuery(Term term) {
    super(term, toAutomaton(term));
  }

  /**
   * Creates a query for terms matching the wildcard pattern held by <code>term</code>.
   *
   * @param term the field and wildcard pattern to match
   * @param determinizeWorkLimit maximum effort to spend while compiling the automaton from this
   *     wildcard. Set higher to allow more complex queries and lower to prevent memory
   *     exhaustion. Use {@link Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a decent default
   *     if you don't otherwise know what to specify.
   */
  public WildcardQuery(Term term, int determinizeWorkLimit) {
    super(term, toAutomaton(term), determinizeWorkLimit);
  }

  /**
   * Translates a Lucene wildcard pattern into an automaton.
   *
   * <p>The pattern text is walked one Unicode code point at a time. Each code point
   * contributes one automaton, and the concatenation of all of them is returned.
   *
   * @param wildcardquery term whose text is the wildcard pattern
   * @return the concatenation of the per-code-point automata
   * @lucene.internal
   */
  public static Automaton toAutomaton(Term wildcardquery) {
    final String pattern = wildcardquery.text();
    final int end = pattern.length();
    final List<Automaton> pieces = new ArrayList<>();

    int pos = 0;
    while (pos < end) {
      final int codePoint = pattern.codePointAt(pos);
      // Step past the current code point first; pos now addresses whatever follows it.
      pos += Character.charCount(codePoint);

      if (codePoint == WILDCARD_STRING) {
        pieces.add(Automata.makeAnyString());
      } else if (codePoint == WILDCARD_CHAR) {
        pieces.add(Automata.makeAnyChar());
      } else if (codePoint == WILDCARD_ESCAPE && pos < end) {
        // Escape followed by another code point: consume that one and match it literally.
        final int escaped = pattern.codePointAt(pos);
        pos += Character.charCount(escaped);
        pieces.add(Automata.makeChar(escaped));
      } else {
        // Ordinary code point, or a trailing escape character, which is parsed
        // leniently and matched as a literal backslash.
        pieces.add(Automata.makeChar(codePoint));
      }
    }

    return Operations.concatenate(pieces);
  }

  /**
   * Gives access to the pattern term.
   *
   * @return the term this query was built from
   */
  public Term getTerm() {
    return term;
  }

  /**
   * Renders this query in a user-readable form: the pattern text, preceded by
   * <code>fieldName:</code> whenever this query's field differs from <code>field</code>.
   *
   * @param field the field against which this query's own field is compared
   */
  @Override
  public String toString(String field) {
    final String ownField = getField();
    final String prefix = ownField.equals(field) ? "" : ownField + ":";
    return prefix + term.text();
  }
}