/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
/**
 * An analyzer that applies an optional chain of CharFilters, a Tokenizer, and
 * a list of TokenFilters to create a TokenStream.
*
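 * <p>A minimal construction sketch, assuming Lucene's SPI factory lookup via
 * {@code forName}; the "whitespace" and "lowercase" names are illustrative
 * choices, not a requirement of this class:
 * <pre>{@code
 * TokenizerChain chain = new TokenizerChain(
 *     TokenizerFactory.forName("whitespace", new java.util.HashMap<>()),
 *     new TokenFilterFactory[] {
 *         TokenFilterFactory.forName("lowercase", new java.util.HashMap<>())
 *     });
 * }</pre>
 *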
* It should probably be replaced with {@link CustomAnalyzer}.
* @since 3.1
*/
public final class TokenizerChain extends SolrAnalyzer {
  private static final CharFilterFactory[] EMPTY_CHAR_FILTERS = new CharFilterFactory[0];
  private static final TokenFilterFactory[] EMPTY_TOKEN_FILTERS = new TokenFilterFactory[0];
  private final CharFilterFactory[] charFilters;
  private final TokenizerFactory tokenizer;
  private final TokenFilterFactory[] filters;
  /** Creates a TokenizerChain copying the factories of the given {@link CustomAnalyzer}. */
public TokenizerChain(CustomAnalyzer customAnalyzer) {
this(
customAnalyzer.getCharFilterFactories().toArray(new CharFilterFactory[0]),
customAnalyzer.getTokenizerFactory(),
customAnalyzer.getTokenFilterFactories().toArray(new TokenFilterFactory[0]));
setPositionIncrementGap(customAnalyzer.getPositionIncrementGap(null));
setVersion(customAnalyzer.getVersion());
assert customAnalyzer.getOffsetGap(null) == 1; // note: we don't support setting the offset gap
}
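  // A minimal usage sketch for the copy constructor above: build a Lucene
  // CustomAnalyzer and wrap it. The "standard" and "lowercase" SPI names are
  // illustrative, and the builder methods declare IOException:
  //
  //   CustomAnalyzer custom = CustomAnalyzer.builder()
  //       .withTokenizer("standard")
  //       .addTokenFilter("lowercase")
  //       .build();
  //   TokenizerChain chain = new TokenizerChain(custom);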
/**
* Creates a new TokenizerChain w/o any CharFilterFactories.
*
* @param tokenizer Factory for the Tokenizer to use, must not be null.
* @param filters Factories for the TokenFilters to use - if null, will be treated as if empty.
*/
public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
    this(null, tokenizer, filters);
}
/**
* Creates a new TokenizerChain.
*
* @param charFilters Factories for the CharFilters to use, if any - if null, will be treated as if empty.
* @param tokenizer Factory for the Tokenizer to use, must not be null.
 * @param filters Factories for the TokenFilters to use, if any - if null, will be treated as if empty.
*/
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
    charFilters = null == charFilters ? EMPTY_CHAR_FILTERS : charFilters;
    filters = null == filters ? EMPTY_TOKEN_FILTERS : filters;
if (null == tokenizer) {
throw new NullPointerException("TokenizerFactory must not be null");
}
this.charFilters = charFilters;
this.tokenizer = tokenizer;
this.filters = filters;
}
/** @return array of CharFilterFactories, may be empty but never null */
public CharFilterFactory[] getCharFilterFactories() { return charFilters; }
/** @return the TokenizerFactory in use, will never be null */
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
/** @return array of TokenFilterFactories, may be empty but never null */
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
@Override
public Reader initReader(String fieldName, Reader reader) {
    // charFilters is never null (the constructor guarantees it); an empty
    // array simply leaves the reader unwrapped.
    for (CharFilterFactory charFilter : charFilters) {
      reader = charFilter.create(reader);
    }
return reader;
}
@Override
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
    for (CharFilterFactory charFilter : charFilters) {
      reader = charFilter.normalize(reader);
    }
return reader;
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tk = tokenizer.create(attributeFactory(fieldName));
TokenStream ts = tk;
for (TokenFilterFactory filter : filters) {
ts = filter.create(ts);
}
return new TokenStreamComponents(tk, ts);
}
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
TokenStream result = in;
for (TokenFilterFactory filter : filters) {
result = filter.normalize(result);
}
return result;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("TokenizerChain(");
    for (CharFilterFactory filter : charFilters) {
sb.append(filter);
sb.append(", ");
}
sb.append(tokenizer);
    for (TokenFilterFactory filter : filters) {
sb.append(", ");
sb.append(filter);
}
sb.append(')');
return sb.toString();
}
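  /**
   * Returns an analyzer for multi-term queries (e.g. wildcard or prefix): a
   * {@link KeywordTokenizer} fed through the {@code normalize} step of each
   * char filter and token filter in this chain.
   */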
public Analyzer getMultiTermAnalyzer() {
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tk = new KeywordTokenizer();
TokenStream ts = tk;
for (TokenFilterFactory filter : filters) {
ts = filter.normalize(ts);
}
return new TokenStreamComponents(tk, ts);
}
@Override
protected Reader initReader(String fieldName, Reader reader) {
        for (CharFilterFactory charFilter : charFilters) {
          reader = charFilter.normalize(reader);
        }
return reader;
}
};
}
}