blob: 2a03824140e59092577aff3e14e804a79e7aba0a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.lucas.indexer.mapping;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.lucene.analysis.TokenStream;
import org.apache.uima.lucas.indexer.analysis.SelectFilter;
import org.apache.uima.lucas.indexer.analysis.SelectFilterFactory;
/**
* <p>
* Constructs <tt>TokenStream</tt>s according to a cover definition.
* </p>
* <p>
* This class uses {@link SelectFilter}s to build <tt>TokenStream</tt>s which
* only output terms that pass the <tt>SelectFilter</tt>. Each
* <tt>TokenStream</tt> corresponds to one cover set and will only output terms
* included in this particular set. This mechanism can be used to dynamically
* create multiple fields according to a definition about which term may be included
* in which field.<br/>
* To create an instance of this class, the {@link TermCoverBuilderFactory}
* should be used as it caches the cover definition. For further explanation of
* the nature of these covers and their required file format, please refer to
* the factory class.
* </p>
*
* @see TermCoverBuilderFactory
*
*/
public class TermCoverBuilder {
private final Map<String, List<String>> vocabularyCover;
private Iterator<String> subsetIterator;
private String currentCoverSubsetName;
private TokenStream currentCoverSubsetTokenStream;
private SelectFilterFactory selectFilterFactory;
private TokenStream tokenStream;
private Properties properties;
private final TermCoverDescription fieldDescription;
/**
* @param vocabularyCover
* @param selectFilterFactory
*/
public TermCoverBuilder(Map<String, List<String>> vocabularyCover,
TokenStream tokenStream, TermCoverDescription fieldDescription,
SelectFilterFactory selectFilterFactory) {
this.vocabularyCover = vocabularyCover;
this.tokenStream = tokenStream;
this.fieldDescription = fieldDescription;
this.selectFilterFactory = selectFilterFactory;
properties = new Properties();
properties.put("enablePositionIncrements", "false");
properties.put("ignoreCase",
fieldDescription.getIgnoreCaseOfSelectedTerms() ? "true" : "false");
subsetIterator = vocabularyCover.keySet().iterator();
}
public boolean increaseCoverSubset() {
if (subsetIterator.hasNext()) {
currentCoverSubsetName = subsetIterator.next();
properties.put(SelectFilterFactory.COVER_SUBSET_NAME, getCoverSubsetName());
currentCoverSubsetTokenStream = selectFilterFactory
.createTokenFilter(tokenStream, properties,
vocabularyCover.get(currentCoverSubsetName));
return true;
}
return false;
}
public String getCoverSubsetName() {
String fieldName = fieldDescription.getFieldName();
String combineFieldName = fieldDescription.getGenerateFieldNameMethod();
if (combineFieldName.equals(TermCoverMapper.USE_SUBSET_NAME_APPEND))
fieldName = fieldName + currentCoverSubsetName;
else if (combineFieldName.equals(TermCoverMapper.USE_SUBSET_NAME_PREPEND))
fieldName = currentCoverSubsetName + fieldName;
else if (combineFieldName.equals(TermCoverMapper.USE_SUBSET_NAME_REPLACE))
fieldName = currentCoverSubsetName;
return fieldName;
}
public TokenStream getPartitionTokenStream() {
return currentCoverSubsetTokenStream;
}
}