| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.cassandra.index.sasi.analyzer; |
| |
| import java.nio.ByteBuffer; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.Set; |
| |
| import org.apache.cassandra.index.sasi.analyzer.filter.BasicResultFilters; |
| import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineBuilder; |
| import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineExecutor; |
| import org.apache.cassandra.index.sasi.analyzer.filter.FilterPipelineTask; |
| import org.apache.cassandra.db.marshal.AbstractType; |
| import org.apache.cassandra.db.marshal.AsciiType; |
| import org.apache.cassandra.db.marshal.UTF8Type; |
| import org.apache.cassandra.serializers.MarshalException; |
| import org.apache.cassandra.utils.ByteBufferUtil; |
| |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| /** |
| * Analyzer that does *not* tokenize the input. Optionally will |
| * apply filters for the input output as defined in analyzers options |
| */ |
| public class NonTokenizingAnalyzer extends AbstractAnalyzer |
| { |
| private static final Logger logger = LoggerFactory.getLogger(NonTokenizingAnalyzer.class); |
| |
| private static final Set<AbstractType<?>> VALID_ANALYZABLE_TYPES = new HashSet<AbstractType<?>>() |
| {{ |
| add(UTF8Type.instance); |
| add(AsciiType.instance); |
| }}; |
| |
| private AbstractType validator; |
| private NonTokenizingOptions options; |
| private FilterPipelineTask filterPipeline; |
| |
| private ByteBuffer input; |
| private boolean hasNext = false; |
| |
| public void init(Map<String, String> options, AbstractType validator) |
| { |
| init(NonTokenizingOptions.buildFromMap(options), validator); |
| } |
| |
| public void init(NonTokenizingOptions tokenizerOptions, AbstractType validator) |
| { |
| this.validator = validator; |
| this.options = tokenizerOptions; |
| this.filterPipeline = getFilterPipeline(); |
| } |
| |
| public boolean hasNext() |
| { |
| // check that we know how to handle the input, otherwise bail |
| if (!VALID_ANALYZABLE_TYPES.contains(validator)) |
| return false; |
| |
| if (hasNext) |
| { |
| String inputStr; |
| |
| try |
| { |
| inputStr = validator.getString(input); |
| if (inputStr == null) |
| throw new MarshalException(String.format("'null' deserialized value for %s with %s", ByteBufferUtil.bytesToHex(input), validator)); |
| |
| Object pipelineRes = FilterPipelineExecutor.execute(filterPipeline, inputStr); |
| if (pipelineRes == null) |
| return false; |
| |
| next = validator.fromString(normalize((String) pipelineRes)); |
| return true; |
| } |
| catch (MarshalException e) |
| { |
| logger.error("Failed to deserialize value with " + validator, e); |
| return false; |
| } |
| finally |
| { |
| hasNext = false; |
| } |
| } |
| |
| return false; |
| } |
| |
| public void reset(ByteBuffer input) |
| { |
| this.next = null; |
| this.input = input; |
| this.hasNext = true; |
| } |
| |
| private FilterPipelineTask getFilterPipeline() |
| { |
| FilterPipelineBuilder builder = new FilterPipelineBuilder(new BasicResultFilters.NoOperation()); |
| if (options.isCaseSensitive() && options.shouldLowerCaseOutput()) |
| builder = builder.add("to_lower", new BasicResultFilters.LowerCase()); |
| if (options.isCaseSensitive() && options.shouldUpperCaseOutput()) |
| builder = builder.add("to_upper", new BasicResultFilters.UpperCase()); |
| if (!options.isCaseSensitive()) |
| builder = builder.add("to_lower", new BasicResultFilters.LowerCase()); |
| return builder.build(); |
| } |
| |
| @Override |
| public boolean isCompatibleWith(AbstractType<?> validator) |
| { |
| return VALID_ANALYZABLE_TYPES.contains(validator); |
| } |
| } |