| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.druid.query.extraction; |
| |
| import com.fasterxml.jackson.annotation.JsonSubTypes; |
| import com.fasterxml.jackson.annotation.JsonTypeInfo; |
| import org.apache.druid.guice.annotations.ExtensionPoint; |
| import org.apache.druid.java.util.common.Cacheable; |
| import org.apache.druid.query.lookup.LookupExtractionFn; |
| |
| import javax.annotation.Nullable; |
| |
| /** |
| * An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension). |
| * Note that ExtractionFn implementations are expected to be Threadsafe. |
| * |
| * A simple example of the type of operation this enables is the RegexDimExtractionFn which applies a |
| * regular expression with a capture group. When the regular expression matches the value of a dimension, |
| * the value captured by the group is used for grouping operations instead of the dimension value. |
| */ |
| @ExtensionPoint |
| @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type") |
| @JsonSubTypes(value = { |
| @JsonSubTypes.Type(name = "time", value = TimeDimExtractionFn.class), |
| @JsonSubTypes.Type(name = "regex", value = RegexDimExtractionFn.class), |
| @JsonSubTypes.Type(name = "partial", value = MatchingDimExtractionFn.class), |
| @JsonSubTypes.Type(name = "searchQuery", value = SearchQuerySpecDimExtractionFn.class), |
| @JsonSubTypes.Type(name = "javascript", value = JavaScriptExtractionFn.class), |
| @JsonSubTypes.Type(name = "timeFormat", value = TimeFormatExtractionFn.class), |
| @JsonSubTypes.Type(name = "identity", value = IdentityExtractionFn.class), |
| @JsonSubTypes.Type(name = "lookup", value = LookupExtractionFn.class), |
| @JsonSubTypes.Type(name = "substring", value = SubstringDimExtractionFn.class), |
| @JsonSubTypes.Type(name = "cascade", value = CascadeExtractionFn.class), |
| @JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class), |
| @JsonSubTypes.Type(name = "upper", value = UpperExtractionFn.class), |
| @JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class), |
| @JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class), |
| @JsonSubTypes.Type(name = "strlen", value = StrlenExtractionFn.class) |
| }) |
| public interface ExtractionFn extends Cacheable |
| { |
| /** |
| * The "extraction" function. This should map an Object into some String value. |
| * <p> |
| * In order to maintain the "null and empty string are equivalent" semantics that Druid provides, the |
| * empty string is considered invalid output for this method and should instead return null. This is |
| * a contract on the method rather than enforced at a lower level in order to eliminate a global check |
| * for extraction functions that do not already need one. |
| * |
| * @param value the original value of the dimension |
| * |
| * @return a value that should be used instead of the original |
| */ |
| @Nullable |
| String apply(@Nullable Object value); |
| |
| /** |
| * The "extraction" function. This should map a String value into some other String value. |
| * <p> |
| * Like {@link #apply(Object)}, the empty string is considered invalid output for this method and it should |
| * instead return null. |
| * |
| * @param value the original value of the dimension |
| * |
| * @return a value that should be used instead of the original |
| */ |
| @Nullable |
| String apply(@Nullable String value); |
| |
| /** |
| * The "extraction" function. This should map a long value into some String value. |
| * <p> |
| * Like {@link #apply(Object)}, the empty string is considered invalid output for this method and it should |
| * instead return null. |
| * |
| * @param value the original value of the dimension |
| * |
| * @return a value that should be used instead of the original |
| */ |
| String apply(long value); |
| |
| /** |
| * Offers information on whether the extraction will preserve the original ordering of the values. |
| * <p> |
| * Some optimizations of queries is possible if ordering is preserved. Null values *do* count towards |
| * ordering. |
| * |
| * @return true if ordering is preserved, false otherwise |
| */ |
| boolean preservesOrdering(); |
| |
| /** |
| * A dim extraction can be of one of two types, renaming or rebucketing. In the `ONE_TO_ONE` case, a unique values is |
| * modified into another unique value. In the `MANY_TO_ONE` case, there is no longer a 1:1 relation between old dimension |
| * value and new dimension value |
| * |
| * @return {@link ExtractionFn.ExtractionType} declaring what kind of manipulation this function does |
| */ |
| ExtractionType getExtractionType(); |
| |
| enum ExtractionType |
| { |
| MANY_TO_ONE, ONE_TO_ONE |
| } |
| } |