blob: e911340fe77809d1c550bab57901cca95ce9f372 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.extraction;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.StringUtils;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
*/
public class RegexDimExtractionFn extends DimExtractionFn
{
private static final byte CACHE_KEY_SEPARATOR = (byte) 0xFF;
private final String expr;
private final int index;
private final Pattern pattern;
private final boolean replaceMissingValue;
private final String replaceMissingValueWith;
@JsonCreator
public RegexDimExtractionFn(
@JsonProperty("expr") String expr,
@JsonProperty("index") Integer index,
@JsonProperty("replaceMissingValue") Boolean replaceMissingValue,
@JsonProperty("replaceMissingValueWith") String replaceMissingValueWith
)
{
Preconditions.checkNotNull(expr, "expr must not be null");
this.expr = expr;
this.index = index == null ? 1 : index;
this.pattern = Pattern.compile(expr);
this.replaceMissingValue = replaceMissingValue == null ? false : replaceMissingValue;
this.replaceMissingValueWith = replaceMissingValueWith;
}
public RegexDimExtractionFn(
String expr,
Boolean replaceMissingValue,
String replaceMissingValueWith
)
{
this(expr, null, replaceMissingValue, replaceMissingValueWith);
}
@Override
public byte[] getCacheKey()
{
byte[] exprBytes = StringUtils.toUtf8(expr);
byte[] indexBytes = Ints.toByteArray(index);
byte[] replaceBytes = replaceMissingValue ? new byte[]{1} : new byte[]{0};
byte[] replaceStrBytes;
if (replaceMissingValueWith == null) {
replaceStrBytes = new byte[]{};
} else {
replaceStrBytes = StringUtils.toUtf8(replaceMissingValueWith);
}
int totalLen = 1
+ exprBytes.length
+ indexBytes.length
+ replaceBytes.length
+ replaceStrBytes.length; // fields
totalLen += 2; // separators
return ByteBuffer.allocate(totalLen)
.put(ExtractionCacheHelper.CACHE_TYPE_ID_REGEX)
.put(exprBytes)
.put(CACHE_KEY_SEPARATOR)
.put(indexBytes)
.put(replaceStrBytes)
.put(CACHE_KEY_SEPARATOR)
.put(replaceBytes)
.array();
}
@Nullable
@Override
public String apply(@Nullable String dimValue)
{
final String retVal;
String val = NullHandling.nullToEmptyIfNeeded(dimValue);
final Matcher matcher = val == null ? null : pattern.matcher(val);
if (matcher != null && matcher.find()) {
retVal = matcher.group(index);
} else {
retVal = replaceMissingValue ? replaceMissingValueWith : dimValue;
}
return NullHandling.emptyToNullIfNeeded(retVal);
}
@JsonProperty("expr")
public String getExpr()
{
return expr;
}
@JsonProperty
public int getIndex()
{
return index;
}
@JsonProperty("replaceMissingValue")
public boolean isReplaceMissingValue()
{
return replaceMissingValue;
}
@JsonProperty("replaceMissingValueWith")
public String getReplaceMissingValueWith()
{
return replaceMissingValueWith;
}
@Override
public boolean preservesOrdering()
{
return false;
}
@Override
public ExtractionType getExtractionType()
{
return ExtractionType.MANY_TO_ONE;
}
@Override
public String toString()
{
return StringUtils.format("regex(/%s/, %d)", expr, index);
}
@Override
public boolean equals(final Object o)
{
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final RegexDimExtractionFn that = (RegexDimExtractionFn) o;
return index == that.index &&
replaceMissingValue == that.replaceMissingValue &&
Objects.equals(expr, that.expr) &&
Objects.equals(replaceMissingValueWith, that.replaceMissingValueWith);
}
@Override
public int hashCode()
{
return Objects.hash(expr, index, replaceMissingValue, replaceMissingValueWith);
}
}