blob: 778df5b19346aba68eeb1907190f67744d18fcec [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.runtime.evaluators.functions.utils;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.asterix.runtime.evaluators.functions.StringEvaluatorUtils;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
import org.apache.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
import org.apache.hyracks.data.std.util.UTF8CharSequence;
/**
* A wrapper for regular expression processing methods.
*/
public class RegExpMatcher {
// Library regular expression processing objects.
private Pattern pattern = null;
private Matcher matcher = null;
// For storing the pattern string.
private final ByteArrayAccessibleOutputStream lastPatternStorage = new ByteArrayAccessibleOutputStream();
private final UTF8StringPointable lastPatternPtr = new UTF8StringPointable();
// For storing the flag string.
private final ByteArrayAccessibleOutputStream lastFlagsStorage = new ByteArrayAccessibleOutputStream();
private final UTF8StringPointable lastFlagPtr = new UTF8StringPointable();
// The char sequence for the source string.
private final UTF8CharSequence charSeq = new UTF8CharSequence();
// For storing the replacement string.
private final ByteArrayAccessibleOutputStream lastReplaceStorage = new ByteArrayAccessibleOutputStream();
private final UTF8StringPointable lastReplaceStrPtr = new UTF8StringPointable();
private String replaceStr = null;
// For storing the string replacement result.
private final StringBuffer resultBuf = new StringBuffer();
@FunctionalInterface
public interface IRegExpPatternGenerator {
String toRegExpPatternString(String inputString) throws HyracksDataException;
}
/**
* Builds the matcher.
*
* @param srcPtr
* , the source string for regular expression operations.
* @param patternPtr
* , the definition of the regular expression.
*/
public void build(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr) throws HyracksDataException {
build(srcPtr, patternPtr, null);
}
/**
* Builds the matcher.
*
* @param srcPtr
* , the source string for regular expression operations.
* @param patternPtr
* , the definition of the regular expression.
* @param flagPtr
* , the flags.
*/
public void build(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, UTF8StringPointable flagPtr)
throws HyracksDataException {
build(srcPtr, patternPtr, flagPtr, null);
}
/**
* Builds the matcher.
*
* @param srcPtr
* , the source string for regular expression operations.
* @param patternPtr
* , the definition of the regular expression.
* @param flagPtr
* , the flags.
* @param patternGenerator
* , the regular expression pattern generator.
*/
public void build(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr, UTF8StringPointable flagPtr,
IRegExpPatternGenerator patternGenerator) throws HyracksDataException {
// Builds a new pattern if necessary. patternPtr cannot be null
final boolean newPattern = pattern == null || lastPatternPtr.compareTo(patternPtr) != 0;
final boolean newFlag = flagPtr != null && (pattern == null || lastFlagPtr.compareTo(flagPtr) != 0);
charSeq.reset(srcPtr);
if (newPattern || newFlag) {
// ! object creation !
String patternString;
if (newPattern) {
StringEvaluatorUtils.copyResetUTF8Pointable(patternPtr, lastPatternStorage, lastPatternPtr);
String inputPatternString = lastPatternPtr.toString();
patternString = patternGenerator == null ? inputPatternString
: patternGenerator.toRegExpPatternString(inputPatternString);
} else {
// use whatever pattern string the previous pattern was using
patternString = pattern.pattern();
}
int flags = 0;
if (newFlag) {
StringEvaluatorUtils.copyResetUTF8Pointable(flagPtr, lastFlagsStorage, lastFlagPtr);
flags = StringEvaluatorUtils.toFlag(lastFlagPtr.toString());
} else if (pattern != null) {
// use whatever flags the previous pattern was using
flags = pattern.flags();
}
pattern = Pattern.compile(patternString, flags);
matcher = pattern.matcher(charSeq);
} else {
matcher.reset(charSeq);
}
}
/**
* Whether the source string matches the regular expression defined pattern.
*
* @return true if it contains the pattern; false otherwise.
*/
public boolean matches() {
return matcher.matches();
}
/**
* Whether the source string contains the regular expression defined pattern.
*
* @return true if it contains the pattern; false otherwise.
*/
public boolean find() {
return matcher.find();
}
/**
* Returns the matched string. This should be called after checking that the find()
* method returns true.
*
* @return The matched string
*/
public String group() {
return matcher.group();
}
/**
* @return the first matched position of the regular expression pattern in the source string.
*/
public int position() {
return matcher.find() ? matcher.start() : -1;
}
/**
* Replaces the appearances of a regular expression defined pattern in a source string with a given
* replacement string.
*
* @param replaceStrPtr
* , the string for replacing the regular expression.
* @return a new string with contained regular expressions replaced.
*/
public String replace(UTF8StringPointable replaceStrPtr) {
return replace(replaceStrPtr, Integer.MAX_VALUE);
}
/**
* Replaces the appearances of a regular expression defined pattern in a source string with a given
* replacement string.
*
* @param replaceStrPtr
* , the string for replacing the regular expression.
* @param replaceLimit
* , the maximum number of replacements to make
* @return a new string with contained regular expressions replaced.
*/
public String replace(UTF8StringPointable replaceStrPtr, int replaceLimit) {
if (replaceLimit < 0) {
replaceLimit = Integer.MAX_VALUE;
}
// Sets up a new replacement string if necessary.
final boolean newReplace =
replaceStrPtr != null && (replaceStr == null || lastReplaceStrPtr.compareTo(replaceStrPtr) != 0);
if (newReplace) {
StringEvaluatorUtils.copyResetUTF8Pointable(replaceStrPtr, lastReplaceStorage, lastReplaceStrPtr);
replaceStr = replaceStrPtr.toString();
}
// Does the actual replacement.
resultBuf.setLength(0);
for (int i = 0; i < replaceLimit && matcher.find(); i++) {
matcher.appendReplacement(resultBuf, replaceStr);
}
matcher.appendTail(resultBuf);
return resultBuf.toString();
}
}