blob: 04c576537c37bbee06389c4ccdd9b4909980c599 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nutch.indexer.replace;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* POJO to store a field name, its match pattern and its replacement string.
*
* A checkAndReplace method is provided where you can simultaneously check if
* the field matches this replacer and if the pattern matches your field value.
*
* @author Peter Ciuffetti
*/
public class FieldReplacer {

  private static final Logger LOG = LoggerFactory.getLogger(FieldReplacer.class
      .getName());

  /** Name of the source field this replacer operates on (trimmed, never null). */
  private final String fieldName;
  /** Name of the field the replaced value is intended for (trimmed, never null). */
  private final String toFieldName;
  /** Compiled match pattern; null whenever this replacer is invalid. */
  private final Pattern pattern;
  /** Replacement string; the empty string when none was supplied. */
  private final String replacement;
  /** False when construction detected a missing name/pattern or a bad regex. */
  private final boolean isValid;

  /**
   * Create a FieldReplacer for a field.
   *
   * Missing arguments and pattern compilation failures never escape this
   * constructor: they are logged and the object is marked invalid instead
   * (see {@link #isValid()}). This prevents the caller from attempting invalid
   * replacements.
   *
   * @param fieldName
   *          the name of the source field to operate on. Required; null or
   *          blank marks the replacer invalid.
   * @param toFieldName
   *          the name of the target field. Required; null or blank marks the
   *          replacer invalid.
   * @param pattern
   *          the pattern the field must match. Required; null or blank marks
   *          the replacer invalid.
   * @param replacement
   *          the replacement string; null is treated as the empty string
   * @param flags
   *          the Pattern flags value, or null if no flags are needed
   */
  public FieldReplacer(String fieldName, String toFieldName, String pattern,
      String replacement, Integer flags) {

    // Track validity in a local so the isValid field can be final.
    boolean valid = true;

    // Must have a non-empty field name, target field name and pattern.
    if (isBlank(fieldName)) {
      LOG.error("Empty fieldName provided, FieldReplacer marked invalid.");
      valid = false;
    }
    if (isBlank(toFieldName)) {
      LOG.error(
          "Empty toFieldName for field {} provided, FieldReplacer marked invalid.",
          fieldName);
      valid = false;
    }
    if (isBlank(pattern)) {
      LOG.error(
          "Empty pattern for field {} provided, FieldReplacer marked invalid.",
          fieldName);
      valid = false;
    }

    // Null names are stored as "" so that trimming cannot throw and so that
    // checkAndReplace() can safely call equals() on this.fieldName.
    this.fieldName = fieldName == null ? "" : fieldName.trim();
    this.toFieldName = toFieldName == null ? "" : toFieldName.trim();
    this.replacement = replacement == null ? "" : replacement;

    Pattern compiled = null;
    if (valid) {
      LOG.info("Compiling pattern {} for field {}", pattern, fieldName);
      try {
        compiled = flags == null ? Pattern.compile(pattern)
            : Pattern.compile(pattern, flags);
      } catch (PatternSyntaxException e) {
        LOG.error("Pattern {} for field {} failed to compile: {}", pattern,
            fieldName, e.toString());
        valid = false;
      } catch (IllegalArgumentException e) {
        // Pattern.compile(String, int) rejects unknown flag bits with a plain
        // IllegalArgumentException rather than a PatternSyntaxException.
        LOG.error("Pattern {} for field {} has invalid flags {}: {}", pattern,
            fieldName, flags, e.toString());
        valid = false;
      }
    }
    this.pattern = compiled;
    this.isValid = valid;
  }

  /**
   * Field replacer with the input and output field the same.
   *
   * @param fieldName
   *          the name of the field to operate on, used as both source and
   *          target. Required.
   * @param pattern
   *          the pattern the field must match. Required.
   * @param replacement
   *          the replacement string; null is treated as the empty string
   * @param flags
   *          the Pattern flags value, or null if no flags are needed
   */
  public FieldReplacer(String fieldName, String pattern, String replacement,
      Integer flags) {
    this(fieldName, fieldName, pattern, replacement, flags);
  }

  /**
   * @return the trimmed source field name; the empty string if none was given
   */
  public String getFieldName() {
    return this.fieldName;
  }

  /**
   * @return the trimmed target field name; the empty string if none was given
   */
  public String getToFieldName() {
    return this.toFieldName;
  }

  /**
   * @return the compiled pattern, or null if this replacer is not valid
   */
  public Pattern getPattern() {
    return this.pattern;
  }

  /**
   * @return the replacement string, never null
   */
  public String getReplacement() {
    return this.replacement;
  }

  /**
   * Does this FieldReplacer have valid field names and a pattern that
   * compiled?
   *
   * @return true if replacements can be performed, false if construction
   *         detected a problem
   */
  public boolean isValid() {
    return this.isValid;
  }

  /**
   * Return the replacement value for a field value.
   *
   * This does not check for a matching field; the caller must decide if this
   * FieldReplacer should operate on this value by checking getFieldName().
   *
   * The method returns the value with the replacement. If the value returned is
   * not different then either the pattern didn't match or the replacement was a
   * no-op.
   *
   * @param value
   *          the field value to transform; may be null
   * @return the value with every pattern match replaced, or the value
   *         unchanged when it is null or this replacer is not valid
   */
  public String replace(String value) {
    if (!this.isValid || value == null) {
      return value;
    }
    return this.pattern.matcher(value).replaceAll(this.replacement);
  }

  /**
   * Return a replacement value for a field.
   *
   * This is designed to fail fast and trigger a replacement only when
   * necessary. If this method returns null, either the field does not match or
   * the value does not match the pattern (or possibly the pattern is invalid).
   *
   * So only if the method returns a non-null value will you need to replace the
   * value for the field.
   *
   * @param fieldName
   *          the name of the field you are checking
   * @param value
   *          the value of the field you are checking
   * @return a replacement value. If null, either the field does not match or
   *         the value does not match.
   */
  public String checkAndReplace(String fieldName, String value) {
    if (!this.isValid || !this.fieldName.equals(fieldName)) {
      return null;
    }
    if (value == null || value.length() == 0) {
      return null;
    }
    Matcher m = this.pattern.matcher(value);
    // replaceAll() resets the matcher itself, so the find() probe is safe.
    return m.find() ? m.replaceAll(this.replacement) : null;
  }

  /** Returns true when the string is null or contains only whitespace. */
  private static boolean isBlank(String s) {
    return s == null || s.trim().isEmpty();
  }
}