/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.nutch.indexer.replace; |
| |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| import java.util.regex.PatternSyntaxException; |
| |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| /** |
| * POJO to store a filename, its match pattern and its replacement string. |
| * |
| * A checkAndReplace method is provided where you can simultaneously check if |
| * the field matches this replacer and if the pattern matches your field value. |
| * |
| * @author Peter Ciuffetti |
| */ |
| public class FieldReplacer { |
| |
| private static final Logger LOG = LoggerFactory.getLogger(FieldReplacer.class |
| .getName()); |
| |
| private final String fieldName; |
| private final String toFieldName; |
| private final Pattern pattern; |
| private final String replacement; |
| private boolean isValid; |
| |
| /** |
| * Create a FieldReplacer for a field. |
| * |
| * Any pattern exceptions are caught within this constructor and the object is |
| * marked inValid. The error will be logged. This prevents this caller from |
| * attempting invalid replacements. |
| * |
| * @param fieldName |
| * the name of the source field to operate on. Required. |
| * @param toFieldName |
| * the name of the target field. Required. |
| * @param pattern |
| * the pattern the field must match. Required. |
| * @param replacement |
| * the replacement string |
| * @param flags |
| * the Pattern flags value, or null if no flags are needed |
| */ |
| public FieldReplacer(String fieldName, String toFieldName, String pattern, |
| String replacement, Integer flags) { |
| |
| this.isValid = true; |
| // Must have a non-empty field name and pattern. |
| if (fieldName == null || fieldName.trim().length() == 0) { |
| LOG.error("Empty fieldName provided, FieldReplacer marked invalid."); |
| this.isValid = false; |
| } |
| if (pattern == null || pattern.trim().length() == 0) { |
| LOG.error("Empty pattern for field " + fieldName |
| + "provided, FieldReplacer marked invalid."); |
| this.isValid = false; |
| } |
| |
| if (replacement == null) { |
| this.replacement = ""; |
| } else { |
| this.replacement = replacement; |
| } |
| |
| this.fieldName = fieldName.trim(); |
| this.toFieldName = toFieldName.trim(); |
| |
| if (this.isValid) { |
| LOG.info("Compiling pattern " + pattern + " for field " + fieldName); |
| Pattern myPattern = null; |
| try { |
| if (flags != null) { |
| myPattern = Pattern.compile(pattern, flags); |
| } else { |
| myPattern = Pattern.compile(pattern); |
| } |
| } catch (PatternSyntaxException e) { |
| LOG.error("Pattern " + pattern + " for field " + fieldName |
| + " failed to compile: " + e.toString()); |
| this.isValid = false; |
| } |
| this.pattern = myPattern; |
| } else { |
| this.pattern = null; |
| } |
| } |
| |
| /** |
| * Field replacer with the input and output field the same. |
| * |
| * @param fieldName |
| * @param pattern |
| * @param replacement |
| * @param flags |
| */ |
| public FieldReplacer(String fieldName, String pattern, String replacement, |
| Integer flags) { |
| this(fieldName, fieldName, pattern, replacement, flags); |
| } |
| |
| public String getFieldName() { |
| return this.fieldName; |
| } |
| |
| public String getToFieldName() { |
| return this.toFieldName; |
| } |
| |
| public Pattern getPattern() { |
| return this.pattern; |
| } |
| |
| public String getReplacement() { |
| return this.replacement; |
| } |
| |
| /** |
| * Does this FieldReplacer have a valid fieldname and pattern? |
| * |
| * @return |
| */ |
| public boolean isValid() { |
| return this.isValid; |
| } |
| |
| /** |
| * Return the replacement value for a field value. |
| * |
| * This does not check for a matching field; the caller must decide if this |
| * FieldReplacer should operate on this value by checking getFieldName(). |
| * |
| * The method returns the value with the replacement. If the value returned is |
| * not different then eiher the pattern didn't match or the replacement was a |
| * no-op. |
| * |
| * @param value |
| * @return |
| */ |
| public String replace(String value) { |
| if (this.isValid) { |
| return this.pattern.matcher(value).replaceAll(replacement); |
| } else { |
| return value; |
| } |
| } |
| |
| /** |
| * Return a replacement value for a field. |
| * |
| * This is designed to fail fast and trigger a replacement only when |
| * necessary. If this method returns null, either the field does not match or |
| * the value does not match the pattern (or possibly the pattern is invalid). |
| * |
| * So only if the method returns a non-null value will you need to replace the |
| * value for the field. |
| * |
| * @param fieldName |
| * the name of the field you are checking |
| * @param value |
| * the value of the field you are checking |
| * @return a replacement value. If null, either the field does not match or |
| * the value does not match. |
| */ |
| public String checkAndReplace(String fieldName, String value) { |
| if (this.fieldName.equals(fieldName)) { |
| if (value != null && value.length() > 0) { |
| if (this.isValid) { |
| Matcher m = this.pattern.matcher(value); |
| if (m.find()) { |
| return m.replaceAll(this.replacement); |
| } |
| } |
| } |
| } |
| return null; |
| } |
| } |