// blob: 2bc2b26e79ac9cb8d4ffbaf0f2c61df49645c730 (source-viewer header, not part of the program)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.roller.util;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.codec.binary.Hex;
/**
* Regular expressions utility class.
*/
public final class RegexUtil {

    /**
     * Matches a {@code mailto:} link. Group 1 captures the e-mail address
     * that follows the {@code mailto:} prefix.
     */
    public static final Pattern MAILTO_PATTERN =
            Pattern.compile("mailto:([a-zA-Z0-9\\.\\-]+@[a-zA-Z0-9\\.\\-]+\\.[a-zA-Z0-9]+)");

    /**
     * Matches a plain-text e-mail address. Group 1 is the {@code @} sign,
     * group 2 the domain up to (not including) its last dot, group 3 that
     * dot, and group 4 the final domain label.
     */
    public static final Pattern EMAIL_PATTERN =
            Pattern.compile("\\b[a-zA-Z0-9\\.\\-]+(@)([a-zA-Z0-9\\.\\-]+)(\\.)([a-zA-Z0-9]+)\\b");

    /**
     * Obfuscates every e-mail address found in the given text.
     *
     * <p>Addresses inside {@code mailto:} links are percent/hex encoded (see
     * {@link #encode(String)}) so that browsers can still understand the
     * link. Whatever plain-text addresses remain are then rewritten by
     * {@link #obfuscateEmail(String)}.
     *
     * @param str the text to process
     * @return the text with mailto addresses hex encoded and plain-text
     *         addresses obfuscated
     */
    public static String encodeEmail(String str) {
        Matcher mailtoMatch = MAILTO_PATTERN.matcher(str);
        StringBuffer encoded = new StringBuffer();
        while (mailtoMatch.find()) {
            String replacement = "mailto:" + encode(mailtoMatch.group(1));
            // Rewrite the matched span itself. Re-searching the text with the
            // address used as a regex could hit a different occurrence,
            // because '.' in the address would act as a wildcard.
            mailtoMatch.appendReplacement(encoded, Matcher.quoteReplacement(replacement));
        }
        mailtoMatch.appendTail(encoded);
        return obfuscateEmail(encoded.toString());
    }

    /**
     * Obfuscates plain-text e-mail addresses by replacing the {@code @} with
     * {@code -AT-} and the last dot of the domain with {@code -DOT-}, e.g.
     * {@code a@b.com} becomes {@code a-AT-b-DOT-com}.
     *
     * <p>The result stays "human-readable"; it is still too easy for
     * machines to parse, however.
     *
     * @param str the text to process
     * @return the text with every matched address obfuscated; text outside
     *         the matched addresses is left untouched
     */
    public static String obfuscateEmail(String str) {
        Matcher emailMatch = EMAIL_PATTERN.matcher(str);
        StringBuffer obfuscated = new StringBuffer();
        while (emailMatch.find()) {
            // Everything from the start of the match up to the '@' (group 1).
            String localPart = str.substring(emailMatch.start(), emailMatch.start(1));
            String replacement = localPart + "-AT-"
                    + emailMatch.group(2) + "-DOT-" + emailMatch.group(4);
            // Replace exactly the matched address, so an unrelated earlier
            // '@' or a look-alike domain elsewhere in the text is not altered.
            emailMatch.appendReplacement(obfuscated, Matcher.quoteReplacement(replacement));
        }
        emailMatch.appendTail(obfuscated);
        return obfuscated.toString();
    }

    /**
     * Returns the specified match "group" from each occurrence of the pattern.
     * For each occurrence found, one String is added to the returned list.
     *
     * @param pattern The Pattern to use.
     * @param match The String to match against.
     * @param group The group number to return in case of a match.
     * @return List of matched groups from the pattern, in order of occurrence;
     *         empty if the pattern is not found.
     */
    public static List<String> getMatches(Pattern pattern, String match, int group) {
        List<String> matches = new ArrayList<String>();
        Matcher matcher = pattern.matcher(match);
        while (matcher.find()) {
            matches.add(matcher.group(group));
        }
        return matches;
    }

    /**
     * Percent-encodes every byte of the UTF-8 form of the given address as
     * {@code %xx} with lowercase hex digits, e.g. {@code a@b} becomes
     * {@code %61%40%62}.
     *
     * @param email the address to encode
     * @return the encoded address, or the address unchanged if the UTF-8
     *         encoding is reported as unsupported
     */
    public static String encode(String email) {
        byte[] bytes;
        try {
            bytes = email.getBytes("UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Best-effort fallback: hand back the address unencoded.
            return email;
        }
        StringBuilder result = new StringBuilder(bytes.length * 3);
        for (int i = 0; i < bytes.length; i++) {
            result.append('%');
            // Mask after shifting so negative bytes map to 0-15 as well.
            result.append(Character.forDigit((bytes[i] >> 4) & 0xF, 16));
            result.append(Character.forDigit(bytes[i] & 0xF, 16));
        }
        return result.toString();
    }
}