blob: 8346234fcdca15707285241728f9f369252a5f63 [file] [log] [blame]
/* $Id$ */
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.authorities.mappers.regexp;
import org.apache.manifoldcf.core.interfaces.*;
import java.util.*;
import java.util.regex.*;
/** An instance of this class describes a "match map", which describes a translation of an input
* string using regexp technology.
* A match map consists of multiple clauses, which are fired in sequence. Each clause is a regexp
* search and replace, where the replace string can include references to the groups present in the
* search regexp.
* MatchMaps can be converted to strings in two different ways. The first way is to build a single
* string of the form "match1=replace1&match2=replace2...". Strings of this kind must escape & and =
* characters in the match and replace strings, where found. The second way is to generate an array
* of match strings and a corresponding array of replace strings. This method requires no escaping
* of the string contents.
*/
public class MatchMap
{
public static final String _rcsid = "@(#)$Id$";
/** This is the set of match regexp strings */
protected List<String> matchStrings;
/** This is the set of Pattern objects corresponding to the match regexp strings.
* It's null if the patterns have not been built yet. */
protected Pattern[] matchPatterns = null;
/** This is the set of replace strings */
protected List<String> replaceStrings;
/** Constructor. Build an empty matchmap. */
public MatchMap()
{
matchStrings = new ArrayList<String>();
replaceStrings = new ArrayList<String>();
}
/** Constructor. Build a matchmap from a single string. */
public MatchMap(String stringForm)
{
matchStrings = new ArrayList<String>();
replaceStrings = new ArrayList<String>();
StringBuilder matchString = new StringBuilder();
StringBuilder replaceString = new StringBuilder();
int i = 0;
while (i < stringForm.length())
{
matchString.setLength(0);
replaceString.setLength(0);
while (i < stringForm.length())
{
char x = stringForm.charAt(i);
if (x == '&' || x == '=')
break;
i++;
if (x == '\\' && i < stringForm.length())
x = stringForm.charAt(i++);
matchString.append(x);
}
if (i < stringForm.length())
{
char x = stringForm.charAt(i);
if (x == '=')
{
i++;
// Pick up the second string
while (i < stringForm.length())
{
x = stringForm.charAt(i);
if (x == '&')
break;
i++;
if (x == '\\' && i < stringForm.length())
x = stringForm.charAt(i++);
replaceString.append(x);
}
}
}
matchStrings.add(matchString.toString());
replaceStrings.add(replaceString.toString());
if (i < stringForm.length())
{
char x = stringForm.charAt(i);
if (x == '&')
i++;
}
}
}
/** Constructor. Build a matchmap from two lists representing match and replace strings */
public MatchMap(List<String> matchStrings, List<String> replaceStrings)
{
this.matchStrings = matchStrings;
this.replaceStrings = replaceStrings;
}
/** Get the number of match/replace strings */
public int getMatchCount()
{
return matchStrings.size();
}
/** Get a specific match string */
public String getMatchString(int index)
{
return matchStrings.get(index);
}
/** Get a specific replace string */
public String getReplaceString(int index)
{
return replaceStrings.get(index);
}
/** Delete a specified match/replace string pair */
public void deleteMatchPair(int index)
{
matchStrings.remove(index);
replaceStrings.remove(index);
matchPatterns = null;
}
/** Insert a match/replace string pair */
public void insertMatchPair(int index, String match, String replace)
{
matchStrings.add(index,match);
replaceStrings.add(index,replace);
matchPatterns = null;
}
/** Append a match/replace string pair */
public void appendMatchPair(String match, String replace)
{
matchStrings.add(match);
replaceStrings.add(replace);
matchPatterns = null;
}
/** Append old-style match/replace pair.
* This method translates old-style regexp and group output form to the
* current style before adding to the map.
*/
public void appendOldstyleMatchPair(String oldstyleMatch, String oldstyleReplace)
{
String newStyleMatch = "^" + oldstyleMatch + "$";
// Need to build a new-style replace string from the old one. To do that, use the
// original parser (which basically will guarantee that we get it right)
EvaluatorTokenStream et = new EvaluatorTokenStream(oldstyleReplace);
StringBuilder newStyleReplace = new StringBuilder();
while (true)
{
EvaluatorToken t = et.peek();
if (t == null)
break;
switch (t.getType())
{
case EvaluatorToken.TYPE_COMMA:
et.advance();
break;
case EvaluatorToken.TYPE_GROUP:
et.advance();
int groupNumber = t.getGroupNumber();
switch (t.getGroupStyle())
{
case EvaluatorToken.GROUPSTYLE_NONE:
newStyleReplace.append("$(").append(Integer.toString(groupNumber)).append(")");
break;
case EvaluatorToken.GROUPSTYLE_LOWER:
newStyleReplace.append("$(").append(Integer.toString(groupNumber)).append("l)");
break;
case EvaluatorToken.GROUPSTYLE_UPPER:
newStyleReplace.append("$(").append(Integer.toString(groupNumber)).append("u)");
break;
case EvaluatorToken.GROUPSTYLE_MIXED:
newStyleReplace.append("$(").append(Integer.toString(groupNumber)).append("m)");
break;
default:
break;
}
break;
case EvaluatorToken.TYPE_TEXT:
et.advance();
escape(newStyleReplace,t.getTextValue());
break;
default:
break;
}
}
appendMatchPair(newStyleMatch,newStyleReplace.toString());
}
/** Escape a string so it is verbatim */
protected static void escape(StringBuilder output, String input)
{
int i = 0;
while (i < input.length())
{
char x = input.charAt(i++);
if (x == '$')
output.append(x);
output.append(x);
}
}
/** Convert the matchmap to string form. */
public String toString()
{
int i = 0;
StringBuilder rval = new StringBuilder();
while (i < matchStrings.size())
{
String matchString = matchStrings.get(i);
String replaceString = replaceStrings.get(i);
if (i > 0)
rval.append('&');
stuff(rval,matchString);
rval.append('=');
stuff(rval,replaceString);
i++;
}
return rval.toString();
}
/** Stuff characters */
protected static void stuff(StringBuilder sb, String value)
{
int i = 0;
while (i < value.length())
{
char x = value.charAt(i++);
if (x == '\\' || x == '&' || x == '=')
sb.append('\\');
sb.append(x);
}
}
/** Perform a translation.
*/
public String translate(String input)
throws ManifoldCFException
{
// Build pattern vector if not already there
if (matchPatterns == null)
{
matchPatterns = new Pattern[matchStrings.size()];
int i = 0;
while (i < matchPatterns.length)
{
String regexp = matchStrings.get(i);
try
{
matchPatterns[i] = Pattern.compile(regexp);
}
catch (java.util.regex.PatternSyntaxException e)
{
matchPatterns = null;
throw new ManifoldCFException("For match expression '"+regexp+"', found pattern syntax error: "+e.getMessage(),e);
}
i++;
}
}
int j = 0;
while (j < matchPatterns.length)
{
Pattern p = matchPatterns[j];
// Construct a matcher
Matcher m = p.matcher(input);
// Grab the output description
String outputDescription = replaceStrings.get(j);
j++;
// Create a copy buffer
StringBuilder outputBuffer = new StringBuilder();
// Keep track of the index in the original string we have done up to
int currentIndex = 0;
// Scan the string using find, and for each one found, do a translation
while (true)
{
boolean foundOne = m.find();
if (foundOne == false)
{
// No subsequent match found.
// Copy everything from currentIndex until the end of input
outputBuffer.append(input.substring(currentIndex));
break;
}
// Do a translation. This involves copying everything in the input
// string up until the start of the match, then doing a replace for
// the match itself, and finally setting the currentIndex to the end
// of the match.
int matchStart = m.start(0);
int matchEnd = m.end(0);
if (matchStart == -1)
{
// The expression was degenerate; treat this as the end.
outputBuffer.append(input.substring(currentIndex));
break;
}
outputBuffer.append(input.substring(currentIndex,matchStart));
// Process translation description!
int i = 0;
while (i < outputDescription.length())
{
char x = outputDescription.charAt(i++);
if (x == '$' && i < outputDescription.length())
{
x = outputDescription.charAt(i++);
if (x == '(')
{
// Process evaluation expression
StringBuilder numberBuf = new StringBuilder();
boolean upper = false;
boolean lower = false;
boolean mixed = false;
while (i < outputDescription.length())
{
char y = outputDescription.charAt(i++);
if (y == ')')
break;
else if (y >= '0' && y <= '9')
numberBuf.append(y);
else if (y == 'u' || y == 'U')
upper = true;
else if (y == 'l' || y == 'L')
lower = true;
else if (y == 'm' || y == 'M')
mixed = true;
}
String number = numberBuf.toString();
try
{
int groupnum = Integer.parseInt(number);
String groupValue = m.group(groupnum);
if (upper)
outputBuffer.append(groupValue.toUpperCase());
else if (lower)
outputBuffer.append(groupValue.toLowerCase());
else if (mixed && groupValue.length() > 0)
outputBuffer.append(groupValue.substring(0,1).toUpperCase()).append(groupValue.substring(1).toLowerCase());
else
outputBuffer.append(groupValue);
}
catch (NumberFormatException e)
{
// Silently skip, because it's an illegal group number, so nothing
// gets added.
}
// Go back around, so we don't add the $ in
continue;
}
}
outputBuffer.append(x);
}
currentIndex = matchEnd;
}
input = outputBuffer.toString();
}
return input;
}
// Protected classes
// These classes are used to process the old token-based replacement strings
/** Evaluator token.
*/
protected static class EvaluatorToken
{
public final static int TYPE_GROUP = 0;
public final static int TYPE_TEXT = 1;
public final static int TYPE_COMMA = 2;
public final static int GROUPSTYLE_NONE = 0;
public final static int GROUPSTYLE_LOWER = 1;
public final static int GROUPSTYLE_UPPER = 2;
public final static int GROUPSTYLE_MIXED = 3;
protected int type;
protected int groupNumber = -1;
protected int groupStyle = GROUPSTYLE_NONE;
protected String textValue = null;
public EvaluatorToken()
{
type = TYPE_COMMA;
}
public EvaluatorToken(int groupNumber, int groupStyle)
{
type = TYPE_GROUP;
this.groupNumber = groupNumber;
this.groupStyle = groupStyle;
}
public EvaluatorToken(String text)
{
type = TYPE_TEXT;
this.textValue = text;
}
public int getType()
{
return type;
}
public int getGroupNumber()
{
return groupNumber;
}
public int getGroupStyle()
{
return groupStyle;
}
public String getTextValue()
{
return textValue;
}
}
/** Token stream.
*/
protected static class EvaluatorTokenStream
{
protected String text;
protected int pos;
protected EvaluatorToken token = null;
/** Constructor.
*/
public EvaluatorTokenStream(String text)
{
this.text = text;
this.pos = 0;
}
/** Get current token.
*/
public EvaluatorToken peek()
{
if (token == null)
{
token = nextToken();
}
return token;
}
/** Go on to next token.
*/
public void advance()
{
token = null;
}
protected EvaluatorToken nextToken()
{
char x;
// Fetch the next token
while (true)
{
if (pos == text.length())
return null;
x = text.charAt(pos);
if (x > ' ')
break;
pos++;
}
StringBuilder sb;
if (x == '"')
{
// Parse text
pos++;
sb = new StringBuilder();
while (true)
{
if (pos == text.length())
break;
x = text.charAt(pos);
pos++;
if (x == '"')
{
break;
}
if (x == '\\')
{
if (pos == text.length())
break;
x = text.charAt(pos++);
}
sb.append(x);
}
return new EvaluatorToken(sb.toString());
}
if (x == ',')
{
pos++;
return new EvaluatorToken();
}
// Eat number at beginning
sb = new StringBuilder();
while (true)
{
if (pos == text.length())
break;
x = text.charAt(pos);
if (x >= '0' && x <= '9')
{
sb.append(x);
pos++;
continue;
}
break;
}
String numberValue = sb.toString();
int groupNumber = 0;
if (numberValue.length() > 0)
groupNumber = new Integer(numberValue).intValue();
// Save the next char position
int modifierPos = pos;
// Go to the end of the word
while (true)
{
if (pos == text.length())
break;
x = text.charAt(pos);
if (x == ',' || x >= '0' && x <= '9' || x <= ' ' && x >= 0)
break;
pos++;
}
int style = EvaluatorToken.GROUPSTYLE_NONE;
if (modifierPos != pos)
{
String modifier = text.substring(modifierPos,pos);
if (modifier.startsWith("u"))
style = EvaluatorToken.GROUPSTYLE_UPPER;
else if (modifier.startsWith("l"))
style = EvaluatorToken.GROUPSTYLE_LOWER;
else if (modifier.startsWith("m"))
style = EvaluatorToken.GROUPSTYLE_MIXED;
}
return new EvaluatorToken(groupNumber,style);
}
}
}