blob: 1497aad42894ba64f25d7587320c593bb3ce0ff4 [file] [log] [blame]
/*
JSPWiki - a JSP-based WikiWiki clone.
Copyright (C) 2001-2002 Janne Jalkanen (Janne.Jalkanen@iki.fi)
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.ecyrd.jspwiki;
import java.io.UnsupportedEncodingException;
import java.util.Properties;
/**
* Contains a number of static utility methods.
*/
public class TextUtil
{
/** Upper-case hexadecimal digits, indexed by nibble value; used to build %XX escapes. */
static final String HEX_DIGITS = "0123456789ABCDEF";

/**
 * URL-encodes a raw byte sequence.
 * <p>
 * ASCII letters, ASCII digits and the five characters underscore, period,
 * asterisk, hyphen and forward slash are copied through unchanged. A space
 * becomes a plus sign. Every other byte is written as a percent sign
 * followed by two upper-case hexadecimal digits.
 * <p>
 * This exists because java.net.URLEncoder.encode() in JDK &lt; 1.4 is buggy
 * and does not accept byte arrays.
 *
 * @param rs The bytes to encode.
 * @return The URL-encoded representation of <i>rs</i>.
 */
protected static String urlEncode( byte[] rs )
{
    StringBuffer encoded = new StringBuffer();

    for( int idx = 0; idx < rs.length; idx++ )
    {
        // Work on the unsigned value so that bytes >= 0x80 need no special care.
        int octet = rs[idx] & 0xFF;

        boolean literal = (octet >= 'a' && octet <= 'z')
                       || (octet >= 'A' && octet <= 'Z')
                       || (octet >= '0' && octet <= '9')
                       || "_.*-/".indexOf( octet ) != -1;

        if( literal )
        {
            encoded.append( (char) octet );
        }
        else if( octet == ' ' )
        {
            encoded.append( '+' );
        }
        else
        {
            encoded.append( '%' );
            encoded.append( HEX_DIGITS.charAt( octet >> 4 ) );
            encoded.append( HEX_DIGITS.charAt( octet & 0x0F ) );
        }
    }

    return encoded.toString();
}
/**
 * URL-decodes a byte sequence and converts the decoded bytes to a String.
 * <p>
 * A plus sign becomes a space, a percent sign followed by two hexadecimal
 * digits (upper or lower case) becomes the byte with that value, and every
 * other byte is copied through unchanged.
 * <p>
 * The JDK URL decoder does not handle all characters correctly.
 * See <A HREF="http://developer.java.sun.com/developer/bugParade/bugs/4257115.html">
 * Bug parade, bug #4257115</A> for more information.
 * <P>
 * Thanks to CJB for the original fix.
 *
 * @param bytes    The URL-encoded bytes. Null is safe.
 * @param encoding Name of the character set used to turn the decoded
 *                 bytes into a String.
 * @return The decoded String, or null if <i>bytes</i> was null.
 * @throws UnsupportedEncodingException If <i>encoding</i> is not supported.
 * @throws IllegalArgumentException If a percent sign is not followed by
 *         two hexadecimal digits.
 */
protected static String urlDecode( byte[] bytes, String encoding )
    throws UnsupportedEncodingException,
           IllegalArgumentException
{
    if( bytes == null )
    {
        return null;
    }

    // The decoded form is never longer than the encoded one.
    byte[] decodeBytes = new byte[bytes.length];
    int decodedByteCount = 0;

    for( int count = 0; count < bytes.length; count++ )
    {
        byte current = bytes[count];

        if( current == '+' )
        {
            decodeBytes[decodedByteCount++] = (byte) ' ';
        }
        else if( current == '%' )
        {
            // Check the bounds explicitly instead of relying on an
            // IndexOutOfBoundsException for truncated escapes.
            if( count + 2 >= bytes.length )
            {
                throw new IllegalArgumentException( "Malformed UTF-8 string?" );
            }

            byte highByte = bytes[count + 1];
            byte lowByte  = bytes[count + 2];

            // Only 7-bit values can be hex digits. Character.digit() accepts
            // both 'a'-'f' and 'A'-'F' and returns -1 for anything else.
            int high = ( highByte >= 0 ) ? Character.digit( (char) highByte, 16 ) : -1;
            int low  = ( lowByte  >= 0 ) ? Character.digit( (char) lowByte,  16 ) : -1;

            if( high < 0 || low < 0 )
            {
                throw new IllegalArgumentException( "Malformed %XX escape at byte offset " + count );
            }

            decodeBytes[decodedByteCount++] = (byte) ((high << 4) + low);
            count += 2;
        }
        else
        {
            decodeBytes[decodedByteCount++] = current;
        }
    }

    try
    {
        return new String( decodeBytes, 0, decodedByteCount, encoding );
    }
    catch( UnsupportedEncodingException e )
    {
        // Name the encoding that was actually requested and keep the cause.
        UnsupportedEncodingException wrapped =
            new UnsupportedEncodingException( encoding + " encoding not supported on this platform" );
        wrapped.initCause( e );
        throw wrapped;
    }
}
/**
 * URL-encodes a String after converting it to UTF-8 bytes. Works like
 * java.net.URLEncoder, but always uses the UTF-8 character set.
 *
 * @param text The text to encode.
 * @return The URL-encoded text.
 * @throws InternalWikiException If the platform does not support UTF-8.
 */
public static String urlEncodeUTF8( String text )
{
    try
    {
        return urlEncode( text.getBytes( "UTF-8" ) );
    }
    catch( UnsupportedEncodingException e )
    {
        throw new InternalWikiException("UTF-8 not supported!?!");
    }
}
/**
 * URL-decodes a String whose escaped bytes are UTF-8. Works like
 * java.net.URLDecoder, but for UTF-8 strings.
 * <p>
 * The input is first turned into bytes using ISO-8859-1 and those bytes
 * are then URL-decoded and interpreted as UTF-8.
 *
 * @param utf8 The URL-encoded text.
 * @return The decoded text.
 * @throws InternalWikiException If UTF-8 or ISO-8859-1 is not supported.
 */
public static String urlDecodeUTF8( String utf8 )
{
    try
    {
        byte[] raw = utf8.getBytes( "ISO-8859-1" );

        return urlDecode( raw, "UTF-8" );
    }
    catch( UnsupportedEncodingException e )
    {
        throw new InternalWikiException("UTF-8 or ISO-8859-1 not supported!?!");
    }
}
/**
 * Provides a URL-encoded version of a String, using the given character
 * encoding to turn the String into bytes first.
 * <p>
 * "UTF-8" is delegated to {@link #urlEncodeUTF8(String)}; for any other
 * encoding name the String is converted with String.getBytes(encoding)
 * and the resulting bytes are URL-encoded.
 *
 * <p>According to the original author this implementation is the same as
 * in FileSystemProvider.mangleName().
 *
 * @param data     The text to encode.
 * @param encoding Name of the character encoding, e.g. "UTF-8" or
 *                 "ISO-8859-1".
 * @return The URL-encoded text.
 * @throws InternalWikiException If the encoding is not supported.
 */
public static String urlEncode( String data, String encoding )
{
    if( "UTF-8".equals( encoding ) )
    {
        return urlEncodeUTF8( data );
    }

    try
    {
        return urlEncode( data.getBytes( encoding ) );
    }
    catch( UnsupportedEncodingException uee )
    {
        // The separating space was missing, which produced e.g. "intoUTF-16".
        throw new InternalWikiException( "Could not encode String into " + encoding );
    }
}
/**
 * Provides a URL-decoded version of a String, using the given character
 * encoding.
 * <p>
 * "UTF-8" is delegated to {@link #urlDecodeUTF8(String)}; for any other
 * encoding name the String is converted with String.getBytes(encoding),
 * URL-decoded, and converted back to a String with the same encoding.
 *
 * <p>According to the original author this implementation is the same as
 * in FileSystemProvider.unmangleName().
 *
 * @param data     The URL-encoded text.
 * @param encoding Name of the character encoding, e.g. "UTF-8" or
 *                 "ISO-8859-1".
 * @return The decoded text.
 * @throws UnsupportedEncodingException Kept in the signature for
 *         backward compatibility; this method reports an unsupported
 *         encoding as an InternalWikiException instead.
 * @throws IllegalArgumentException If the data contains a malformed
 *         escape sequence.
 * @throws InternalWikiException If the encoding is not supported.
 */
public static String urlDecode( String data, String encoding )
    throws UnsupportedEncodingException,
           IllegalArgumentException
{
    if( "UTF-8".equals( encoding ) )
    {
        return urlDecodeUTF8( data );
    }

    try
    {
        return urlDecode( data.getBytes( encoding ), encoding );
    }
    catch( UnsupportedEncodingException uee )
    {
        // The separating space was missing, which produced e.g. "intoUTF-16".
        throw new InternalWikiException( "Could not decode String into " + encoding );
    }
}
/**
 * Replaces the HTML-relevant characters inside the String with entities.
 * Every ampersand, less-than sign, greater-than sign and double quote is
 * replaced by its named entity (amp, lt, gt and quot respectively).
 * <p>
 * The string is processed in a single pass, so an ampersand that belongs
 * to an entity produced here is never escaped a second time. An ampersand
 * that is already present in the input is always escaped.
 *
 * @param src The text to escape. Null is safe.
 * @return The escaped text, or null if <i>src</i> was null.
 *
 * @since 1.6.1
 */
public static String replaceEntities( String src )
{
    if( src == null ) return null;

    StringBuffer escaped = new StringBuffer( src.length() + 16 );

    for( int i = 0; i < src.length(); i++ )
    {
        char ch = src.charAt( i );

        switch( ch )
        {
            case '&':
                escaped.append( "&amp;" );
                break;

            case '<':
                escaped.append( "&lt;" );
                break;

            case '>':
                escaped.append( "&gt;" );
                break;

            case '"':
                escaped.append( "&quot;" );
                break;

            default:
                escaped.append( ch );
                break;
        }
    }

    return escaped.toString();
}
/**
 * Replaces every occurrence of a string with another string.
 * Occurrences are found from left to right and do not overlap.
 *
 * @param orig Original string. Null is safe.
 * @param src  The string to find. If it is null or empty, <i>orig</i> is
 *             returned unchanged.
 * @param dest The string to replace <I>src</I> with.
 * @return The string with all replacements made, or null if <i>orig</i>
 *         was null.
 */
public static String replaceString( String orig, String src, String dest )
{
    if( orig == null ) return null;

    // An empty search string matches at every index without ever advancing,
    // which used to make the loop below run until memory was exhausted.
    if( src == null || src.length() == 0 ) return orig;

    StringBuffer res = new StringBuffer();
    int last = 0;   // first character not yet copied to the result
    int start;

    while( (start = orig.indexOf( src, last )) != -1 )
    {
        res.append( orig.substring( last, start ) );
        res.append( dest );
        last = start + src.length();
    }

    res.append( orig.substring( last ) );

    return res.toString();
}
/**
 * Replaces the characters between two positions of a string with a new
 * String. The actual work is done by StringBuffer.replace(), so its rules
 * for the index arguments apply.
 *
 * @param orig  Original string. Null is safe.
 * @param start Index where the replaced range starts (inclusive).
 * @param end   Index where the replaced range ends (exclusive).
 * @param text  The new text to insert in place of the range.
 * @return The resulting string, or null if <i>orig</i> was null.
 */
public static String replaceString( String orig, int start, int end, String text )
{
    return ( orig == null )
        ? null
        : new StringBuffer( orig ).replace( start, end, text ).toString();
}
/**
 * Parses an integer parameter, returning a default value
 * if the value is null or a non-number. Leading and trailing
 * whitespace in the value is ignored.
 *
 * @param value    The text to parse. Null is safe.
 * @param defvalue The value to return if <i>value</i> cannot be parsed.
 * @return The parsed integer, or <i>defvalue</i>.
 */
public static int parseIntParameter( String value, int defvalue )
{
    // Test for null explicitly instead of catching the NullPointerException.
    if( value == null )
    {
        return defvalue;
    }

    try
    {
        return Integer.parseInt( value.trim() );
    }
    catch( NumberFormatException e )
    {
        // Not a number: falling back to the default is the documented contract.
        return defvalue;
    }
}
/**
 * Gets an integer-valued property from a standard Properties
 * list. The looked-up value is handed to parseIntParameter(), so a
 * missing or non-integer value yields <i>defVal</i>.
 *
 * @param props  The Properties to search through.
 * @param key    The property key.
 * @param defVal The value to return if the property is missing or is
 *               not an integer.
 * @return The property value as an integer, or <i>defVal</i>.
 *
 * @since 2.1.48.
 */
public static int getIntegerProperty( Properties props,
                                      String key,
                                      int defVal )
{
    return parseIntParameter( props.getProperty( key ), defVal );
}
/**
 * Gets a boolean property from a standard Properties list.
 * Returns the default value only if the key has not been set.
 * <P>
 * A property that is set is evaluated with isPositive(): "true", "yes"
 * and "on" count as true, and any other value counts as false.
 *
 * @param props  A list of properties to search.
 * @param key    The property key.
 * @param defval The default value to return if the key is not set.
 *
 * @return True, if the property "key" was set to "true", "on", or "yes";
 *         <i>defval</i> if it was not set at all; otherwise false.
 *
 * @since 2.0.11
 */
public static boolean getBooleanProperty( Properties props,
                                          String key,
                                          boolean defval )
{
    String val = props.getProperty( key );

    return ( val != null ) ? isPositive( val ) : defval;
}
/**
 * Fetches a String property from the set of Properties. The only
 * difference from Properties.getProperty() is that a value which is found
 * is trim()med, so it carries no leading or trailing whitespace. The
 * default value is returned as given, without trimming.
 *
 * @param props  The Properties to search through
 * @param key    The property key
 * @param defval A default value to return, if the property does not exist.
 * @return The trimmed property value, or <i>defval</i>.
 * @since 2.1.151
 */
public static String getStringProperty( Properties props,
                                        String key,
                                        String defval )
{
    String raw = props.getProperty( key );

    return ( raw != null ) ? raw.trim() : defval;
}
/**
 * Returns true, if the string "val" denotes a positive answer. The
 * accepted values are "yes", "on", and "true". Surrounding whitespace is
 * ignored and the comparison is case-insensitive.
 * Null values are safe.
 *
 * @param val Value to check.
 * @return True, if val is "true", "on", or "yes"; otherwise false.
 *
 * @since 2.0.26
 */
public static boolean isPositive( String val )
{
    if( val == null ) return false;

    String candidate = val.trim();
    String[] positives = { "true", "on", "yes" };

    for( int i = 0; i < positives.length; i++ )
    {
        if( candidate.equalsIgnoreCase( positives[i] ) )
        {
            return true;
        }
    }

    return false;
}
/**
 * Makes sure that the POSTed data conforms to certain rules. These
 * rules are:
 * <UL>
 * <LI>The data always ends with a newline (some browsers, such
 *     as the NS4.x series, do not send a newline at the end, which makes
 *     the diffs a bit strange sometimes).
 * <LI>The CR/LF/CRLF mess is normalized to plain CRLF.
 * </UL>
 *
 * The reason why we're using CRLF is that most browsers already
 * return CRLF since that is the closest thing to a HTTP standard.
 *
 * @param postData The text to normalize.
 * @return The text with every line break written as CRLF and with a
 *         trailing CRLF.
 */
public static String normalizePostData( String postData )
{
    final String crlf = "\r\n";
    final int length = postData.length();

    StringBuffer normalized = new StringBuffer();
    int pos = 0;

    while( pos < length )
    {
        char ch = postData.charAt( pos++ );

        if( ch == '\r' )
        {
            // CR, either Mac or MSDOS
            normalized.append( crlf );

            // For MSDOS, swallow the LF that belongs to this CR so that
            // it does not produce a second line break.
            if( pos < length && postData.charAt( pos ) == '\n' )
            {
                pos++;
            }
        }
        else if( ch == '\n' )
        {
            // LF, UNIX
            normalized.append( crlf );
        }
        else
        {
            normalized.append( ch );
        }
    }

    int size = normalized.length();
    boolean terminated = size >= 2
                      && normalized.charAt( size - 2 ) == '\r'
                      && normalized.charAt( size - 1 ) == '\n';

    if( !terminated )
    {
        normalized.append( crlf );
    }

    return normalized.toString();
}
// Character classes used by beautifyString().
private static final int EOI   = 0;   // end of input
private static final int LOWER = 1;   // lower-case letter
private static final int UPPER = 2;   // upper-case letter
private static final int DIGIT = 3;   // digit
private static final int OTHER = 4;   // anything else

/**
 * Classifies a character.
 *
 * @param c A character value, or -1 to denote the end of input.
 * @return One of EOI, LOWER, UPPER, DIGIT or OTHER.
 */
private static int getCharKind(int c)
{
    if( c == -1 ) return EOI;

    char ch = (char) c;

    if( Character.isLowerCase( ch ) ) return LOWER;
    if( Character.isUpperCase( ch ) ) return UPPER;
    if( Character.isDigit( ch ) )     return DIGIT;

    return OTHER;
}

/**
 * Adds spaces in suitable locations of the input string. This is
 * used to transform a WikiName into a more readable format.
 *
 * @param s String to be beautified.
 * @return A beautified string.
 */
public static String beautifyString( String s )
{
    return beautifyString( s, " " );
}

/**
 * Adds a separator in suitable locations of the input string. This is
 * used to transform a WikiName into a more readable format.
 * <p>
 * The separator is inserted
 * <ul>
 * <li>before an upper-case letter that follows an upper-case letter and
 *     is itself followed by a lower-case letter (the last capital of an
 *     acronym starts the next word),
 * <li>between an upper-case letter and a following digit,
 * <li>between a lower-case letter and a following digit or upper-case
 *     letter, and
 * <li>between a digit and a following letter.
 * </ul>
 *
 * @param s String to be beautified. Null or empty yields "".
 * @param space Use this string for the space character.
 * @return A beautified string.
 * @since 2.1.127
 */
public static String beautifyString( String s, String space )
{
    if( s == null || s.length() == 0 ) return "";

    final int length = s.length();
    StringBuffer pretty = new StringBuffer();

    // The character before the first one is treated as lower case.
    int before = LOWER;

    for( int i = 0; i < length; i++ )
    {
        char ch   = s.charAt( i );
        int kind  = getCharKind( ch );
        int after = ( i + 1 < length ) ? getCharKind( s.charAt( i + 1 ) ) : EOI;

        if( before == UPPER && kind == UPPER && after == LOWER )
        {
            // Last capital of a run of capitals: it begins a new word.
            pretty.append( space );
            pretty.append( ch );
        }
        else
        {
            pretty.append( ch );

            boolean breakAfter;

            switch( kind )
            {
                case UPPER:
                    breakAfter = ( after == DIGIT );
                    break;

                case LOWER:
                    breakAfter = ( after == DIGIT || after == UPPER );
                    break;

                case DIGIT:
                    breakAfter = ( after == UPPER || after == LOWER );
                    break;

                default:
                    breakAfter = false;
                    break;
            }

            if( breakAfter )
            {
                pretty.append( space );
            }
        }

        before = kind;
    }

    return pretty.toString();
}
/**
 * Creates a Properties object based on an array which contains alternately
 * a key and a value. It is useful for generating default mappings.
 * For example:
 * <pre>
 *     String[] values = { "jspwiki.property1", "value1",
 *                         "jspwiki.property2", "value2" };
 *
 *     Properties props = TextUtil.createProperties( values );
 *
 *     System.out.println( props.getProperty("jspwiki.property1") );
 * </pre>
 * would output "value1".
 *
 * @param values Alternating key and value pairs.
 * @return Property object
 * @see java.util.Properties
 * @throws IllegalArgumentException if the array has an odd number of
 *         elements, i.e. a key is missing its value.
 * @since 2.2.
 */
public static Properties createProperties( String[] values )
    throws IllegalArgumentException
{
    if( (values.length & 1) != 0 )
    {
        throw new IllegalArgumentException( "One value is missing.");
    }

    Properties props = new Properties();
    int idx = 0;

    while( idx < values.length )
    {
        String key   = values[idx++];
        String value = values[idx++];

        props.setProperty( key, value );
    }

    return props;
}
/**
 * Counts the number of sections (separated with "----") on the page.
 * An empty page has no sections. A non-empty page has one section more
 * than it has separators, because the first section is not preceded by
 * a separator.
 *
 * @param pagedata The WikiText to parse.
 * @return int Number of counted sections.
 * @since 2.1.86.
 */
public static int countSections( String pagedata )
{
    if( pagedata.length() == 0 )
    {
        return 0;
    }

    int sections = 1;   // the text before the first "----"

    for( int at = pagedata.indexOf( "----" );
         at != -1;
         at = pagedata.indexOf( "----", at + 4 ) )   // continue after this "----"
    {
        sections++;
    }

    return sections;
}
/**
 * Gets the given section (separated with "----") from the page text.
 * Note that the first section is always #1. If a page has no section
 * markers, then there is only a single section, #1.
 *
 * @param pagedata WikiText to parse.
 * @param section  Which section to get, starting from 1.
 * @return String The text of the section, without the "----" markers
 *         around it.
 * @throws IllegalArgumentException If the page does not contain this many sections.
 * @since 2.1.86.
 */
public static String getSection( String pagedata, int section )
    throws IllegalArgumentException
{
    int from    = 0;   // index where the current section begins
    int current = 1;   // number of the section beginning at 'from'
    int marker;

    while( (marker = pagedata.indexOf( "----", from )) != -1 )
    {
        if( current == section )
        {
            return pagedata.substring( from, marker );
        }

        from = marker + 4;   // Skip this "----"
        current++;
    }

    // The last section runs to the end of the page.
    if( current == section )
    {
        return pagedata.substring( from );
    }

    throw new IllegalArgumentException("There is no section no. "+section+" on the page.");
}
/**
 * A simple routine which just repeats its argument. This is useful
 * for creating something like a line.
 *
 * @param what  String to repeat
 * @param times How many times to repeat the string. Zero or a negative
 *              number gives an empty string.
 * @return <i>what</i> concatenated <i>times</i> times.
 * @since 2.1.98.
 */
public static String repeatString( String what, int times )
{
    StringBuffer repeated = new StringBuffer();
    int remaining = times;

    while( remaining-- > 0 )
    {
        repeated.append( what );
    }

    return repeated.toString();
}
/**
 * Converts a string from the Unicode representation into something that can be
 * embedded in a java properties file. Every character outside the range
 * 0x20 - 0x7E is replaced with an escape consisting of a backslash, the
 * letter u and four upper-case hexadecimal digits; all other characters
 * are copied as they are.
 *
 * @param s The string to convert.
 * @return the ASCII string
 */
public static String native2Ascii(String s)
{
    StringBuffer ascii = new StringBuffer();
    char[] chars = s.toCharArray();

    for( int i = 0; i < chars.length; i++ )
    {
        char ch = chars[i];

        if( ch >= 0x0020 && ch <= 0x007e )
        {
            ascii.append( ch );
            continue;
        }

        ascii.append( '\\' );
        ascii.append( 'u' );

        // Emit the four nibbles, most significant first.
        for( int shift = 12; shift >= 0; shift -= 4 )
        {
            ascii.append( toHex( ch >> shift ) );
        }
    }

    return ascii.toString();
}

/**
 * Returns the upper-case hexadecimal digit for the lowest four bits
 * of the argument.
 *
 * @param nibble The value to convert; only the lowest four bits are used.
 * @return One of the characters 0-9 or A-F.
 */
private static char toHex(int nibble)
{
    return Character.toUpperCase( Character.forDigit( nibble & 0xF, 16 ) );
}
/**
 * Returns true, if the argument contains a number, otherwise false.
 * A number is one or more digits, optionally preceded by a single
 * minus sign. Null, the empty string and a lone "-" are not numbers.
 * <p>
 * In a quick test this is roughly the same speed as Integer.parseInt()
 * if the argument is a number, and roughly ten times the speed, if
 * the argument is NOT a number.
 *
 * @param s The string to check. Null is safe.
 * @return True, if <i>s</i> consists of an optional minus sign followed
 *         by at least one digit.
 *
 * @since 2.4
 */
public static boolean isNumber( String s )
{
    if( s == null ) return false;

    int length = s.length();

    // Skip a leading minus sign, but only if something follows it.
    int first = ( length > 1 && s.charAt( 0 ) == '-' ) ? 1 : 0;

    // The empty string contains no digits at all; it used to be
    // reported as a number because the loop below never ran.
    if( first == length ) return false;

    for( int i = first; i < length; i++ )
    {
        if( !Character.isDigit( s.charAt( i ) ) )
        {
            return false;
        }
    }

    return true;
}
}