blob: f627cb8d2f41279c96417a3b95e4ffd09ed47b15 [file] [log] [blame]
/*
* Utils.java
*
*/
package com.yahoo.labs.samoa.moa.core;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 1999 - 2012 University of Waikato, Hamilton, New Zealand
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.lang.reflect.Array;
import java.net.URL;
import java.text.BreakIterator;
import java.util.Enumeration;
import java.util.Properties;
import java.util.Random;
import java.util.Vector;
/**
* Class implementing some simple utility methods.
*
* @author Eibe Frank
* @author Yong Wang
* @author Len Trigg
* @author Julien Prados
* @version $Revision: 8080 $
*/
public final class Utils {
/** The natural logarithm of 2. */
public static double log2 = Math.log(2);
/** The small deviation allowed in double comparisons. */
public static double SMALL = 1e-6;
/**
* Tests if the given value codes "missing".
*
* @param val the value to be tested
* @return true if val codes "missing"
*/
public static boolean isMissingValue(double val) {
return Double.isNaN(val);
}
/**
* Returns the value used to code a missing value. Note that
* equality tests on this value will always return false, so use
* isMissingValue(double val) for testing..
*
* @return the value used as missing value.
*/
public static double missingValue() {
return Double.NaN;
}
/**
* Casting an object without "unchecked" compile-time warnings.
* Use only when absolutely necessary (e.g. when using clone()).
*/
@SuppressWarnings("unchecked")
public static <T> T cast(Object x) {
return (T) x;
}
/**
* Returns the correlation coefficient of two double vectors.
*
* @param y1 double vector 1
* @param y2 double vector 2
* @param n the length of two double vectors
* @return the correlation coefficient
*/
public static final double correlation(double y1[],double y2[],int n) {
int i;
double av1 = 0.0, av2 = 0.0, y11 = 0.0, y22 = 0.0, y12 = 0.0, c;
if (n <= 1) {
return 1.0;
}
for (i = 0; i < n; i++) {
av1 += y1[i];
av2 += y2[i];
}
av1 /= (double) n;
av2 /= (double) n;
for (i = 0; i < n; i++) {
y11 += (y1[i] - av1) * (y1[i] - av1);
y22 += (y2[i] - av2) * (y2[i] - av2);
y12 += (y1[i] - av1) * (y2[i] - av2);
}
if (y11 * y22 == 0.0) {
c=1.0;
} else {
c = y12 / Math.sqrt(Math.abs(y11 * y22));
}
return c;
}
/**
* Removes all occurrences of a string from another string.
*
* @param inString the string to remove substrings from.
* @param substring the substring to remove.
* @return the input string with occurrences of substring removed.
*/
public static String removeSubstring(String inString, String substring) {
StringBuffer result = new StringBuffer();
int oldLoc = 0, loc = 0;
while ((loc = inString.indexOf(substring, oldLoc))!= -1) {
result.append(inString.substring(oldLoc, loc));
oldLoc = loc + substring.length();
}
result.append(inString.substring(oldLoc));
return result.toString();
}
/**
* Replaces with a new string, all occurrences of a string from
* another string.
*
* @param inString the string to replace substrings in.
* @param subString the substring to replace.
* @param replaceString the replacement substring
* @return the input string with occurrences of substring replaced.
*/
public static String replaceSubstring(String inString, String subString,
String replaceString) {
StringBuffer result = new StringBuffer();
int oldLoc = 0, loc = 0;
while ((loc = inString.indexOf(subString, oldLoc))!= -1) {
result.append(inString.substring(oldLoc, loc));
result.append(replaceString);
oldLoc = loc + subString.length();
}
result.append(inString.substring(oldLoc));
return result.toString();
}
/**
* Pads a string to a specified length, inserting spaces on the left
* as required. If the string is too long, characters are removed (from
* the right).
*
* @param inString the input string
* @param length the desired length of the output string
* @return the output string
*/
public static String padLeft(String inString, int length) {
return fixStringLength(inString, length, false);
}
/**
* Pads a string to a specified length, inserting spaces on the right
* as required. If the string is too long, characters are removed (from
* the right).
*
* @param inString the input string
* @param length the desired length of the output string
* @return the output string
*/
public static String padRight(String inString, int length) {
return fixStringLength(inString, length, true);
}
/**
* Pads a string to a specified length, inserting spaces as
* required. If the string is too long, characters are removed (from
* the right).
*
* @param inString the input string
* @param length the desired length of the output string
* @param right true if inserted spaces should be added to the right
* @return the output string
*/
private static /*@pure@*/ String fixStringLength(String inString, int length,
boolean right) {
if (inString.length() < length) {
while (inString.length() < length) {
inString = (right ? inString.concat(" ") : " ".concat(inString));
}
} else if (inString.length() > length) {
inString = inString.substring(0, length);
}
return inString;
}
/**
* Rounds a double and converts it into String.
*
* @param value the double value
* @param afterDecimalPoint the (maximum) number of digits permitted
* after the decimal point
* @return the double as a formatted string
*/
public static /*@pure@*/ String doubleToString(double value, int afterDecimalPoint) {
StringBuffer stringBuffer;
double temp;
int dotPosition;
long precisionValue;
temp = value * Math.pow(10.0, afterDecimalPoint);
if (Math.abs(temp) < Long.MAX_VALUE) {
precisionValue = (temp > 0) ? (long)(temp + 0.5)
: -(long)(Math.abs(temp) + 0.5);
if (precisionValue == 0) {
stringBuffer = new StringBuffer(String.valueOf(0));
} else {
stringBuffer = new StringBuffer(String.valueOf(precisionValue));
}
if (afterDecimalPoint == 0) {
return stringBuffer.toString();
}
dotPosition = stringBuffer.length() - afterDecimalPoint;
while (((precisionValue < 0) && (dotPosition < 1)) ||
(dotPosition < 0)) {
if (precisionValue < 0) {
stringBuffer.insert(1, '0');
} else {
stringBuffer.insert(0, '0');
}
dotPosition++;
}
stringBuffer.insert(dotPosition, '.');
if ((precisionValue < 0) && (stringBuffer.charAt(1) == '.')) {
stringBuffer.insert(1, '0');
} else if (stringBuffer.charAt(0) == '.') {
stringBuffer.insert(0, '0');
}
int currentPos = stringBuffer.length() - 1;
while ((currentPos > dotPosition) &&
(stringBuffer.charAt(currentPos) == '0')) {
stringBuffer.setCharAt(currentPos--, ' ');
}
if (stringBuffer.charAt(currentPos) == '.') {
stringBuffer.setCharAt(currentPos, ' ');
}
return stringBuffer.toString().trim();
}
return new String("" + value);
}
/**
* Rounds a double and converts it into a formatted decimal-justified String.
* Trailing 0's are replaced with spaces.
*
* @param value the double value
* @param width the width of the string
* @param afterDecimalPoint the number of digits after the decimal point
* @return the double as a formatted string
*/
public static /*@pure@*/ String doubleToString(double value, int width,
int afterDecimalPoint) {
String tempString = doubleToString(value, afterDecimalPoint);
char[] result;
int dotPosition;
if ((afterDecimalPoint >= width)
|| (tempString.indexOf('E') != -1)) { // Protects sci notation
return tempString;
}
// Initialize result
result = new char[width];
for (int i = 0; i < result.length; i++) {
result[i] = ' ';
}
if (afterDecimalPoint > 0) {
// Get position of decimal point and insert decimal point
dotPosition = tempString.indexOf('.');
if (dotPosition == -1) {
dotPosition = tempString.length();
} else {
result[width - afterDecimalPoint - 1] = '.';
}
} else {
dotPosition = tempString.length();
}
int offset = width - afterDecimalPoint - dotPosition;
if (afterDecimalPoint > 0) {
offset--;
}
// Not enough room to decimal align within the supplied width
if (offset < 0) {
return tempString;
}
// Copy characters before decimal point
for (int i = 0; i < dotPosition; i++) {
result[offset + i] = tempString.charAt(i);
}
// Copy characters after decimal point
for (int i = dotPosition + 1; i < tempString.length(); i++) {
result[offset + i] = tempString.charAt(i);
}
return new String(result);
}
/**
* Returns the basic class of an array class (handles multi-dimensional
* arrays).
* @param c the array to inspect
* @return the class of the innermost elements
*/
public static Class getArrayClass(Class c) {
if (c.getComponentType().isArray())
return getArrayClass(c.getComponentType());
else
return c.getComponentType();
}
/**
* Returns the dimensions of the given array. Even though the
* parameter is of type "Object" one can hand over primitve arrays, e.g.
* int[3] or double[2][4].
*
* @param array the array to determine the dimensions for
* @return the dimensions of the array
*/
public static int getArrayDimensions(Class array) {
if (array.getComponentType().isArray())
return 1 + getArrayDimensions(array.getComponentType());
else
return 1;
}
/**
* Returns the dimensions of the given array. Even though the
* parameter is of type "Object" one can hand over primitve arrays, e.g.
* int[3] or double[2][4].
*
* @param array the array to determine the dimensions for
* @return the dimensions of the array
*/
public static int getArrayDimensions(Object array) {
return getArrayDimensions(array.getClass());
}
/**
* Returns the given Array in a string representation. Even though the
* parameter is of type "Object" one can hand over primitve arrays, e.g.
* int[3] or double[2][4].
*
* @param array the array to return in a string representation
* @return the array as string
*/
public static String arrayToString(Object array) {
String result;
int dimensions;
int i;
result = "";
dimensions = getArrayDimensions(array);
if (dimensions == 0) {
result = "null";
}
else if (dimensions == 1) {
for (i = 0; i < Array.getLength(array); i++) {
if (i > 0)
result += ",";
if (Array.get(array, i) == null)
result += "null";
else
result += Array.get(array, i).toString();
}
}
else {
for (i = 0; i < Array.getLength(array); i++) {
if (i > 0)
result += ",";
result += "[" + arrayToString(Array.get(array, i)) + "]";
}
}
return result;
}
/**
* Tests if a is equal to b.
*
* @param a a double
* @param b a double
*/
public static /*@pure@*/ boolean eq(double a, double b){
return (a - b < SMALL) && (b - a < SMALL);
}
/**
* Checks if the given array contains any non-empty options.
*
* @param options an array of strings
* @exception Exception if there are any non-empty options
*/
public static void checkForRemainingOptions(String[] options)
throws Exception {
int illegalOptionsFound = 0;
StringBuffer text = new StringBuffer();
if (options == null) {
return;
}
for (int i = 0; i < options.length; i++) {
if (options[i].length() > 0) {
illegalOptionsFound++;
text.append(options[i] + ' ');
}
}
if (illegalOptionsFound > 0) {
throw new Exception("Illegal options: " + text);
}
}
/**
* Checks if the given array contains the flag "-Char". Stops
* searching at the first marker "--". If the flag is found,
* it is replaced with the empty string.
*
* @param flag the character indicating the flag.
* @param options the array of strings containing all the options.
* @return true if the flag was found
* @exception Exception if an illegal option was found
*/
public static boolean getFlag(char flag, String[] options)
throws Exception {
return getFlag("" + flag, options);
}
/**
* Checks if the given array contains the flag "-String". Stops
* searching at the first marker "--". If the flag is found,
* it is replaced with the empty string.
*
* @param flag the String indicating the flag.
* @param options the array of strings containing all the options.
* @return true if the flag was found
* @exception Exception if an illegal option was found
*/
public static boolean getFlag(String flag, String[] options)
throws Exception {
int pos = getOptionPos(flag, options);
if (pos > -1)
options[pos] = "";
return (pos > -1);
}
/**
* Gets an option indicated by a flag "-Char" from the given array
* of strings. Stops searching at the first marker "--". Replaces
* flag and option with empty strings.
*
* @param flag the character indicating the option.
* @param options the array of strings containing all the options.
* @return the indicated option or an empty string
* @exception Exception if the option indicated by the flag can't be found
*/
public static /*@non_null@*/ String getOption(char flag, String[] options)
throws Exception {
return getOption("" + flag, options);
}
/**
* Gets an option indicated by a flag "-String" from the given array
* of strings. Stops searching at the first marker "--". Replaces
* flag and option with empty strings.
*
* @param flag the String indicating the option.
* @param options the array of strings containing all the options.
* @return the indicated option or an empty string
* @exception Exception if the option indicated by the flag can't be found
*/
public static /*@non_null@*/ String getOption(String flag, String[] options)
throws Exception {
String newString;
int i = getOptionPos(flag, options);
if (i > -1) {
if (options[i].equals("-" + flag)) {
if (i + 1 == options.length) {
throw new Exception("No value given for -" + flag + " option.");
}
options[i] = "";
newString = new String(options[i + 1]);
options[i + 1] = "";
return newString;
}
if (options[i].charAt(1) == '-') {
return "";
}
}
return "";
}
/**
* Gets the index of an option or flag indicated by a flag "-Char" from
* the given array of strings. Stops searching at the first marker "--".
*
* @param flag the character indicating the option.
* @param options the array of strings containing all the options.
* @return the position if found, or -1 otherwise
*/
public static int getOptionPos(char flag, String[] options) {
return getOptionPos("" + flag, options);
}
/**
* Gets the index of an option or flag indicated by a flag "-String" from
* the given array of strings. Stops searching at the first marker "--".
*
* @param flag the String indicating the option.
* @param options the array of strings containing all the options.
* @return the position if found, or -1 otherwise
*/
public static int getOptionPos(String flag, String[] options) {
if (options == null)
return -1;
for (int i = 0; i < options.length; i++) {
if ((options[i].length() > 0) && (options[i].charAt(0) == '-')) {
// Check if it is a negative number
try {
Double.valueOf(options[i]);
}
catch (NumberFormatException e) {
// found?
if (options[i].equals("-" + flag))
return i;
// did we reach "--"?
if (options[i].charAt(1) == '-')
return -1;
}
}
}
return -1;
}
/**
* Quotes a string if it contains special characters.
*
* The following rules are applied:
*
* A character is backquoted version of it is one
* of <tt>" ' % \ \n \r \t</tt>.
*
* A string is enclosed within single quotes if a character has been
* backquoted using the previous rule above or contains
* <tt>{ }</tt> or is exactly equal to the strings
* <tt>, ? space or ""</tt> (empty string).
*
* A quoted question mark distinguishes it from the missing value which
* is represented as an unquoted question mark in arff files.
*
* @param string the string to be quoted
* @return the string (possibly quoted)
* @see #unquote(String)
*/
public static /*@pure@*/ String quote(String string) {
boolean quote = false;
// backquote the following characters
if ((string.indexOf('\n') != -1) || (string.indexOf('\r') != -1) ||
(string.indexOf('\'') != -1) || (string.indexOf('"') != -1) ||
(string.indexOf('\\') != -1) ||
(string.indexOf('\t') != -1) || (string.indexOf('%') != -1) ||
(string.indexOf('\u001E') != -1)) {
string = backQuoteChars(string);
quote = true;
}
// Enclose the string in 's if the string contains a recently added
// backquote or contains one of the following characters.
if((quote == true) ||
(string.indexOf('{') != -1) || (string.indexOf('}') != -1) ||
(string.indexOf(',') != -1) || (string.equals("?")) ||
(string.indexOf(' ') != -1) || (string.equals(""))) {
string = ("'".concat(string)).concat("'");
}
return string;
}
/**
* unquotes are previously quoted string (but only if necessary), i.e., it
* removes the single quotes around it. Inverse to quote(String).
*
* @param string the string to process
* @return the unquoted string
* @see #quote(String)
*/
public static String unquote(String string) {
if (string.startsWith("'") && string.endsWith("'")) {
string = string.substring(1, string.length() - 1);
if ((string.indexOf("\\n") != -1) || (string.indexOf("\\r") != -1) ||
(string.indexOf("\\'") != -1) || (string.indexOf("\\\"") != -1) ||
(string.indexOf("\\\\") != -1) ||
(string.indexOf("\\t") != -1) || (string.indexOf("\\%") != -1) ||
(string.indexOf("\\u001E") != -1)) {
string = unbackQuoteChars(string);
}
}
return string;
}
/**
* Converts carriage returns and new lines in a string into \r and \n.
* Backquotes the following characters: ` " \ \t and %
*
* @param string the string
* @return the converted string
* @see #unbackQuoteChars(String)
*/
public static /*@pure@*/ String backQuoteChars(String string) {
int index;
StringBuffer newStringBuffer;
// replace each of the following characters with the backquoted version
char charsFind[] = {'\\', '\'', '\t', '\n', '\r', '"', '%',
'\u001E'};
String charsReplace[] = {"\\\\", "\\'", "\\t", "\\n", "\\r", "\\\"", "\\%",
"\\u001E"};
for (int i = 0; i < charsFind.length; i++) {
if (string.indexOf(charsFind[i]) != -1 ) {
newStringBuffer = new StringBuffer();
while ((index = string.indexOf(charsFind[i])) != -1) {
if (index > 0) {
newStringBuffer.append(string.substring(0, index));
}
newStringBuffer.append(charsReplace[i]);
if ((index + 1) < string.length()) {
string = string.substring(index + 1);
} else {
string = "";
}
}
newStringBuffer.append(string);
string = newStringBuffer.toString();
}
}
return string;
}
/**
* Converts carriage returns and new lines in a string into \r and \n.
*
* @param string the string
* @return the converted string
*/
public static String convertNewLines(String string) {
int index;
// Replace with \n
StringBuffer newStringBuffer = new StringBuffer();
while ((index = string.indexOf('\n')) != -1) {
if (index > 0) {
newStringBuffer.append(string.substring(0, index));
}
newStringBuffer.append('\\');
newStringBuffer.append('n');
if ((index + 1) < string.length()) {
string = string.substring(index + 1);
} else {
string = "";
}
}
newStringBuffer.append(string);
string = newStringBuffer.toString();
// Replace with \r
newStringBuffer = new StringBuffer();
while ((index = string.indexOf('\r')) != -1) {
if (index > 0) {
newStringBuffer.append(string.substring(0, index));
}
newStringBuffer.append('\\');
newStringBuffer.append('r');
if ((index + 1) < string.length()){
string = string.substring(index + 1);
} else {
string = "";
}
}
newStringBuffer.append(string);
return newStringBuffer.toString();
}
/**
* Reverts \r and \n in a string into carriage returns and new lines.
*
* @param string the string
* @return the converted string
*/
public static String revertNewLines(String string) {
int index;
// Replace with \n
StringBuffer newStringBuffer = new StringBuffer();
while ((index = string.indexOf("\\n")) != -1) {
if (index > 0) {
newStringBuffer.append(string.substring(0, index));
}
newStringBuffer.append('\n');
if ((index + 2) < string.length()) {
string = string.substring(index + 2);
} else {
string = "";
}
}
newStringBuffer.append(string);
string = newStringBuffer.toString();
// Replace with \r
newStringBuffer = new StringBuffer();
while ((index = string.indexOf("\\r")) != -1) {
if (index > 0) {
newStringBuffer.append(string.substring(0, index));
}
newStringBuffer.append('\r');
if ((index + 2) < string.length()){
string = string.substring(index + 2);
} else {
string = "";
}
}
newStringBuffer.append(string);
return newStringBuffer.toString();
}
/**
* Returns the secondary set of options (if any) contained in
* the supplied options array. The secondary set is defined to
* be any options after the first "--". These options are removed from
* the original options array.
*
* @param options the input array of options
* @return the array of secondary options
*/
public static String[] partitionOptions(String[] options) {
for (int i = 0; i < options.length; i++) {
if (options[i].equals("--")) {
options[i++] = "";
String[] result = new String [options.length - i];
for (int j = i; j < options.length; j++) {
result[j - i] = options[j];
options[j] = "";
}
return result;
}
}
return new String [0];
}
/**
* The inverse operation of backQuoteChars().
* Converts back-quoted carriage returns and new lines in a string
* to the corresponding character ('\r' and '\n').
* Also "un"-back-quotes the following characters: ` " \ \t and %
*
* @param string the string
* @return the converted string
* @see #backQuoteChars(String)
*/
public static String unbackQuoteChars(String string) {
int index;
StringBuffer newStringBuffer;
// replace each of the following characters with the backquoted version
String charsFind[] = {"\\\\", "\\'", "\\t", "\\n", "\\r", "\\\"", "\\%",
"\\u001E"};
char charsReplace[] = {'\\', '\'', '\t', '\n', '\r', '"', '%',
'\u001E'};
int pos[] = new int[charsFind.length];
int curPos;
String str = new String(string);
newStringBuffer = new StringBuffer();
while (str.length() > 0) {
// get positions and closest character to replace
curPos = str.length();
index = -1;
for (int i = 0; i < pos.length; i++) {
pos[i] = str.indexOf(charsFind[i]);
if ( (pos[i] > -1) && (pos[i] < curPos) ) {
index = i;
curPos = pos[i];
}
}
// replace character if found, otherwise finished
if (index == -1) {
newStringBuffer.append(str);
str = "";
}
else {
newStringBuffer.append(str.substring(0, pos[index]));
newStringBuffer.append(charsReplace[index]);
str = str.substring(pos[index] + charsFind[index].length());
}
}
return newStringBuffer.toString();
}
/**
* Split up a string containing options into an array of strings,
* one for each option.
*
* @param quotedOptionString the string containing the options
* @return the array of options
* @throws Exception in case of an unterminated string, unknown character or
* a parse error
*/
public static String[] splitOptions(String quotedOptionString) throws Exception{
Vector<String> optionsVec = new Vector<String>();
String str = new String(quotedOptionString);
int i;
while (true){
//trimLeft
i = 0;
while ((i < str.length()) && (Character.isWhitespace(str.charAt(i)))) i++;
str = str.substring(i);
//stop when str is empty
if (str.length() == 0) break;
//if str start with a double quote
if (str.charAt(0) == '"'){
//find the first not anti-slached double quote
i = 1;
while(i < str.length()){
if (str.charAt(i) == str.charAt(0)) break;
if (str.charAt(i) == '\\'){
i += 1;
if (i >= str.length())
throw new Exception("String should not finish with \\");
}
i += 1;
}
if (i >= str.length()) throw new Exception("Quote parse error.");
//add the founded string to the option vector (without quotes)
String optStr = str.substring(1,i);
optStr = unbackQuoteChars(optStr);
optionsVec.addElement(optStr);
str = str.substring(i+1);
} else {
//find first whiteSpace
i=0;
while((i < str.length()) && (!Character.isWhitespace(str.charAt(i)))) i++;
//add the founded string to the option vector
String optStr = str.substring(0,i);
optionsVec.addElement(optStr);
str = str.substring(i);
}
}
//convert optionsVec to an array of String
String[] options = new String[optionsVec.size()];
for (i = 0; i < optionsVec.size(); i++) {
options[i] = (String)optionsVec.elementAt(i);
}
return options;
}
/**
* Joins all the options in an option array into a single string,
* as might be used on the command line.
*
* @param optionArray the array of options
* @return the string containing all options.
*/
public static String joinOptions(String[] optionArray) {
String optionString = "";
for (int i = 0; i < optionArray.length; i++) {
if (optionArray[i].equals("")) {
continue;
}
boolean escape = false;
for (int n = 0; n < optionArray[i].length(); n++) {
if (Character.isWhitespace(optionArray[i].charAt(n))) {
escape = true;
break;
}
}
if (escape) {
optionString += '"' + backQuoteChars(optionArray[i]) + '"';
} else {
optionString += optionArray[i];
}
optionString += " ";
}
return optionString.trim();
}
/**
* Computes entropy for an array of integers.
*
* @param counts array of counts
* @return - a log2 a - b log2 b - c log2 c + (a+b+c) log2 (a+b+c)
* when given array [a b c]
*/
public static /*@pure@*/ double info(int counts[]) {
int total = 0;
double x = 0;
for (int j = 0; j < counts.length; j++) {
x -= xlogx(counts[j]);
total += counts[j];
}
return x + xlogx(total);
}
/**
* Tests if a is smaller or equal to b.
*
* @param a a double
* @param b a double
*/
public static /*@pure@*/ boolean smOrEq(double a,double b) {
return (a-b < SMALL);
}
/**
* Tests if a is greater or equal to b.
*
* @param a a double
* @param b a double
*/
public static /*@pure@*/ boolean grOrEq(double a,double b) {
return (b-a < SMALL);
}
/**
* Tests if a is smaller than b.
*
* @param a a double
* @param b a double
*/
public static /*@pure@*/ boolean sm(double a,double b) {
return (b-a > SMALL);
}
/**
* Tests if a is greater than b.
*
* @param a a double
* @param b a double
*/
public static /*@pure@*/ boolean gr(double a,double b) {
return (a-b > SMALL);
}
/**
* Returns the kth-smallest value in the array.
*
* @param array the array of integers
* @param k the value of k
* @return the kth-smallest value
*/
public static double kthSmallestValue(int[] array, int k) {
int[] index = new int[array.length];
for (int i = 0; i < index.length; i++) {
index[i] = i;
}
return array[index[select(array, index, 0, array.length - 1, k)]];
}
/**
* Returns the kth-smallest value in the array
*
* @param array the array of double
* @param k the value of k
* @return the kth-smallest value
*/
public static double kthSmallestValue(double[] array, int k) {
int[] index = new int[array.length];
for (int i = 0; i < index.length; i++) {
index[i] = i;
}
return array[index[select(array, index, 0, array.length - 1, k)]];
}
/**
* Returns the logarithm of a for base 2.
*
* @param a a double
* @return the logarithm for base 2
*/
public static /*@pure@*/ double log2(double a) {
return Math.log(a) / log2;
}
/**
* Returns index of maximum element in a given
* array of doubles. First maximum is returned.
*
* @param doubles the array of doubles
* @return the index of the maximum element
*/
public static /*@pure@*/ int maxIndex(double[] doubles) {
double maximum = 0;
int maxIndex = 0;
for (int i = 0; i < doubles.length; i++) {
if ((i == 0) || (doubles[i] > maximum)) {
maxIndex = i;
maximum = doubles[i];
}
}
return maxIndex;
}
/**
* Returns index of maximum element in a given
* array of integers. First maximum is returned.
*
* @param ints the array of integers
* @return the index of the maximum element
*/
public static /*@pure@*/ int maxIndex(int[] ints) {
int maximum = 0;
int maxIndex = 0;
for (int i = 0; i < ints.length; i++) {
if ((i == 0) || (ints[i] > maximum)) {
maxIndex = i;
maximum = ints[i];
}
}
return maxIndex;
}
/**
* Computes the mean for an array of doubles.
*
* @param vector the array
* @return the mean
*/
public static /*@pure@*/ double mean(double[] vector) {
double sum = 0;
if (vector.length == 0) {
return 0;
}
for (int i = 0; i < vector.length; i++) {
sum += vector[i];
}
return sum / (double) vector.length;
}
/**
* Returns index of minimum element in a given
* array of integers. First minimum is returned.
*
* @param ints the array of integers
* @return the index of the minimum element
*/
public static /*@pure@*/ int minIndex(int[] ints) {
int minimum = 0;
int minIndex = 0;
for (int i = 0; i < ints.length; i++) {
if ((i == 0) || (ints[i] < minimum)) {
minIndex = i;
minimum = ints[i];
}
}
return minIndex;
}
/**
* Returns index of minimum element in a given
* array of doubles. First minimum is returned.
*
* @param doubles the array of doubles
* @return the index of the minimum element
*/
public static /*@pure@*/ int minIndex(double[] doubles) {
double minimum = 0;
int minIndex = 0;
for (int i = 0; i < doubles.length; i++) {
if ((i == 0) || (doubles[i] < minimum)) {
minIndex = i;
minimum = doubles[i];
}
}
return minIndex;
}
/**
* Normalizes the doubles in the array by their sum.
*
* @param doubles the array of double
* @exception IllegalArgumentException if sum is Zero or NaN
*/
public static void normalize(double[] doubles) {
double sum = 0;
for (int i = 0; i < doubles.length; i++) {
sum += doubles[i];
}
normalize(doubles, sum);
}
/**
* Normalizes the doubles in the array using the given value.
*
* @param doubles the array of double
* @param sum the value by which the doubles are to be normalized
* @exception IllegalArgumentException if sum is zero or NaN
*/
public static void normalize(double[] doubles, double sum) {
if (Double.isNaN(sum)) {
throw new IllegalArgumentException("Can't normalize array. Sum is NaN.");
}
if (sum == 0) {
// Maybe this should just be a return.
throw new IllegalArgumentException("Can't normalize array. Sum is zero.");
}
for (int i = 0; i < doubles.length; i++) {
doubles[i] /= sum;
}
}
/**
* Converts an array containing the natural logarithms of
* probabilities stored in a vector back into probabilities.
* The probabilities are assumed to sum to one.
*
* @param a an array holding the natural logarithms of the probabilities
* @return the converted array
*/
public static double[] logs2probs(double[] a) {
double max = a[maxIndex(a)];
double sum = 0.0;
double[] result = new double[a.length];
for(int i = 0; i < a.length; i++) {
result[i] = Math.exp(a[i] - max);
sum += result[i];
}
normalize(result, sum);
return result;
}
/**
* Returns the log-odds for a given probabilitiy.
*
* @param prob the probabilitiy
*
* @return the log-odds after the probability has been mapped to
* [Utils.SMALL, 1-Utils.SMALL]
*/
public static /*@pure@*/ double probToLogOdds(double prob) {
if (gr(prob, 1) || (sm(prob, 0))) {
throw new IllegalArgumentException("probToLogOdds: probability must " +
"be in [0,1] "+prob);
}
double p = SMALL + (1.0 - 2 * SMALL) * prob;
return Math.log(p / (1 - p));
}
/**
* Rounds a double to the next nearest integer value. The JDK version
* of it doesn't work properly.
*
* @param value the double value
* @return the resulting integer value
*/
public static /*@pure@*/ int round(double value) {
int roundedValue = value > 0
? (int)(value + 0.5)
: -(int)(Math.abs(value) + 0.5);
return roundedValue;
}
/**
* Rounds a double to the next nearest integer value in a probabilistic
* fashion (e.g. 0.8 has a 20% chance of being rounded down to 0 and a
* 80% chance of being rounded up to 1). In the limit, the average of
* the rounded numbers generated by this procedure should converge to
* the original double.
*
* @param value the double value
* @param rand the random number generator
* @return the resulting integer value
*/
public static int probRound(double value, Random rand) {
if (value >= 0) {
double lower = Math.floor(value);
double prob = value - lower;
if (rand.nextDouble() < prob) {
return (int)lower + 1;
} else {
return (int)lower;
}
} else {
double lower = Math.floor(Math.abs(value));
double prob = Math.abs(value) - lower;
if (rand.nextDouble() < prob) {
return -((int)lower + 1);
} else {
return -(int)lower;
}
}
}
/**
* Rounds a double to the given number of decimal places.
*
* @param value the double value
* @param afterDecimalPoint the number of digits after the decimal point
* @return the double rounded to the given precision
*/
public static /*@pure@*/ double roundDouble(double value,int afterDecimalPoint) {
double mask = Math.pow(10.0, (double)afterDecimalPoint);
return (double)(Math.round(value * mask)) / mask;
}
/**
* Sorts a given array of integers in ascending order and returns an
* array of integers with the positions of the elements of the original
* array in the sorted array. The sort is stable. (Equal elements remain
* in their original order.)
*
* @param array this array is not changed by the method!
* @return an array of integers with the positions in the sorted
* array.
*/
public static /*@pure@*/ int[] sort(int[] array) {
int[] index = new int[array.length];
int[] newIndex = new int[array.length];
int[] helpIndex;
int numEqual;
for (int i = 0; i < index.length; i++) {
index[i] = i;
}
quickSort(array, index, 0, array.length - 1);
// Make sort stable
int i = 0;
while (i < index.length) {
numEqual = 1;
for (int j = i + 1; ((j < index.length)
&& (array[index[i]] == array[index[j]]));
j++) {
numEqual++;
}
if (numEqual > 1) {
helpIndex = new int[numEqual];
for (int j = 0; j < numEqual; j++) {
helpIndex[j] = i + j;
}
quickSort(index, helpIndex, 0, numEqual - 1);
for (int j = 0; j < numEqual; j++) {
newIndex[i + j] = index[helpIndex[j]];
}
i += numEqual;
} else {
newIndex[i] = index[i];
i++;
}
}
return newIndex;
}
/**
* Sorts a given array of doubles in ascending order and returns an
* array of integers with the positions of the elements of the
* original array in the sorted array. NOTE THESE CHANGES: the sort
* is no longer stable and it doesn't use safe floating-point
* comparisons anymore. Occurrences of Double.NaN are treated as
* Double.MAX_VALUE
*
* @param array this array is not changed by the method!
* @return an array of integers with the positions in the sorted
* array.
*/
public static /*@pure@*/ int[] sort(/*@non_null@*/ double[] array) {
int[] index = new int[array.length];
array = (double[])array.clone();
for (int i = 0; i < index.length; i++) {
index[i] = i;
if (Double.isNaN(array[i])) {
array[i] = Double.MAX_VALUE;
}
}
quickSort(array, index, 0, array.length - 1);
return index;
}
/**
* Sorts a given array of doubles in ascending order and returns an
* array of integers with the positions of the elements of the original
* array in the sorted array. The sort is stable (Equal elements remain
* in their original order.) Occurrences of Double.NaN are treated as
* Double.MAX_VALUE
*
* @param array this array is not changed by the method!
* @return an array of integers with the positions in the sorted
* array.
*/
public static /*@pure@*/ int[] stableSort(double[] array){
int[] index = new int[array.length];
int[] newIndex = new int[array.length];
int[] helpIndex;
int numEqual;
array = (double[])array.clone();
for (int i = 0; i < index.length; i++) {
index[i] = i;
if (Double.isNaN(array[i])) {
array[i] = Double.MAX_VALUE;
}
}
quickSort(array,index,0,array.length-1);
// Make sort stable
int i = 0;
while (i < index.length) {
numEqual = 1;
for (int j = i+1; ((j < index.length) && Utils.eq(array[index[i]],
array[index[j]])); j++)
numEqual++;
if (numEqual > 1) {
helpIndex = new int[numEqual];
for (int j = 0; j < numEqual; j++)
helpIndex[j] = i+j;
quickSort(index, helpIndex, 0, numEqual-1);
for (int j = 0; j < numEqual; j++)
newIndex[i+j] = index[helpIndex[j]];
i += numEqual;
} else {
newIndex[i] = index[i];
i++;
}
}
return newIndex;
}
/**
* Computes the variance for an array of doubles.
*
* @param vector the array
* @return the variance
*/
public static /*@pure@*/ double variance(double[] vector) {
double sum = 0, sumSquared = 0;
if (vector.length <= 1) {
return 0;
}
for (int i = 0; i < vector.length; i++) {
sum += vector[i];
sumSquared += (vector[i] * vector[i]);
}
double result = (sumSquared - (sum * sum / (double) vector.length)) /
(double) (vector.length - 1);
// We don't like negative variance
if (result < 0) {
return 0;
} else {
return result;
}
}
/**
* Computes the sum of the elements of an array of doubles.
*
* @param doubles the array of double
* @return the sum of the elements
*/
public static /*@pure@*/ double sum(double[] doubles) {
double sum = 0;
for (int i = 0; i < doubles.length; i++) {
sum += doubles[i];
}
return sum;
}
/**
* Computes the sum of the elements of an array of integers.
*
* @param ints the array of integers
* @return the sum of the elements
*/
public static /*@pure@*/ int sum(int[] ints) {
int sum = 0;
for (int i = 0; i < ints.length; i++) {
sum += ints[i];
}
return sum;
}
/**
* Returns c*log2(c) for a given integer value c.
*
* @param c an integer value
* @return c*log2(c) (but is careful to return 0 if c is 0)
*/
public static /*@pure@*/ double xlogx(int c) {
if (c == 0) {
return 0.0;
}
return c * Utils.log2((double) c);
}
/**
* Partitions the instances around a pivot. Used by quicksort and
* kthSmallestValue.
*
* @param array the array of doubles to be sorted
* @param index the index into the array of doubles
* @param l the first index of the subset
* @param r the last index of the subset
*
* @return the index of the middle element
*/
private static int partition(double[] array, int[] index, int l, int r) {
double pivot = array[index[(l + r) / 2]];
int help;
while (l < r) {
while ((array[index[l]] < pivot) && (l < r)) {
l++;
}
while ((array[index[r]] > pivot) && (l < r)) {
r--;
}
if (l < r) {
help = index[l];
index[l] = index[r];
index[r] = help;
l++;
r--;
}
}
if ((l == r) && (array[index[r]] > pivot)) {
r--;
}
return r;
}
/**
* Partitions the instances around a pivot. Used by quicksort and
* kthSmallestValue.
*
* @param array the array of integers to be sorted
* @param index the index into the array of integers
* @param l the first index of the subset
* @param r the last index of the subset
*
* @return the index of the middle element
*/
private static int partition(int[] array, int[] index, int l, int r) {
double pivot = array[index[(l + r) / 2]];
int help;
while (l < r) {
while ((array[index[l]] < pivot) && (l < r)) {
l++;
}
while ((array[index[r]] > pivot) && (l < r)) {
r--;
}
if (l < r) {
help = index[l];
index[l] = index[r];
index[r] = help;
l++;
r--;
}
}
if ((l == r) && (array[index[r]] > pivot)) {
r--;
}
return r;
}
/**
* Implements quicksort according to Manber's "Introduction to
* Algorithms".
*
* @param array the array of doubles to be sorted
* @param index the index into the array of doubles
* @param left the first index of the subset to be sorted
* @param right the last index of the subset to be sorted
*/
//@ requires 0 <= first && first <= right && right < array.length;
//@ requires (\forall int i; 0 <= i && i < index.length; 0 <= index[i] && index[i] < array.length);
//@ requires array != index;
// assignable index;
private static void quickSort(/*@non_null@*/ double[] array, /*@non_null@*/ int[] index,
int left, int right) {
if (left < right) {
int middle = partition(array, index, left, right);
quickSort(array, index, left, middle);
quickSort(array, index, middle + 1, right);
}
}
/**
* Implements quicksort according to Manber's "Introduction to
* Algorithms".
*
* @param array the array of integers to be sorted
* @param index the index into the array of integers
* @param left the first index of the subset to be sorted
* @param right the last index of the subset to be sorted
*/
//@ requires 0 <= first && first <= right && right < array.length;
//@ requires (\forall int i; 0 <= i && i < index.length; 0 <= index[i] && index[i] < array.length);
//@ requires array != index;
// assignable index;
private static void quickSort(/*@non_null@*/ int[] array, /*@non_null@*/ int[] index,
int left, int right) {
if (left < right) {
int middle = partition(array, index, left, right);
quickSort(array, index, left, middle);
quickSort(array, index, middle + 1, right);
}
}
/**
* Implements computation of the kth-smallest element according
* to Manber's "Introduction to Algorithms".
*
* @param array the array of double
* @param index the index into the array of doubles
* @param left the first index of the subset
* @param right the last index of the subset
* @param k the value of k
*
* @return the index of the kth-smallest element
*/
//@ requires 0 <= first && first <= right && right < array.length;
private static int select(/*@non_null@*/ double[] array, /*@non_null@*/ int[] index,
int left, int right, int k) {
if (left == right) {
return left;
} else {
int middle = partition(array, index, left, right);
if ((middle - left + 1) >= k) {
return select(array, index, left, middle, k);
} else {
return select(array, index, middle + 1, right, k - (middle - left + 1));
}
}
}
/**
* Converts a File's absolute path to a path relative to the user
* (ie start) directory. Includes an additional workaround for Cygwin, which
* doesn't like upper case drive letters.
* @param absolute the File to convert to relative path
* @return a File with a path that is relative to the user's directory
* @exception Exception if the path cannot be constructed
*/
public static File convertToRelativePath(File absolute) throws Exception {
File result;
String fileStr;
result = null;
// if we're running windows, it could be Cygwin
if (File.separator.equals("\\")) {
// Cygwin doesn't like upper case drives -> try lower case drive
try {
fileStr = absolute.getPath();
fileStr = fileStr.substring(0, 1).toLowerCase()
+ fileStr.substring(1);
result = createRelativePath(new File(fileStr));
}
catch (Exception e) {
// no luck with Cygwin workaround, convert it like it is
result = createRelativePath(absolute);
}
}
else {
result = createRelativePath(absolute);
}
return result;
}
/**
* Converts a File's absolute path to a path relative to the user
* (ie start) directory.
*
* @param absolute the File to convert to relative path
* @return a File with a path that is relative to the user's directory
* @exception Exception if the path cannot be constructed
*/
protected static File createRelativePath(File absolute) throws Exception {
File userDir = new File(System.getProperty("user.dir"));
String userPath = userDir.getAbsolutePath() + File.separator;
String targetPath = (new File(absolute.getParent())).getPath()
+ File.separator;
String fileName = absolute.getName();
StringBuffer relativePath = new StringBuffer();
// relativePath.append("."+File.separator);
// System.err.println("User dir "+userPath);
// System.err.println("Target path "+targetPath);
// file is in user dir (or subdir)
int subdir = targetPath.indexOf(userPath);
if (subdir == 0) {
if (userPath.length() == targetPath.length()) {
relativePath.append(fileName);
} else {
int ll = userPath.length();
relativePath.append(targetPath.substring(ll));
relativePath.append(fileName);
}
} else {
int sepCount = 0;
String temp = new String(userPath);
while (temp.indexOf(File.separator) != -1) {
int ind = temp.indexOf(File.separator);
sepCount++;
temp = temp.substring(ind+1, temp.length());
}
String targetTemp = new String(targetPath);
String userTemp = new String(userPath);
int tcount = 0;
while (targetTemp.indexOf(File.separator) != -1) {
int ind = targetTemp.indexOf(File.separator);
int ind2 = userTemp.indexOf(File.separator);
String tpart = targetTemp.substring(0,ind+1);
String upart = userTemp.substring(0,ind2+1);
if (tpart.compareTo(upart) != 0) {
if (tcount == 0) {
tcount = -1;
}
break;
}
tcount++;
targetTemp = targetTemp.substring(ind+1, targetTemp.length());
userTemp = userTemp.substring(ind2+1, userTemp.length());
}
if (tcount == -1) {
// then target file is probably on another drive (under windows)
throw new Exception("Can't construct a path to file relative to user "
+"dir.");
}
if (targetTemp.indexOf(File.separator) == -1) {
targetTemp = "";
}
for (int i = 0; i < sepCount - tcount; i++) {
relativePath.append(".."+File.separator);
}
relativePath.append(targetTemp + fileName);
}
// System.err.println("new path : "+relativePath.toString());
return new File(relativePath.toString());
}
/**
* Implements computation of the kth-smallest element according
* to Manber's "Introduction to Algorithms".
*
* @param array the array of integers
* @param index the index into the array of integers
* @param left the first index of the subset
* @param right the last index of the subset
* @param k the value of k
*
* @return the index of the kth-smallest element
*/
//@ requires 0 <= first && first <= right && right < array.length;
private static int select(/*@non_null@*/ int[] array, /*@non_null@*/ int[] index,
int left, int right, int k) {
if (left == right) {
return left;
} else {
int middle = partition(array, index, left, right);
if ((middle - left + 1) >= k) {
return select(array, index, left, middle, k);
} else {
return select(array, index, middle + 1, right, k - (middle - left + 1));
}
}
}
/**
* Breaks up the string, if wider than "columns" characters.
*
* @param s the string to process
* @param columns the width in columns
* @return the processed string
*/
public static String[] breakUp(String s, int columns) {
Vector<String> result;
String line;
BreakIterator boundary;
int boundaryStart;
int boundaryEnd;
String word;
String punctuation;
int i;
String[] lines;
result = new Vector<String>();
punctuation = " .,;:!?'\"";
lines = s.split("\n");
for (i = 0; i < lines.length; i++) {
boundary = BreakIterator.getWordInstance();
boundary.setText(lines[i]);
boundaryStart = boundary.first();
boundaryEnd = boundary.next();
line = "";
while (boundaryEnd != BreakIterator.DONE) {
word = lines[i].substring(boundaryStart, boundaryEnd);
if (line.length() >= columns) {
if (word.length() == 1) {
if (punctuation.indexOf(word.charAt(0)) > -1) {
line += word;
word = "";
}
}
result.add(line);
line = "";
}
line += word;
boundaryStart = boundaryEnd;
boundaryEnd = boundary.next();
}
if (line.length() > 0)
result.add(line);
}
return result.toArray(new String[result.size()]);
}
/**
* Creates a new instance of an object given it's class name and
* (optional) arguments to pass to it's setOptions method. If the
* object implements OptionHandler and the options parameter is
* non-null, the object will have it's options set. Example use:<p>
*
* <code> <pre>
* String classifierName = Utils.getOption('W', options);
* Classifier c = (Classifier)Utils.forName(Classifier.class,
* classifierName,
* options);
* setClassifier(c);
* </pre></code>
*
* @param classType the class that the instantiated object should
* be assignable to -- an exception is thrown if this is not the case
* @param className the fully qualified class name of the object
* @param options an array of options suitable for passing to setOptions. May
* be null. Any options accepted by the object will be removed from the
* array.
* @return the newly created object, ready for use.
* @exception Exception if the class name is invalid, or if the
* class is not assignable to the desired class type, or the options
* supplied are not acceptable to the object
*/
public static Object forName(Class<?> classType,
String className,
String[] options) throws Exception {
Class<?> c = null;
try {
c = Class.forName(className);
} catch (Exception ex) {
throw new Exception("Can't find class called: " + className);
}
if (!classType.isAssignableFrom(c)) {
throw new Exception(classType.getName() + " is not assignable from "
+ className);
}
Object o = c.newInstance();
/*if ((o instanceof OptionHandler)
&& (options != null)) {
((OptionHandler)o).setOptions(options);
Utils.checkForRemainingOptions(options);
}*/
return o;
}
}