blob: 0648474bb546734935ec192bd4020f06101fe855 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.util;
import java.io.Serializable;
import java.util.NoSuchElementException;
/**
* This string tokenizer is essentially a simplified StringTokenizer.
* In addition to the default functionality, it allows the tokenizer to be reset to a new string,
* and it makes the simplifying assumptions that (1) delimiters are never returned as tokens, and
* (2) the delimiter is a single character.
*
*/
public class FastStringTokenizer implements Serializable
{
    private static final long serialVersionUID = 4051436015710778611L;

    /** String currently being tokenized; null until reset(String) supplies one. */
    private String _string = null;

    /** Single delimiter character that separates tokens. */
    private char _del = 0;

    /** Index in _string at which the search for the next token starts. */
    private int _pos = -1;

    /**
     * Constructs a tokenizer for the given single-character delimiter.
     * No string is attached yet; call {@link #reset(String)} before
     * requesting tokens.
     *
     * @param delimiter
     *            the delimiter character that separates tokens
     */
    public FastStringTokenizer(char delimiter)
    {
        _del = delimiter;
        reset( null );
    }

    /**
     * Attaches a new string to this tokenizer and rewinds the read
     * position to the beginning of that string.
     *
     * @param string
     *            the string to be tokenized
     */
    public void reset( String string )
    {
        _string = string;
        _pos = 0;
    }

    /**
     * Returns the next token in the string as a String. Runs of
     * consecutive delimiters are skipped, so empty tokens are never
     * returned.
     *
     * @return next token in the string as a String
     * @exception NoSuchElementException
     *                if no tokens remain or no string has been set
     */
    public String nextToken()
    {
        if( _string == null )
            throw new NoSuchElementException("No string set, call reset(String) first.");

        final int len = _string.length();
        int start = _pos;

        //find start (skip over leading delimiters)
        while( start < len && _string.charAt(start) == _del )
            start++;

        //no next token
        if( start >= len )
            throw new NoSuchElementException();

        //find end (next delimiter); charAt(start) is not a delimiter here,
        //hence end is either -1 or strictly greater than start
        int end = _string.indexOf(_del, start);
        if( end < 0 ) {
            //last token: move the position to the end of the string (and not
            //to -1) such that subsequent calls fail with NoSuchElementException
            _pos = len;
            return _string.substring(start);
        }

        _pos = end;
        return _string.substring(start, end);
    }

    ////////////////////////////////////////
    // Custom parsing methods for textcell
    ////////////////////////////////////////

    /**
     * Parses the next token as an int.
     *
     * @return next token parsed via Integer.parseInt
     * @exception NoSuchElementException
     *                if no tokens remain
     * @exception NumberFormatException
     *                if the token is not a parsable int
     */
    public int nextInt()
    {
        return Integer.parseInt( nextToken() );
    }

    /**
     * Parses the next token as a long.
     *
     * @return next token parsed via Long.parseLong
     * @exception NoSuchElementException
     *                if no tokens remain
     * @exception NumberFormatException
     *                if the token is not a parsable long
     */
    public long nextLong()
    {
        return Long.parseLong( nextToken() );
    }

    /**
     * Parses the next token as a double.
     *
     * @return next token parsed via Double.parseDouble
     * @exception NoSuchElementException
     *                if no tokens remain
     * @exception NumberFormatException
     *                if the token is not a parsable double
     */
    public double nextDouble()
    {
        //see nextDoubleForParallel, we use the same double parsing
        //for sequential and parallel parsing because (1) it is faster (~10%)
        //and (2) for consistency between sequential and parallel readers
        return Double.parseDouble( nextToken() );
    }

    /**
     * Parses the next token as a double; intended for multi-threaded
     * readers. Currently identical to {@link #nextDouble()}.
     *
     * @return next token parsed via Double.parseDouble
     * @exception NoSuchElementException
     *                if no tokens remain
     * @exception NumberFormatException
     *                if the token is not a parsable double
     */
    public double nextDoubleForParallel()
    {
        //NOTE: Depending on the platform, string-2-double conversions were
        //the main bottleneck in reading text data, with severe contention on
        //multi-threaded parsing on Linux JDK (synchronized static cache in
        //JDK7). This is a known issue that has been fixed in JDK8, see
        //JDK-7032154 : Performance tuning of sun.misc.FloatingDecimal/FormattedFloatingDecimal.
        //Hence, we rely on the default parsing instead of a custom workaround,
        //which would not guarantee exactly the same results due to potential
        //round-off errors.
        return Double.parseDouble( nextToken() );
    }
}