// blob: c1802cbefb727a0a393c30a3965272c8ed53183d
/**************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/
package org.openoffice.xmerge.converter.xml.sxc.pexcel.records.formula;
import java.io.*;
import java.util.Vector;
import java.util.Enumeration;
import org.openoffice.xmerge.util.Debug;
import org.openoffice.xmerge.util.EndianConverter;
import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.DefinedName;
import org.openoffice.xmerge.converter.xml.sxc.pexcel.records.Workbook;
/**
 * The TokenDecoder decodes a Pocket Excel formula <code>byte[]</code> into
 * the equivalent series of <code>Token</code>s. The only public methods apart
 * from the default constructor are <code>setWorkbook</code> and
 * <code>getTokenVector</code>. The latter takes an entire formula as a pexcel
 * byte[] and decodes it token by token, adding each <code>Token</code> to a
 * <code>Vector</code> which is returned once all the bytes have been consumed.
 * The decoder supports the following tokens.<br><br>
 *
 * Operands   Floating point numbers, strings, defined names, cell references
 *            (absolute and relative), cell ranges and their 3D variants<br>
 * Operators  +, -, *, /, &lt;, &gt;, &lt;=, &gt;=, =, &lt;&gt;, unary +, unary -
 *            and %<br>
 * Functions  All pexcel fixed and variable argument functions
 *
 */
public class TokenDecoder {

    /** Bits of a row word holding the relative/absolute addressing flags. */
    private static final int RELATIVE_FLAGS_MASK = 0xC000;

    /** Number of bits the addressing flags are shifted right by. */
    private static final int RELATIVE_FLAGS_SHIFT = 14;

    /** Bits of a row word holding the 0 based row index. */
    private static final int ROW_MASK = 0x3FFF;

    /** Number of letters available for one column digit. */
    private static final int LETTERS = 26;

    private TokenFactory tf;
    private FunctionLookup fl;
    private OperatorLookup operatorLookup;
    private OperandLookup operandLookup;
    private Workbook wb;

    /**
     * Default Constructor initializes the <code>TokenFactory</code> for
     * generating <code>Token</code>s and the function, operator and operand
     * lookups used for generating Strings from token ids.
     */
    public TokenDecoder() {
        tf = new TokenFactory();
        fl = new FunctionLookup();
        operatorLookup = new OperatorLookup();
        operandLookup = new OperandLookup();
    }

    /**
     * Sets global workbook data needed for defined names and for the sheet
     * names of 3D references. It must be called before a formula containing
     * such tokens is decoded, as those tokens are resolved against it.
     *
     * @param wb The <code>Workbook</code> to resolve names against
     */
    public void setWorkbook(Workbook wb) {
        Debug.log(Debug.TRACE, "TokenDecoder : setWorkbook");
        this.wb = wb;
    }

    /**
     * Returns a <code>Vector</code> of <code>Token</code> decoded from a
     * byte[]. The byte[] is first converted to a
     * <code>ByteArrayInputStream</code> as this is the easiest way of reading
     * bytes. Each token id byte selects a reader which consumes the operand
     * bytes belonging to that token. Bytes which are not a known token id are
     * logged and skipped.
     *
     * @param formula A Pocket Excel Formula byte[]
     * @return A <code>Vector</code> of decoded <code>Token</code>
     */
    public Vector getTokenVector(byte[] formula) {
        Vector v = new Vector();
        ByteArrayInputStream bis = new ByteArrayInputStream(formula);
        int b;

        while ((b = bis.read()) != -1) {
            switch (b) {
                case TokenConstants.TAREA3D:
                    Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: ");
                    v.add(read3DCellAreaRefToken(bis));
                    Debug.log(Debug.TRACE, "Decoded 3D Area Cell Reference: " + v.lastElement());
                    break;
                case TokenConstants.TREF3D:
                    Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: ");
                    v.add(read3DCellRefToken(bis));
                    Debug.log(Debug.TRACE, "Decoded 3D Cell Reference: " + v.lastElement());
                    break;
                case TokenConstants.TREF:
                    v.add(readCellRefToken(bis));
                    Debug.log(Debug.TRACE, "Decoded Cell Reference: " + v.lastElement());
                    break;
                case TokenConstants.TAREA:
                    v.add(readCellAreaRefToken(bis));
                    Debug.log(Debug.TRACE, "Decoded Cell Area Reference: " + v.lastElement());
                    break;
                case TokenConstants.TNUM:
                    v.add(readNumToken(bis));
                    Debug.log(Debug.TRACE, "Decoded number : " + v.lastElement());
                    break;
                case TokenConstants.TFUNCVAR:
                    v.add(readFunctionVarToken(bis));
                    Debug.log(Debug.TRACE, "Decoded variable argument function: " + v.lastElement());
                    break;
                case TokenConstants.TFUNC:
                    v.add(readFunctionToken(bis));
                    Debug.log(Debug.TRACE, "Decoded function: " + v.lastElement());
                    break;
                case TokenConstants.TSTRING:
                    v.add(readStringToken(bis));
                    Debug.log(Debug.TRACE, "Decoded string: " + v.lastElement());
                    break;
                case TokenConstants.TNAME:
                    v.add(readNameToken(bis));
                    Debug.log(Debug.TRACE, "Decoded defined name: " + v.lastElement());
                    break;
                case TokenConstants.TUPLUS:
                case TokenConstants.TUMINUS:
                case TokenConstants.TPERCENT:
                    v.add(readOperatorToken(b, 1));
                    Debug.log(Debug.TRACE, "Decoded Unary operator : " + v.lastElement());
                    break;
                case TokenConstants.TADD:
                case TokenConstants.TSUB:
                case TokenConstants.TMUL:
                case TokenConstants.TDIV:
                case TokenConstants.TLESS:
                case TokenConstants.TLESSEQUALS:
                case TokenConstants.TEQUALS:
                case TokenConstants.TGTEQUALS:
                case TokenConstants.TGREATER:
                case TokenConstants.TNEQUALS:
                    v.add(readOperatorToken(b, 2));
                    Debug.log(Debug.TRACE, "Decoded Binary operator : " + v.lastElement());
                    break;
                default:
                    Debug.log(Debug.TRACE, "Unrecognized byte : " + b);
            }
        }
        return v;
    }

    /**
     * Converts a zero based integer to an upper case letter
     * (eg. 0=A, 1=B). It assumes the integer is less than 26.
     *
     * @param i A 0 based index
     * @return The equivalent character
     */
    private char int2Char(int i) {
        return (char) ('A' + i);
    }

    /**
     * Reads the next two bytes from the stream, in stream order, and
     * converts them to a number with <code>EndianConverter.readShort</code>.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded 16 bit value
     */
    private int readShort(ByteArrayInputStream bis) {
        byte[] buffer = new byte[2];
        buffer[0] = (byte) bis.read();
        buffer[1] = (byte) bis.read();
        return EndianConverter.readShort(buffer);
    }

    /**
     * Reads a single column index byte from the stream as an unsigned value.
     * Casting the byte to a signed <code>byte</code> would turn every column
     * from 128 upwards into a negative index.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            byte.
     * @return The 0 based column index in the range 0 to 255
     */
    private int readColumn(ByteArrayInputStream bis) {
        return bis.read() & 0xFF;
    }

    /**
     * Reads a String token from the <code>ByteArrayInputStream</code>. The
     * token consists of a character count byte, an options byte and two bytes
     * per character which are decoded as UTF-16LE. The result is wrapped in
     * double quotes.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded String <code>Token</code>
     */
    private Token readStringToken(ByteArrayInputStream bis) {
        int charCount = bis.read();
        int options = bis.read();

        // read() answers -1 at the end of the stream; a truncated header is
        // treated as an empty string instead of sizing an array with -2.
        int len = (charCount < 0) ? 0 : charCount * 2;
        Debug.log(Debug.TRACE,"String length is " + len + " and Options Flag is " + options);

        byte[] stringBytes = new byte[len];
        int numRead = (len == 0) ? 0 : bis.read(stringBytes, 0, len);
        if (numRead != len) {
            Debug.log(Debug.TRACE,"Expected " + len + " bytes. Could only read " + numRead + " bytes.");
        }

        // Only decode what was really read, rounded down to whole 16 bit
        // characters, so that a short read does not append NUL characters.
        int usable = (numRead < 0) ? 0 : (numRead & ~1);

        StringBuffer outputString = new StringBuffer();
        outputString.append('"');
        try {
            Debug.log(Debug.TRACE,"Using LE encoding");
            outputString.append(new String(stringBytes, 0, usable, "UTF-16LE"));
        } catch (UnsupportedEncodingException eUE) {
            // fall back to default encoding
            outputString.append(new String(stringBytes, 0, usable));
        }
        outputString.append('"');

        return (tf.getOperandToken(outputString.toString(), "STRING"));
    }

    /**
     * Reads a Defined Name token from the <code>ByteArrayInputStream</code>.
     * The token holds an index into the defined names of the workbook set
     * with <code>setWorkbook</code>; an index of 1 selects the first name.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded Name <code>Token</code>
     */
    private Token readNameToken(ByteArrayInputStream bis) {
        int nameIndex = readShort(bis);
        bis.skip(12); // the next 12 bytes are unused

        // Step over the names in front of the wanted one.
        Enumeration e = wb.getDefinedNames();
        for (int i = 1; i < nameIndex; i++) {
            e.nextElement();
        }
        Debug.log(Debug.TRACE,"Name index is " + nameIndex);
        DefinedName dn = (DefinedName)e.nextElement();
        Debug.log(Debug.TRACE,"DefinedName is " + dn.getName());
        return (tf.getOperandToken(dn.getName(), "NAME"));
    }

    /**
     * Reads a Cell Reference token from the <code>ByteArrayInputStream</code>.
     * The token is a row word, whose top two bits are the addressing flags,
     * followed by a column byte.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded Cell Reference <code>Token</code>
     */
    private Token readCellRefToken(ByteArrayInputStream bis) {
        int rowWord = readShort(bis);
        int relativeFlags = (rowWord & RELATIVE_FLAGS_MASK) >> RELATIVE_FLAGS_SHIFT;
        int formulaRow = rowWord & ROW_MASK;
        int formulaCol = readColumn(bis);

        String outputString = int2CellStr(formulaRow, formulaCol, relativeFlags);
        return (tf.getOperandToken(outputString,"CELL_REFERENCE"));
    }

    /**
     * Reads a 3D Cell Reference token (a cell reference qualified with one
     * or two sheets) from the <code>ByteArrayInputStream</code>. The sheet
     * names are looked up in the workbook set with <code>setWorkbook</code>.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded 3D Cell Reference <code>Token</code>
     */
    private Token read3DCellRefToken(ByteArrayInputStream bis) {
        bis.skip(10); // leading 10 bytes are not interpreted by this decoder

        int sheet1 = readShort(bis);
        int sheet2 = readShort(bis);

        int rowWord = readShort(bis);
        int relativeFlags = (rowWord & RELATIVE_FLAGS_MASK) >> RELATIVE_FLAGS_SHIFT;
        int formulaRow = rowWord & ROW_MASK;
        int formulaCol = readColumn(bis);

        String cellRef = "." + int2CellStr(formulaRow, formulaCol, relativeFlags);
        String outputString;
        if (sheet1 == sheet2) {
            outputString = "$" + wb.getSheetName(sheet1) + cellRef;
        } else {
            outputString = "$" + wb.getSheetName(sheet1) + cellRef + ":$" + wb.getSheetName(sheet2) + cellRef;
        }

        return (tf.getOperandToken(outputString,"3D_CELL_REFERENCE"));
    }

    /**
     * Reads a 3D Cell Area Reference token (a cell range qualified with one
     * or two sheets) from the <code>ByteArrayInputStream</code>. The sheet
     * names are looked up in the workbook set with <code>setWorkbook</code>.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded 3D Cell Area Reference <code>Token</code>
     */
    private Token read3DCellAreaRefToken(ByteArrayInputStream bis) {
        bis.skip(10); // leading 10 bytes are not interpreted by this decoder

        int sheet1 = readShort(bis);
        int sheet2 = readShort(bis);

        int rowWord1 = readShort(bis);
        int relativeFlags1 = (rowWord1 & RELATIVE_FLAGS_MASK) >> RELATIVE_FLAGS_SHIFT;
        int formulaRow1 = rowWord1 & ROW_MASK;

        int rowWord2 = readShort(bis);
        int relativeFlags2 = (rowWord2 & RELATIVE_FLAGS_MASK) >> RELATIVE_FLAGS_SHIFT;
        int formulaRow2 = rowWord2 & ROW_MASK;

        // Both row words come first, the two column bytes follow them.
        int formulaCol1 = readColumn(bis);
        int formulaCol2 = readColumn(bis);

        String cellRef1 = "." + int2CellStr(formulaRow1, formulaCol1, relativeFlags1);
        String cellRef2 = int2CellStr(formulaRow2, formulaCol2, relativeFlags2);

        String outputString;
        if (sheet1 == sheet2) {
            outputString = "$" + wb.getSheetName(sheet1) + cellRef1 + ":" + cellRef2;
        } else {
            outputString = "$" + wb.getSheetName(sheet1) + cellRef1 + ":$" + wb.getSheetName(sheet2) + "." + cellRef2;
        }

        return (tf.getOperandToken(outputString,"3D_CELL_AREA_REFERENCE"));
    }

    /**
     * Converts a row and col 0 based index to a spreadsheet cell reference.
     * It also has a relativeFlags which indicates whether or not the
     * Cell Reference is relative or absolute (Absolute is denoted with '$')
     *
     * 00 = absolute row, absolute col
     * 01 = absolute row, relative col
     * 10 = relative row, absolute col
     * 11 = relative row, relative col
     *
     * Columns 0 to 25 map to A to Z, columns from 26 upwards map to the two
     * letter names AA, AB and so on (eg. 25=Z, 26=AA, 51=AZ, 255=IV).
     *
     * @param row The cell reference 0 based index to the row
     * @param col The cell reference 0 based index to the column
     * @param relativeFlags Flags indicating addressing of row and column
     * @return A <code>String</code> representing a cell reference
     */
    private String int2CellStr(int row, int col, int relativeFlags) {
        StringBuffer cellRef = new StringBuffer();

        if ((relativeFlags & 1) == 0) {
            cellRef.append('$');
        }

        // The leading letter only exists from column 26 (AA) onwards; it
        // counts completed runs of 26 columns, with the first run being A.
        if (col >= LETTERS) {
            cellRef.append(int2Char(col / LETTERS - 1));
        }
        cellRef.append(int2Char(col % LETTERS));

        if ((relativeFlags & 2) == 0) {
            cellRef.append('$');
        }
        cellRef.append(row + 1);

        return cellRef.toString();
    }

    /**
     * Reads a Cell Area Reference (cell range) <code>Token</code> from
     * the <code>ByteArrayInputStream</code>
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The equivalent Cell Area Reference (cell range)
     *         <code>Token</code>
     */
    private Token readCellAreaRefToken(ByteArrayInputStream bis) {
        int rowWord1 = readShort(bis);
        int relativeFlags1 = (rowWord1 & RELATIVE_FLAGS_MASK) >> RELATIVE_FLAGS_SHIFT;
        int formulaRow1 = rowWord1 & ROW_MASK;

        int rowWord2 = readShort(bis);
        int relativeFlags2 = (rowWord2 & RELATIVE_FLAGS_MASK) >> RELATIVE_FLAGS_SHIFT;
        int formulaRow2 = rowWord2 & ROW_MASK;

        // Both row words come first, the two column bytes follow them.
        int formulaCol1 = readColumn(bis);
        int formulaCol2 = readColumn(bis);

        String outputString = int2CellStr(formulaRow1, formulaCol1, relativeFlags1);
        outputString += (":" + int2CellStr(formulaRow2, formulaCol2, relativeFlags2));

        return (tf.getOperandToken(outputString,"CELL_AREA_REFERENCE"));
    }

    /**
     * Reads a Number (floating point) token from the
     * <code>ByteArrayInputStream</code>. The next eight bytes are converted
     * with <code>EndianConverter.readDouble</code>.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded Number <code>Token</code>
     */
    private Token readNumToken(ByteArrayInputStream bis) {
        byte[] numBuffer = new byte[8];

        for (int j = 0; j < numBuffer.length; j++) {
            numBuffer[j] = (byte) bis.read();
        }

        return (tf.getOperandToken(Double.toString(EndianConverter.readDouble(numBuffer)),"NUMBER"));
    }

    /**
     * Creates an Operator token for a token id which was already read from
     * the formula. Unary plus and minus are mapped to "+" and "-" directly,
     * every other operator symbol comes from the <code>OperatorLookup</code>.
     *
     * @param b A Pocket Excel number representing an operator.
     * @param args The number of arguments this operator takes.
     * @return The decoded Operator <code>Token</code>
     */
    private Token readOperatorToken(int b, int args) {
        if (b == TokenConstants.TUPLUS) {
            return tf.getOperatorToken("+", args);
        }
        if (b == TokenConstants.TUMINUS) {
            return tf.getOperatorToken("-", args);
        }
        return tf.getOperatorToken(operatorLookup.getStringFromID(b), args);
    }

    /**
     * Read a Function token from the <code>ByteArrayInputStream</code>
     * This function can have any number of arguments and this number is read
     * in with the record
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded variable argument Function <code>Token</code>
     */
    private Token readFunctionVarToken(ByteArrayInputStream bis) {
        // The argument count byte precedes the two byte function id.
        int numArgs = bis.read();
        int functionID = readShort(bis);
        return (tf.getFunctionToken(fl.getStringFromID(functionID),numArgs));
    }

    /**
     * Read a Function token from the <code>ByteArrayInputStream</code>
     * This function has a fixed number of arguments which it will get
     * from <code>FunctionLookup</code>.
     *
     * @param bis The <code>ByteArrayInputStream</code> from which we read the
     *            bytes.
     * @return The decoded fixed argument Function <code>Token</code>
     */
    private Token readFunctionToken(ByteArrayInputStream bis) {
        int functionID = readShort(bis);
        String functionName = fl.getStringFromID(functionID);
        return (tf.getFunctionToken(functionName,fl.getArgCountFromString(functionName)));
    }
}