blob: 3651ab93b21da75d35e3697e26e902bd31df0c9b [file] [log] [blame]
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.ss.formula.ptg;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.common.Duplicatable;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.util.GenericRecordJsonWriter;
import org.apache.poi.util.LittleEndianByteArrayOutputStream;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
/**
* <tt>Ptg</tt> represents a syntactic token in a formula. 'PTG' is an acronym for
* '<b>p</b>arse <b>t</b>hin<b>g</b>'. Originally, the name referred to the single
* byte identifier at the start of the token, but in POI, <tt>Ptg</tt> encapsulates
* the whole formula token (initial byte + value data).
* <p>
*
* <tt>Ptg</tt>s are logically arranged in a tree representing the structure of the
* parsed formula. However, in BIFF files <tt>Ptg</tt>s are written/read in
* <em>Reverse-Polish Notation</em> order. The RPN ordering also simplifies formula
* evaluation logic, so POI mostly accesses <tt>Ptg</tt>s in the same way.
*/
public abstract class Ptg implements Duplicatable, GenericRecord {
public static final Ptg[] EMPTY_PTG_ARRAY = { };
public static final byte CLASS_REF = 0x00;
public static final byte CLASS_VALUE = 0x20;
public static final byte CLASS_ARRAY = 0x40;
private byte ptgClass = CLASS_REF; //base ptg
protected Ptg() {}
protected Ptg(Ptg other) {
ptgClass = other.ptgClass;
}
/**
* Reads <tt>size</tt> bytes of the input stream, to create an array of <tt>Ptg</tt>s.
* Extra data (beyond <tt>size</tt>) may be read if and <tt>ArrayPtg</tt>s are present.
*/
public static Ptg[] readTokens(int size, LittleEndianInput in) {
List<Ptg> temp = new ArrayList<>(4 + size / 2);
int pos = 0;
boolean hasArrayPtgs = false;
while (pos < size) {
Ptg ptg = Ptg.createPtg(in);
if (ptg instanceof ArrayInitialPtg) {
hasArrayPtgs = true;
}
pos += ptg.getSize();
temp.add(ptg);
}
if(pos != size) {
throw new RuntimeException("Ptg array size mismatch");
}
if (hasArrayPtgs) {
Ptg[] result = toPtgArray(temp);
for (int i=0;i<result.length;i++) {
if (result[i] instanceof ArrayInitialPtg) {
result[i] = ((ArrayInitialPtg) result[i]).finishReading(in);
}
}
return result;
}
return toPtgArray(temp);
}
public static Ptg createPtg(LittleEndianInput in) {
byte id = in.readByte();
if (id < 0x20) {
return createBasePtg(id, in);
}
Ptg retval = createClassifiedPtg(id, in);
if (id >= 0x60) {
retval.setClass(CLASS_ARRAY);
} else if (id >= 0x40) {
retval.setClass(CLASS_VALUE);
} else {
retval.setClass(CLASS_REF);
}
return retval;
}
private static Ptg createClassifiedPtg(byte id, LittleEndianInput in) {
int baseId = id & 0x1F | 0x20;
switch (baseId) {
case ArrayPtg.sid: return new ArrayInitialPtg(in);//0x20, 0x40, 0x60
case FuncPtg.sid: return FuncPtg.create(in); // 0x21, 0x41, 0x61
case FuncVarPtg.sid: return FuncVarPtg.create(in);//0x22, 0x42, 0x62
case NamePtg.sid: return new NamePtg(in); // 0x23, 0x43, 0x63
case RefPtg.sid: return new RefPtg(in); // 0x24, 0x44, 0x64
case AreaPtg.sid: return new AreaPtg(in); // 0x25, 0x45, 0x65
case MemAreaPtg.sid: return new MemAreaPtg(in); // 0x26, 0x46, 0x66
case MemErrPtg.sid: return new MemErrPtg(in); // 0x27, 0x47, 0x67
case MemFuncPtg.sid: return new MemFuncPtg(in); // 0x29, 0x49, 0x69
case RefErrorPtg.sid: return new RefErrorPtg(in); // 0x2a, 0x4a, 0x6a
case AreaErrPtg.sid: return new AreaErrPtg(in); // 0x2b, 0x4b, 0x6b
case RefNPtg.sid: return new RefNPtg(in); // 0x2c, 0x4c, 0x6c
case AreaNPtg.sid: return new AreaNPtg(in); // 0x2d, 0x4d, 0x6d
case NameXPtg.sid: return new NameXPtg(in); // 0x39, 0x49, 0x79
case Ref3DPtg.sid: return new Ref3DPtg(in); // 0x3a, 0x5a, 0x7a
case Area3DPtg.sid: return new Area3DPtg(in); // 0x3b, 0x5b, 0x7b
case DeletedRef3DPtg.sid: return new DeletedRef3DPtg(in); // 0x3c, 0x5c, 0x7c
case DeletedArea3DPtg.sid: return new DeletedArea3DPtg(in); // 0x3d, 0x5d, 0x7d
}
throw new UnsupportedOperationException(" Unknown Ptg in Formula: 0x"+
Integer.toHexString(id) + " (" + ( int ) id + ")");
}
private static Ptg createBasePtg(byte id, LittleEndianInput in) {
switch(id) {
case 0x00: return new UnknownPtg(id); // TODO - not a real Ptg
case ExpPtg.sid: return new ExpPtg(in); // 0x01
case TblPtg.sid: return new TblPtg(in); // 0x02
case AddPtg.sid: return AddPtg.instance; // 0x03
case SubtractPtg.sid: return SubtractPtg.instance; // 0x04
case MultiplyPtg.sid: return MultiplyPtg.instance; // 0x05
case DividePtg.sid: return DividePtg.instance; // 0x06
case PowerPtg.sid: return PowerPtg.instance; // 0x07
case ConcatPtg.sid: return ConcatPtg.instance; // 0x08
case LessThanPtg.sid: return LessThanPtg.instance; // 0x09
case LessEqualPtg.sid: return LessEqualPtg.instance; // 0x0a
case EqualPtg.sid: return EqualPtg.instance; // 0x0b
case GreaterEqualPtg.sid: return GreaterEqualPtg.instance;// 0x0c
case GreaterThanPtg.sid: return GreaterThanPtg.instance; // 0x0d
case NotEqualPtg.sid: return NotEqualPtg.instance; // 0x0e
case IntersectionPtg.sid: return IntersectionPtg.instance;// 0x0f
case UnionPtg.sid: return UnionPtg.instance; // 0x10
case RangePtg.sid: return RangePtg.instance; // 0x11
case UnaryPlusPtg.sid: return UnaryPlusPtg.instance; // 0x12
case UnaryMinusPtg.sid: return UnaryMinusPtg.instance; // 0x13
case PercentPtg.sid: return PercentPtg.instance; // 0x14
case ParenthesisPtg.sid: return ParenthesisPtg.instance; // 0x15
case MissingArgPtg.sid: return MissingArgPtg.instance; // 0x16
case StringPtg.sid: return new StringPtg(in); // 0x17
// not implemented yet: case SxNamePtg.sid: return new SxNamePtg(in); // 0x18
case AttrPtg.sid: return new AttrPtg(in); // 0x19
case ErrPtg.sid: return ErrPtg.read(in); // 0x1c
case BoolPtg.sid: return BoolPtg.read(in); // 0x1d
case IntPtg.sid: return new IntPtg(in); // 0x1e
case NumberPtg.sid: return new NumberPtg(in); // 0x1f
}
throw new RuntimeException("Unexpected base token id (" + id + ")");
}
private static Ptg[] toPtgArray(List<Ptg> l) {
if (l.isEmpty()) {
return EMPTY_PTG_ARRAY;
}
Ptg[] result = new Ptg[l.size()];
l.toArray(result);
return result;
}
/**
* This method will return the same result as {@link #getEncodedSizeWithoutArrayData(Ptg[])}
* if there are no array tokens present.
* @return the full size taken to encode the specified <tt>Ptg</tt>s
*/
public static int getEncodedSize(Ptg[] ptgs) {
int result = 0;
for (Ptg ptg : ptgs) {
result += ptg.getSize();
}
return result;
}
/**
* Used to calculate value that should be encoded at the start of the encoded Ptg token array;
* @return the size of the encoded Ptg tokens not including any trailing array data.
*/
public static int getEncodedSizeWithoutArrayData(Ptg[] ptgs) {
int result = 0;
for (Ptg ptg : ptgs) {
if (ptg instanceof ArrayPtg) {
result += ArrayPtg.PLAIN_TOKEN_SIZE;
} else {
result += ptg.getSize();
}
}
return result;
}
/**
* Writes the ptgs to the data buffer, starting at the specified offset.
*
* <br>
* The 2 byte encode length field is <b>not</b> written by this method.
* @return number of bytes written
*/
public static int serializePtgs(Ptg[] ptgs, byte[] array, int offset) {
LittleEndianByteArrayOutputStream out = new LittleEndianByteArrayOutputStream(array, offset); // NOSONAR
List<Ptg> arrayPtgs = null;
for (Ptg ptg : ptgs) {
ptg.write(out);
if (ptg instanceof ArrayPtg) {
if (arrayPtgs == null) {
arrayPtgs = new ArrayList<>(5);
}
arrayPtgs.add(ptg);
}
}
if (arrayPtgs != null) {
for (Ptg arrayPtg : arrayPtgs) {
ArrayPtg p = (ArrayPtg) arrayPtg;
p.writeTokenValueBytes(out);
}
}
return out.getWriteIndex() - offset;
}
/**
* @return the encoded length of this Ptg, including the initial Ptg type identifier byte.
*/
public abstract int getSize();
public abstract void write(LittleEndianOutput out);
/**
* return a string representation of this token alone
*/
public abstract String toFormulaString();
@Override
public final String toString() {
return GenericRecordJsonWriter.marshal(this);
}
public final void setClass(byte thePtgClass) {
if (isBaseToken()) {
throw new RuntimeException("setClass should not be called on a base token");
}
ptgClass = thePtgClass;
}
/**
* @return the 'operand class' (REF/VALUE/ARRAY) for this Ptg
*/
public final byte getPtgClass() {
return ptgClass;
}
/**
* Debug / diagnostic method to get this token's 'operand class' type.
* @return 'R' for 'reference', 'V' for 'value', 'A' for 'array' and '.' for base tokens
*/
public final char getRVAType() {
if (isBaseToken()) {
return '.';
}
switch (ptgClass) {
case Ptg.CLASS_REF: return 'R';
case Ptg.CLASS_VALUE: return 'V';
case Ptg.CLASS_ARRAY: return 'A';
}
throw new RuntimeException("Unknown operand class (" + ptgClass + ")");
}
public abstract byte getDefaultOperandClass();
/**
* @return <code>false</code> if this token is classified as 'reference', 'value', or 'array'
*/
public abstract boolean isBaseToken();
public static boolean doesFormulaReferToDeletedCell(Ptg[] ptgs) {
for (Ptg ptg : ptgs) {
if (isDeletedCellRef(ptg)) {
return true;
}
}
return false;
}
private static boolean isDeletedCellRef(Ptg ptg) {
if (ptg == ErrPtg.REF_INVALID) {
return true;
}
if (ptg instanceof DeletedArea3DPtg) {
return true;
}
if (ptg instanceof DeletedRef3DPtg) {
return true;
}
if (ptg instanceof AreaErrPtg) {
return true;
}
if (ptg instanceof RefErrorPtg) {
return true;
}
return false;
}
@Override
public abstract Ptg copy();
/**
* @return structure id of the parsed thing, or {@code -1} if the record has no sid
*/
public abstract byte getSid();
}