blob: 617f7b052d66501856c8cbcc1d37dd4137b91a52 [file] [log] [blame]
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.ss.formula.function;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import org.apache.poi.poifs.crypt.CryptoFunctions;
import org.apache.poi.poifs.crypt.HashAlgorithm;
import org.apache.poi.util.TempFile;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
/**
* This class is not used during normal POI run-time but is used at development time to generate
* the file 'functionMetadata.txt'. There are more than 300 built-in functions in Excel and the
* intention of this class is to make it easier to maintain the metadata, by extracting it from
* a reliable source.
*/
public final class ExcelFileFormatDocFunctionExtractor {
private static final String SOURCE_DOC_FILE_NAME = "excelfileformat.odt";
/**
* For simplicity, the output file is strictly simple ASCII.
* This method detects any unexpected characters.
*/
/* package */ static boolean isSimpleAscii(char c) {
if (c>=0x21 && c<=0x7E) {
// everything from '!' to '~' (includes letters, digits, punctuation
return true;
}
// some specific whitespace chars below 0x21:
switch(c) {
case ' ':
case '\t':
case '\r':
case '\n':
return true;
}
return false;
}
private static final class FunctionData {
// special characters from the ooo document
private static final int CHAR_ELLIPSIS_8230 = 8230;
private static final int CHAR_NDASH_8211 = 8211;
private final int _index;
private final boolean _hasFootnote;
private final String _name;
private final int _minParams;
private final int _maxParams;
private final String _returnClass;
private final String _paramClasses;
private final boolean _isVolatile;
public FunctionData(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
String returnClass, String paramClasses, boolean isVolatile) {
_index = funcIx;
_hasFootnote = hasFootnote;
_name = funcName;
_minParams = minParams;
_maxParams = maxParams;
_returnClass = convertSpecialChars(returnClass);
_paramClasses = convertSpecialChars(paramClasses);
_isVolatile = isVolatile;
}
private static String convertSpecialChars(String ss) {
StringBuffer sb = new StringBuffer(ss.length() + 4);
for(int i=0; i<ss.length(); i++) {
char c = ss.charAt(i);
if (isSimpleAscii(c)) {
sb.append(c);
continue;
}
switch (c) {
case CHAR_NDASH_8211:
sb.append('-');
continue;
case CHAR_ELLIPSIS_8230:
sb.append("...");
continue;
}
throw new RuntimeException("bad char (" + ((int)c) + ") in string '" + ss + "'");
}
return sb.toString();
}
public int getIndex() {
return _index;
}
public String getName() {
return _name;
}
public boolean hasFootnote() {
return _hasFootnote;
}
public String formatAsDataLine() {
return _index + "\t" + _name + "\t" + _minParams + "\t"
+ _maxParams + "\t" + _returnClass + "\t" + _paramClasses
+ "\t" + checkMark(_isVolatile) + "\t" + checkMark(_hasFootnote);
}
private static String checkMark(boolean b) {
return b ? "x" : "";
}
}
private static final class FunctionDataCollector {
private final Map<Integer, FunctionData> _allFunctionsByIndex;
private final Map<String, FunctionData> _allFunctionsByName;
private final Set<Integer> _groupFunctionIndexes;
private final Set<String> _groupFunctionNames;
private final PrintStream _ps;
public FunctionDataCollector(PrintStream ps) {
_ps = ps;
_allFunctionsByIndex = new HashMap<Integer, FunctionData>();
_allFunctionsByName = new HashMap<String, FunctionData>();
_groupFunctionIndexes = new HashSet<Integer>();
_groupFunctionNames = new HashSet<String>();
}
public void addFuntion(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
String returnClass, String paramClasses, String volatileFlagStr) {
boolean isVolatile = volatileFlagStr.length() > 0;
Integer funcIxKey = Integer.valueOf(funcIx);
if(!_groupFunctionIndexes.add(funcIxKey)) {
throw new RuntimeException("Duplicate function index (" + funcIx + ")");
}
if(!_groupFunctionNames.add(funcName)) {
throw new RuntimeException("Duplicate function name '" + funcName + "'");
}
checkRedefinedFunction(hasFootnote, funcName, funcIxKey);
FunctionData fd = new FunctionData(funcIx, hasFootnote, funcName,
minParams, maxParams, returnClass, paramClasses, isVolatile);
_allFunctionsByIndex.put(funcIxKey, fd);
_allFunctionsByName.put(funcName, fd);
}
/**
* Some extra validation here.
* Any function which changes definition will have a footnote in the source document
*/
private void checkRedefinedFunction(boolean hasNote, String funcName, Integer funcIxKey) {
FunctionData fdPrev;
// check by index
fdPrev = _allFunctionsByIndex.get(funcIxKey);
if(fdPrev != null) {
if(!fdPrev.hasFootnote() || !hasNote) {
throw new RuntimeException("changing function ["
+ funcIxKey + "] definition without foot-note");
}
_allFunctionsByName.remove(fdPrev.getName());
}
// check by name
fdPrev = _allFunctionsByName.get(funcName);
if(fdPrev != null) {
if(!fdPrev.hasFootnote() || !hasNote) {
throw new RuntimeException("changing function '"
+ funcName + "' definition without foot-note");
}
_allFunctionsByIndex.remove(Integer.valueOf(fdPrev.getIndex()));
}
}
public void endTableGroup(String headingText) {
Integer[] keys = new Integer[_groupFunctionIndexes.size()];
_groupFunctionIndexes.toArray(keys);
_groupFunctionIndexes.clear();
_groupFunctionNames.clear();
Arrays.sort(keys);
_ps.println("# " + headingText);
for (int i = 0; i < keys.length; i++) {
FunctionData fd = _allFunctionsByIndex.get(keys[i]);
_ps.println(fd.formatAsDataLine());
}
}
}
/**
* To avoid drag-in - parse XML using only JDK.
*/
private static class EFFDocHandler implements ContentHandler {
private static final String[] HEADING_PATH_NAMES = {
"office:document-content", "office:body", "office:text", "text:h",
};
private static final String[] TABLE_BASE_PATH_NAMES = {
"office:document-content", "office:body", "office:text", "table:table",
};
private static final String[] TABLE_ROW_RELPATH_NAMES = {
"table:table-row",
};
private static final String[] TABLE_CELL_RELPATH_NAMES = {
"table:table-row", "table:table-cell", "text:p",
};
// after May 2008 there was one more style applied to the footnotes
private static final String[] NOTE_REF_RELPATH_NAMES_OLD = {
"table:table-row", "table:table-cell", "text:p", "text:span", "text:note-ref",
};
private static final String[] NOTE_REF_RELPATH_NAMES = {
"table:table-row", "table:table-cell", "text:p", "text:span", "text:span", "text:note-ref",
};
private final Stack<String> _elemNameStack;
/** <code>true</code> only when parsing the target tables */
private boolean _isInsideTable;
private final List<String> _rowData;
private final StringBuffer _textNodeBuffer;
private final List<Boolean> _rowNoteFlags;
private boolean _cellHasNote;
private final FunctionDataCollector _fdc;
private String _lastHeadingText;
public EFFDocHandler(FunctionDataCollector fdc) {
_fdc = fdc;
_elemNameStack = new Stack<String>();
_isInsideTable = false;
_rowData = new ArrayList<String>();
_textNodeBuffer = new StringBuffer();
_rowNoteFlags = new ArrayList<Boolean>();
}
private boolean matchesTargetPath() {
return matchesPath(0, TABLE_BASE_PATH_NAMES);
}
private boolean matchesRelPath(String[] pathNames) {
return matchesPath(TABLE_BASE_PATH_NAMES.length, pathNames);
}
private boolean matchesPath(int baseStackIndex, String[] pathNames) {
if(_elemNameStack.size() != baseStackIndex + pathNames.length) {
return false;
}
for (int i = 0; i < pathNames.length; i++) {
if(!_elemNameStack.get(baseStackIndex + i).equals(pathNames[i])) {
return false;
}
}
return true;
}
@Override
public void characters(char[] ch, int start, int length) {
// only 2 text nodes where text is collected:
if(matchesRelPath(TABLE_CELL_RELPATH_NAMES) || matchesPath(0, HEADING_PATH_NAMES)) {
_textNodeBuffer.append(ch, start, length);
}
}
@Override
public void endElement(String namespaceURI, String localName, String name) {
String expectedName = _elemNameStack.peek();
if(expectedName != name) {
throw new RuntimeException("close tag mismatch");
}
if(matchesPath(0, HEADING_PATH_NAMES)) {
_lastHeadingText = _textNodeBuffer.toString().trim();
_textNodeBuffer.setLength(0);
}
if(_isInsideTable) {
if(matchesTargetPath()) {
_fdc.endTableGroup(_lastHeadingText);
_isInsideTable = false;
} else if(matchesRelPath(TABLE_ROW_RELPATH_NAMES)) {
String[] cellData = new String[_rowData.size()];
_rowData.toArray(cellData);
_rowData.clear();
Boolean[] noteFlags = new Boolean[_rowNoteFlags.size()];
_rowNoteFlags.toArray(noteFlags);
_rowNoteFlags.clear();
processTableRow(cellData, noteFlags);
} else if(matchesRelPath(TABLE_CELL_RELPATH_NAMES)) {
_rowData.add(_textNodeBuffer.toString().trim());
_rowNoteFlags.add(Boolean.valueOf(_cellHasNote));
_textNodeBuffer.setLength(0);
}
}
_elemNameStack.pop();
}
private void processTableRow(String[] cellData, Boolean[] noteFlags) {
// each table row of the document contains data for two functions
if(cellData.length != 15) {
throw new RuntimeException("Bad table row size");
}
processFunction(cellData, noteFlags, 0);
processFunction(cellData, noteFlags, 8);
}
public void processFunction(String[] cellData, Boolean[] noteFlags, int i) {
String funcIxStr = cellData[i + 0];
if (funcIxStr.length() < 1) {
// empty (happens on the right hand side when there is an odd number of functions)
return;
}
int funcIx = parseInt(funcIxStr);
boolean hasFootnote = noteFlags[i + 1].booleanValue();
String funcName = cellData[i + 1];
int minParams = parseInt(cellData[i + 2]);
int maxParams = parseInt(cellData[i + 3]);
String returnClass = cellData[i + 4];
String paramClasses = cellData[i + 5];
String volatileFlagStr = cellData[i + 6];
_fdc.addFuntion(funcIx, hasFootnote, funcName, minParams, maxParams, returnClass, paramClasses, volatileFlagStr);
}
private static int parseInt(String valStr) {
try {
return Integer.parseInt(valStr);
} catch (NumberFormatException e) {
throw new RuntimeException("Value '" + valStr + "' could not be parsed as an integer");
}
}
@Override
public void startElement(String namespaceURI, String localName, String name, Attributes atts) {
_elemNameStack.add(name);
if(matchesTargetPath()) {
String tableName = atts.getValue("table:name");
if(tableName.startsWith("tab_fml_func") && !tableName.equals("tab_fml_func0")) {
_isInsideTable = true;
}
return;
}
if(matchesPath(0, HEADING_PATH_NAMES)) {
_textNodeBuffer.setLength(0);
} else if(matchesRelPath(TABLE_ROW_RELPATH_NAMES)) {
_rowData.clear();
_rowNoteFlags.clear();
} else if(matchesRelPath(TABLE_CELL_RELPATH_NAMES)) {
_textNodeBuffer.setLength(0);
_cellHasNote = false;
} else if(matchesRelPath(NOTE_REF_RELPATH_NAMES_OLD)) {
_cellHasNote = true;
} else if(matchesRelPath(NOTE_REF_RELPATH_NAMES)) {
_cellHasNote = true;
}
}
@Override
public void endDocument() {
// do nothing
}
@Override
public void endPrefixMapping(String prefix) {
// do nothing
}
@Override
public void ignorableWhitespace(char[] ch, int start, int length) {
// do nothing
}
@Override
public void processingInstruction(String target, String data) {
// do nothing
}
@Override
public void setDocumentLocator(Locator locator) {
// do nothing
}
@Override
public void skippedEntity(String name) {
// do nothing
}
@Override
public void startDocument() {
// do nothing
}
@Override
public void startPrefixMapping(String prefix, String uri) {
// do nothing
}
}
private static void extractFunctionData(FunctionDataCollector fdc, InputStream is) {
XMLReader xr;
try {
// First up, try the default one
xr = XMLReaderFactory.createXMLReader();
} catch (SAXException e) {
// Try one for java 1.4
System.setProperty("org.xml.sax.driver", "org.apache.crimson.parser.XMLReaderImpl");
try {
xr = XMLReaderFactory.createXMLReader();
} catch (SAXException e2) {
throw new RuntimeException(e2);
}
}
xr.setContentHandler(new EFFDocHandler(fdc));
InputSource inSrc = new InputSource(is);
try {
xr.parse(inSrc);
is.close();
} catch (IOException e) {
throw new RuntimeException(e);
} catch (SAXException e) {
throw new RuntimeException(e);
}
}
/**
* To be sure that no tricky unicode chars make it through to the output file.
*/
private static final class SimpleAsciiOutputStream extends OutputStream {
private final OutputStream _os;
public SimpleAsciiOutputStream(OutputStream os) {
_os = os;
}
@Override
public void write(int b) throws IOException {
checkByte(b);
_os.write(b);
}
private static void checkByte(int b) {
if (!isSimpleAscii((char)b)) {
throw new RuntimeException("Encountered char (" + b + ") which was not simple ascii as expected");
}
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
for (int i = 0; i < len; i++) {
checkByte(b[i + off]);
}
_os.write(b, off, len);
}
}
private static void processFile(File effDocFile, File outFile) {
if(!effDocFile.exists()) {
throw new RuntimeException("file '" + effDocFile.getAbsolutePath() + "' does not exist");
}
OutputStream os;
try {
os = new FileOutputStream(outFile);
} catch (FileNotFoundException e) {
throw new RuntimeException(e);
}
os = new SimpleAsciiOutputStream(os);
PrintStream ps;
try {
ps = new PrintStream(os, true, "UTF-8");
} catch(UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
outputLicenseHeader(ps);
Class<?> genClass = ExcelFileFormatDocFunctionExtractor.class;
ps.println("# Created by (" + genClass.getName() + ")");
// identify the source file
ps.print("# from source file '" + SOURCE_DOC_FILE_NAME + "'");
ps.println(" (size=" + effDocFile.length() + ", md5=" + getFileMD5(effDocFile) + ")");
ps.println("#");
ps.println("#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )");
ps.println("");
try {
// can't use ZipHelper here, because its in a different module
ZipFile zf = new ZipFile(effDocFile);
InputStream is = zf.getInputStream(zf.getEntry("content.xml"));
extractFunctionData(new FunctionDataCollector(ps), is);
zf.close();
} catch (ZipException e) {
throw new RuntimeException(e);
} catch (IOException e) {
throw new RuntimeException(e);
}
ps.close();
String canonicalOutputFileName;
try {
canonicalOutputFileName = outFile.getCanonicalPath();
} catch (IOException e) {
throw new RuntimeException(e);
}
System.out.println("Successfully output to '" + canonicalOutputFileName + "'");
}
private static void outputLicenseHeader(PrintStream ps) {
String[] lines= {
"Licensed to the Apache Software Foundation (ASF) under one or more",
"contributor license agreements. See the NOTICE file distributed with",
"this work for additional information regarding copyright ownership.",
"The ASF licenses this file to You under the Apache License, Version 2.0",
"(the \"License\"); you may not use this file except in compliance with",
"the License. You may obtain a copy of the License at",
"",
" http://www.apache.org/licenses/LICENSE-2.0",
"",
"Unless required by applicable law or agreed to in writing, software",
"distributed under the License is distributed on an \"AS IS\" BASIS,",
"WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
"See the License for the specific language governing permissions and",
"limitations under the License.",
};
for (int i = 0; i < lines.length; i++) {
ps.print("# ");
ps.println(lines[i]);
}
ps.println();
}
/**
* Helps identify the source file
*/
private static String getFileMD5(File f) {
MessageDigest m = CryptoFunctions.getMessageDigest(HashAlgorithm.md5);
byte[]buf = new byte[2048];
try {
InputStream is = new FileInputStream(f);
while(true) {
int bytesRead = is.read(buf);
if(bytesRead<1) {
break;
}
m.update(buf, 0, bytesRead);
}
is.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
return "0x" + new BigInteger(1, m.digest()).toString(16);
}
private static File downloadSourceFile() {
URL url;
try {
url = new URL("http://sc.openoffice.org/" + SOURCE_DOC_FILE_NAME);
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
File result;
byte[]buf = new byte[2048];
try {
URLConnection conn = url.openConnection();
InputStream is = conn.getInputStream();
System.out.println("downloading " + url.toExternalForm());
result = TempFile.createTempFile("excelfileformat", ".odt");
OutputStream os = new FileOutputStream(result);
while(true) {
int bytesRead = is.read(buf);
if(bytesRead<1) {
break;
}
os.write(buf, 0, bytesRead);
}
is.close();
os.close();
} catch (IOException e) {
throw new RuntimeException(e);
}
System.out.println("file downloaded ok");
return result;
}
public static void main(String[] args) {
File outFile = new File("functionMetadata-asGenerated.txt");
if (false) { // set true to use local file
File dir = new File("c:/temp");
File effDocFile = new File(dir, SOURCE_DOC_FILE_NAME);
processFile(effDocFile, outFile);
return;
}
File tempEFFDocFile = downloadSourceFile();
processFile(tempEFFDocFile, outFile);
tempEFFDocFile.delete();
}
}