blob: d91864450e45f59929d8580efadd3e9f5062514a [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.metamodel.excel;
import java.io.File;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.metamodel.MetaModelException;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.DataSetHeader;
import org.apache.metamodel.data.DefaultRow;
import org.apache.metamodel.data.EmptyDataSet;
import org.apache.metamodel.data.Style;
import org.apache.metamodel.data.Style.SizeUnit;
import org.apache.metamodel.data.StyleBuilder;
import org.apache.metamodel.query.SelectItem;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.DateUtils;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.FileResource;
import org.apache.metamodel.util.InMemoryResource;
import org.apache.metamodel.util.Resource;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.HSSFColor;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Color;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.FillPatternType;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.FontUnderline;
import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFColor;
import org.apache.poi.xssf.usermodel.XSSFFont;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.XMLReader;
/**
* Convenience/reusable methods for Excel workbook handling.
*/
final class ExcelUtils {
private static final Logger logger = LoggerFactory.getLogger(ExcelUtils.class);
private ExcelUtils() {
// prevent instantiation
}
public static XMLReader createXmlReader() {
try {
SAXParserFactory saxFactory = SAXParserFactory.newInstance();
SAXParser saxParser = saxFactory.newSAXParser();
XMLReader sheetParser = saxParser.getXMLReader();
return sheetParser;
} catch (Exception e) {
throw new MetaModelException(e);
}
}
/**
* Opens a {@link Workbook} based on a {@link Resource}.
*
* @param resource
* @param allowFileOptimization whether or not to allow POI to use file handles which supposedly speeds things up,
* but creates issues when writing multiple times to the same file in short bursts of time.
* @return
*/
public static Workbook readWorkbook(Resource resource, boolean allowFileOptimization) {
if (!resource.isExists()) {
// resource does not exist- create a blank workbook
if (isXlsxFile(resource)) {
return new SXSSFWorkbook(1000);
} else {
return new HSSFWorkbook();
}
}
if (allowFileOptimization && resource instanceof FileResource) {
final File file = ((FileResource) resource).getFile();
try {
// open read-only mode
return WorkbookFactory.create(file, null, true);
} catch (Exception e) {
logger.error("Could not open workbook", e);
throw new IllegalStateException("Could not open workbook", e);
}
}
return resource.read(inputStream -> {
try {
return WorkbookFactory.create(inputStream);
} catch (Exception e) {
logger.error("Could not open workbook", e);
throw new IllegalStateException("Could not open workbook", e);
}
});
}
public static boolean isXlsxFile(Resource resource) {
if (resource == null) {
return false;
}
return resource.getName().toLowerCase().endsWith(".xlsx");
}
/**
* Initializes a workbook instance based on a {@link ExcelDataContext}.
*
* @return a workbook instance based on the ExcelDataContext.
*/
public static Workbook readWorkbookForUpdate(ExcelDataContext dataContext) {
Resource resource = dataContext.getResource();
return readWorkbook(resource, false);
}
/**
* Writes the {@link Workbook} to a {@link Resource}. The {@link Workbook} will be closed as a result of this
* operation!
*
* @param dataContext
* @param wb
*/
public static void writeAndCloseWorkbook(ExcelDataContext dataContext, final Workbook wb) {
// first write to a temp file to avoid that workbook source is the same
// as the target (will cause read+write cyclic overflow)
final Resource realResource = dataContext.getResource();
final Resource tempResource = new InMemoryResource(realResource.getQualifiedPath());
tempResource.write(out -> wb.write(out));
FileHelper.safeClose(wb);
FileHelper.copy(tempResource, realResource);
}
public static String getCellValue(Workbook wb, Cell cell) {
if (cell == null) {
return null;
}
final String result;
switch (cell.getCellType()) {
case BLANK:
case _NONE:
result = null;
break;
case BOOLEAN:
result = Boolean.toString(cell.getBooleanCellValue());
break;
case ERROR:
result = (String) getErrorResult(cell);
break;
case FORMULA:
result = getFormulaCellValue(wb, cell);
break;
case NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
Date date = cell.getDateCellValue();
if (date == null) {
result = null;
} else {
result = DateUtils.createDateFormat().format(date);
}
} else {
result = getNumericCellValueAsString(cell.getCellStyle(), cell.getNumericCellValue());
}
break;
case STRING:
result = cell.getRichStringCellValue().getString();
break;
default:
throw new IllegalStateException("Unknown cell type: " + cell.getCellType());
}
logger.debug("cell ({},{}) resolved to value: {}", cell.getRowIndex(), cell.getColumnIndex(), result);
return result;
}
private static Object getCellValueAsObject(final Workbook workbook, final Cell cell) {
if (cell == null) {
return null;
}
final Object result;
switch (cell.getCellType()) {
case BLANK:
case _NONE:
result = null;
break;
case BOOLEAN:
result = Boolean.valueOf(cell.getBooleanCellValue());
break;
case ERROR:
result = getErrorResult(cell);
break;
case FORMULA:
result = getFormulaCellValueAsObject(workbook, cell);
break;
case NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
result = cell.getDateCellValue();
} else {
result = getDoubleAsNumber(cell.getNumericCellValue());
}
break;
case STRING:
result = cell.getRichStringCellValue().getString();
break;
default:
throw new IllegalStateException("Unknown cell type: " + cell.getCellType());
}
logger.debug("cell ({},{}) resolved to value: {}", cell.getRowIndex(), cell.getColumnIndex(), result);
return result;
}
private static String getErrorResult(final Cell cell) {
try {
return FormulaError.forInt(cell.getErrorCellValue()).getString();
} catch (final RuntimeException e) {
logger
.debug("Getting error code for ({},{}) failed!: {}", cell.getRowIndex(), cell.getColumnIndex(), e
.getMessage());
if (cell instanceof XSSFCell) {
// hack to get error string, which is available
return ((XSSFCell) cell).getErrorCellString();
} else {
logger
.error("Couldn't handle unexpected error scenario in cell: ({},{})", cell.getRowIndex(), cell
.getColumnIndex());
throw e;
}
}
}
private static Object evaluateCell(final Workbook workbook, final Cell cell,
final ColumnType expectedColumnType) {
final Object value = getCellValueAsObject(workbook, cell);
if (value == null || value.getClass().equals(expectedColumnType.getJavaEquivalentClass())) {
return value;
}
// Don't log when an Integer value is in a Double column type
if (!(value.getClass().equals(Integer.class) && expectedColumnType
.getJavaEquivalentClass()
.equals(Double.class)) && logger.isWarnEnabled()) {
logger
.warn("Cell ({},{}) has the value '{}' of data type '{}', which doesn't match the detected "
+ "column's data type '{}'. This cell gets value NULL in the DataSet.", cell.getRowIndex(),
cell.getColumnIndex(), value, value.getClass().getSimpleName(), expectedColumnType);
}
return null;
}
private static String getFormulaCellValue(Workbook workbook, Cell cell) {
// first try with a cached/precalculated value
try {
double numericCellValue = cell.getNumericCellValue();
return getNumericCellValueAsString(cell.getCellStyle(), numericCellValue);
} catch (Exception e) {
if (logger.isInfoEnabled()) {
logger.info("Failed to fetch cached/precalculated formula value of cell: " + cell, e);
}
}
// evaluate cell first, if possible
final Cell evaluatedCell = getEvaluatedCell(workbook, cell);
if (evaluatedCell != null) {
return getCellValue(workbook, evaluatedCell);
} else {
// last resort: return the string formula
return cell.getCellFormula();
}
}
private static Object getFormulaCellValueAsObject(final Workbook workbook, final Cell cell) {
// first try with a cached/precalculated value
try {
return getDoubleAsNumber(cell.getNumericCellValue());
} catch (final Exception e) {
if (logger.isInfoEnabled()) {
logger.info("Failed to fetch cached/precalculated formula value of cell: " + cell, e);
}
}
// evaluate cell first, if possible
final Cell evaluatedCell = getEvaluatedCell(workbook, cell);
if (evaluatedCell != null) {
return getCellValueAsObject(workbook, evaluatedCell);
} else {
// last resort: return the string formula
return cell.getCellFormula();
}
}
private static Cell getEvaluatedCell(final Workbook workbook, final Cell cell) {
try {
if (logger.isInfoEnabled()) {
logger
.info("cell ({},{}) is a formula. Attempting to evaluate: {}", cell.getRowIndex(), cell
.getColumnIndex(), cell.getCellFormula());
}
final FormulaEvaluator evaluator = workbook.getCreationHelper().createFormulaEvaluator();
// calculates the formula and puts it's value back into the cell
return evaluator.evaluateInCell(cell);
} catch (RuntimeException e) {
logger
.warn("Exception occurred while evaluating formula at position ({},{}): {}", cell.getRowIndex(),
cell.getColumnIndex(), e.getMessage());
}
return null;
}
private static Number getDoubleAsNumber(final double value) {
final Double doubleValue = Double.valueOf(value);
if (doubleValue % 1 == 0 && doubleValue <= Integer.MAX_VALUE) {
return Integer.valueOf(doubleValue.intValue());
} else {
return doubleValue;
}
}
public static Style getCellStyle(Workbook workbook, Cell cell) {
if (cell == null) {
return Style.NO_STYLE;
}
final CellStyle cellStyle = cell.getCellStyle();
final int fontIndex = cellStyle.getFontIndexAsInt();
final Font font = workbook.getFontAt(fontIndex);
final StyleBuilder styleBuilder = new StyleBuilder();
// Font bold, italic, underline
if (font.getBold()) {
styleBuilder.bold();
}
if (font.getItalic()) {
styleBuilder.italic();
}
if (font.getUnderline() != FontUnderline.NONE.getByteValue()) {
styleBuilder.underline();
}
// Font size
final Font stdFont = workbook.getFontAt(0);
final short fontSize = font.getFontHeightInPoints();
if (stdFont.getFontHeightInPoints() != fontSize) {
styleBuilder.fontSize(fontSize, SizeUnit.PT);
}
// Font color
final short colorIndex = font.getColor();
if (font instanceof HSSFFont) {
if (colorIndex != HSSFFont.COLOR_NORMAL) {
final HSSFWorkbook wb = (HSSFWorkbook) workbook;
HSSFColor color = wb.getCustomPalette().getColor(colorIndex);
if (color != null) {
short[] triplet = color.getTriplet();
styleBuilder.foreground(triplet);
}
}
} else if (font instanceof XSSFFont) {
XSSFFont xssfFont = (XSSFFont) font;
XSSFColor color = xssfFont.getXSSFColor();
if (color != null) {
String argbHex = color.getARGBHex();
if (argbHex != null) {
styleBuilder.foreground(argbHex.substring(2));
}
}
} else {
throw new IllegalStateException("Unexpected font type: " + (font == null ? "null" : font.getClass()) + ")");
}
// Background color
if (cellStyle.getFillPattern() == FillPatternType.SOLID_FOREGROUND) {
Color color = cellStyle.getFillForegroundColorColor();
if (color instanceof HSSFColor) {
short[] triplet = ((HSSFColor) color).getTriplet();
if (triplet != null) {
styleBuilder.background(triplet);
}
} else if (color instanceof XSSFColor) {
String argb = ((XSSFColor) color).getARGBHex();
if (argb != null) {
styleBuilder.background(argb.substring(2));
}
} else {
throw new IllegalStateException(
"Unexpected color type: " + (color == null ? "null" : color.getClass()) + ")");
}
}
// alignment
switch (cellStyle.getAlignment()) {
case LEFT:
styleBuilder.leftAligned();
break;
case RIGHT:
styleBuilder.rightAligned();
break;
case CENTER:
styleBuilder.centerAligned();
break;
case JUSTIFY:
styleBuilder.justifyAligned();
break;
default:
// we currently don't support other alignment styles
break;
}
return styleBuilder.create();
}
public static Iterator<Row> getRowIterator(Sheet sheet, ExcelConfiguration configuration, boolean jumpToDataRows) {
final Iterator<Row> iterator;
if (configuration.isSkipEmptyLines()) {
iterator = sheet.rowIterator();
} else {
iterator = new ZeroBasedRowIterator(sheet);
}
if (jumpToDataRows) {
final int columnNameLineNumber = configuration.getColumnNameLineNumber();
if (columnNameLineNumber != ExcelConfiguration.NO_COLUMN_NAME_LINE) {
// iterate past the column headers
if (iterator.hasNext()) {
iterator.next();
}
for (int i = 1; i < columnNameLineNumber; i++) {
if (iterator.hasNext()) {
iterator.next();
} else {
// no more rows!
break;
}
}
}
}
return iterator;
}
/**
* Creates a MetaModel row based on an Excel row
*
* @param workbook
* @param row
* @param selectItems select items of the columns in the table
* @return
*/
public static DefaultRow createRow(final Workbook workbook, final Row row, final DataSetHeader header) {
final int size = header.size();
final Object[] values = new Object[size];
final Style[] styles = new Style[size];
if (row != null) {
for (int i = 0; i < size; i++) {
final int columnNumber = header.getSelectItem(i).getColumn().getColumnNumber();
final ColumnType columnType = header.getSelectItem(i).getColumn().getType();
final Cell cell = row.getCell(columnNumber);
final Object value;
if (columnType.equals(DefaultSpreadsheetReaderDelegate.DEFAULT_COLUMN_TYPE) || columnType
.equals(DefaultSpreadsheetReaderDelegate.LEGACY_COLUMN_TYPE)) {
value = ExcelUtils.getCellValue(workbook, cell);
} else {
value = ExcelUtils.evaluateCell(workbook, cell, columnType);
}
final Style style = ExcelUtils.getCellStyle(workbook, cell);
values[i] = value;
styles[i] = style;
}
}
return new DefaultRow(header, values, styles);
}
public static DataSet getDataSet(Workbook workbook, Sheet sheet, Table table, ExcelConfiguration configuration) {
final List<SelectItem> selectItems =
table.getColumns().stream().map(SelectItem::new).collect(Collectors.toList());
final Iterator<Row> rowIterator = getRowIterator(sheet, configuration, true);
if (!rowIterator.hasNext()) {
// no more rows!
FileHelper.safeClose(workbook);
return new EmptyDataSet(selectItems);
}
final DataSet dataSet = new XlsDataSet(selectItems, workbook, rowIterator);
return dataSet;
}
private static String getNumericCellValueAsString(final CellStyle cellStyle, final double cellValue) {
final int formatIndex = cellStyle.getDataFormat();
String formatString = cellStyle.getDataFormatString();
if (formatString == null) {
formatString = BuiltinFormats.getBuiltinFormat(formatIndex);
}
final DataFormatter formatter = new DataFormatter();
return formatter.formatRawCellContents(cellValue, formatIndex, formatString);
}
}