blob: 43fa5ea8e32f72a01ef4b319d5ba9b068733f15e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.freemarker.generator.tools.excel;
import org.apache.freemarker.generator.base.document.Document;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import static org.apache.poi.ss.usermodel.Row.MissingCellPolicy.CREATE_NULL_AS_BLANK;
/**
* Parse Excel documents (XLS & XLSX) using Apache POI.
*/
public class ExcelTool {
private static final boolean EMULATE_CSV = true;
private static final SimpleDateFormat TIME_DATE_FORMAT = new SimpleDateFormat("HH:mm:ss");
private static final SimpleDateFormat YEAR_TIME_FORMAT = new SimpleDateFormat("yyyy");
public Workbook parse(Document document) {
try (InputStream is = document.getUnsafeInputStream()) {
final Workbook workbook = WorkbookFactory.create(is);
// make sure that the workbook is closed together with the document
return document.addClosable(workbook);
} catch (IOException e) {
throw new RuntimeException("Failed to parse Ecxel document: " + document, e);
}
}
/**
* Get all sheets of a workbook.
*
* @param workbook The workbook
* @return Sheets of the workbook
*/
public List<Sheet> getSheets(Workbook workbook) {
final List<Sheet> result = new ArrayList<>();
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
result.add(workbook.getSheetAt(i));
}
return result;
}
/**
* Transform the Excel sheet into a table. Please not that locales are mostly
* ignored by Apache POI (see https://poi.apache.org/apidocs/dev/org/apache/poi/ss/usermodel/DataFormatter.html)
*
* @param sheet Excel sheet
* @return Table containing formatted cell values as strings
*/
public List<List<String>> toTable(Sheet sheet) {
final DataFormatter dataFormatter = dataFormatter();
final Iterator<Row> iterator = sheet.iterator();
final List<List<String>> result = new ArrayList<>();
while (iterator.hasNext()) {
final Row row = iterator.next();
result.add(toColumns(row, dataFormatter));
}
return result;
}
/**
* EXPERIMENTAL FEATURE
* <p>
* Transform the sheet to table contaning raw Java objects, e.g. Date, Double, ...
*
* @param sheet Excel sheet
* @return Table containing cells as raw Java objects
*/
public List<List<Object>> toRawTable(Sheet sheet) {
final DataFormatter dataFormatter = dataFormatter();
final Iterator<Row> iterator = sheet.iterator();
final List<List<Object>> result = new ArrayList<>();
while (iterator.hasNext()) {
final Row row = iterator.next();
result.add(toRawColumns(row, dataFormatter));
}
return result;
}
@Override
public String toString() {
return "Process Excels files (XLS, XLSX) using Apache POI (see https://poi.apache.org)";
}
private static List<String> toColumns(Row row, DataFormatter dataFormatter) {
final List<String> columnValues = new ArrayList<>();
for (int columnIndex = 0; columnIndex < row.getLastCellNum(); columnIndex++) {
final Cell cell = row.getCell(columnIndex, CREATE_NULL_AS_BLANK);
final String formatedCellValue = dataFormatter.formatCellValue(cell).trim();
columnValues.add(formatedCellValue);
}
return columnValues;
}
private static List<Object> toRawColumns(Row row, DataFormatter dataFormatter) {
final List<Object> columnValues = new ArrayList<>();
for (int columnIndex = 0; columnIndex < row.getLastCellNum(); columnIndex++) {
final Cell cell = row.getCell(columnIndex, CREATE_NULL_AS_BLANK);
final Object cellValue = toCellValue(cell, dataFormatter);
columnValues.add(cellValue);
}
return columnValues;
}
private static Object toCellValue(Cell cell, DataFormatter formatter) {
final CellType cellType = cell.getCellType();
switch (cellType) {
case BOOLEAN:
return cell.getBooleanCellValue();
case NUMERIC:
return DateUtil.isCellDateFormatted(cell) ? toDateCellValue(cell) : cell.getNumericCellValue();
default:
return formatter.formatCellValue(cell);
}
}
/**
* Try desperately to make sense out of Excel and its handling of dates.
* See https://stackoverflow.com/questions/15710888/reading-time-values-from-spreadsheet-using-poi-api.
*
* @param cell Cell containing some sort of date or time
* @return The corresponding Java istance
*/
private static synchronized Object toDateCellValue(Cell cell) {
final Date date = cell.getDateCellValue();
// "Time-only" values have date set to 31-Dec-1899 so if year is "1899"
// you can assume it is a "time-only" value
final String year = YEAR_TIME_FORMAT.format(date);
if (year.equals("1899")) {
// handle "Time-only" value
return LocalDateTime.ofInstant(date.toInstant(), ZoneId.systemDefault()).toLocalTime();
} else {
// here you may have a date-only or date-time value
final String timeStamp = TIME_DATE_FORMAT.format(date);
if (timeStamp.equals("00:00:00")) {
// if time is 00:00:00 you can assume it is a date only value (but it could be midnight)
return new java.sql.Date(date.getTime()).toLocalDate();
} else {
return date;
}
}
}
private static DataFormatter dataFormatter() {
return new DataFormatter(EMULATE_CSV);
}
}