blob: 8237ec125d053391127302496c201e3727f4872c [file] [log] [blame]
/*******************************************************************************
* Copyright (C) 2009 The University of Manchester
*
* Modifications to the initial code base are copyright of their
* respective authors, or their employers as appropriate.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
******************************************************************************/
package net.sf.taverna.t2.activities.spreadsheet;
import java.io.IOException;
import java.io.InputStream;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
/**
* Reads Excel '97 (.xls) or Excel '07 (.xlsx) spreadsheet files.
*
* @author David Withers
*/
public class ExcelSpreadsheetReader implements SpreadsheetReader {
private static Logger logger = Logger.getLogger(ExcelSpreadsheetReader.class);
public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows, SpreadsheetRowProcessor rowProcessor)
throws SpreadsheetReadException {
Workbook workbook;
try {
workbook = WorkbookFactory.create(inputStream);
} catch (InvalidFormatException e) {
throw new SpreadsheetReadException(
"The file does not have a compatible spreadsheet format", e);
} catch (IOException e) {
throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
} catch (IllegalArgumentException e) {
throw new SpreadsheetReadException("The spreadsheet stream could not be read", e);
}
DataFormatter dataFormatter = new DataFormatter();
workbook.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK);
Sheet sheet = workbook.getSheetAt(0);
if (rowRange.getEnd() < 0) {
rowRange.setEnd(sheet.getLastRowNum());
logger.debug("No end of row range specified, setting to " + rowRange.getEnd());
}
SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>();
for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) {
boolean blankRow = true;
if (rowRange.contains(rowIndex)) {
Row row = sheet.getRow(rowIndex);
for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) {
if (columnRange.contains(columnIndex)) {
String value = null;
if (row != null) {
Cell cell = row.getCell(columnIndex);
if (cell != null) {
value = getCellValue(cell, dataFormatter);
}
}
if (value != null) {
blankRow = false;
}
currentDataRow.put(columnIndex, value);
if (columnIndex == columnRange.getEnd()) {
if (!ignoreBlankRows || !blankRow) {
rowProcessor.processRow(rowIndex, currentDataRow);
}
currentDataRow = new TreeMap<Integer, String>();
}
}
}
}
}
}
private String getCellValue(Cell cell, DataFormatter dataFormatter) {
String value = null;
switch (cell.getCellType()) {
case Cell.CELL_TYPE_BOOLEAN:
value = Boolean.toString(cell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
// value = cell.getDateCellValue().toString();
value = dataFormatter.formatCellValue(cell);
} else {
value = Double.toString(cell.getNumericCellValue());
}
break;
case Cell.CELL_TYPE_STRING:
value = cell.getStringCellValue();
break;
case Cell.CELL_TYPE_FORMULA:
switch (cell.getCachedFormulaResultType()) {
case Cell.CELL_TYPE_BOOLEAN:
value = Boolean.toString(cell.getBooleanCellValue());
break;
case Cell.CELL_TYPE_NUMERIC:
if (DateUtil.isCellDateFormatted(cell)) {
value = cell.getDateCellValue().toString();
} else {
value = Double.toString(cell.getNumericCellValue());
}
break;
case Cell.CELL_TYPE_STRING:
value = cell.getStringCellValue();
break;
default:
break;
}
default:
break;
}
// value = dataFormatter.formatCellValue(cell);
// if ("".equals(value)) value = null;
return value;
}
// /**
// * Reads data from an HSSF stream.
// *
// * @param inputStream
// * @param spreradsheetRowProcessor
// * @throws IOException
// * @deprecated can't generalize for XSSF streams and not much advantage as all the (non
// * duplicated) data is contained in one event so memory footprint isn't much smaller
// */
// public void readHSSF(InputStream inputStream, SpreadsheetRowProcessor spreradsheetRowProcessor)
// throws IOException {
// POIFSFileSystem poifs = new POIFSFileSystem(inputStream);
//
// // get the workbook part of the stream
// InputStream documentInputStream = poifs.createDocumentInputStream("Workbook");
//
// RecordProcessor recordProcessor = new RecordProcessor(spreradsheetRowProcessor);
// MissingRecordAwareHSSFListener hssfListener = new MissingRecordAwareHSSFListener(
// recordProcessor);
//
// // listen for all records
// HSSFRequest request = new HSSFRequest();
// request.addListenerForAllRecords(hssfListener);
//
// HSSFEventFactory factory = new HSSFEventFactory();
// factory.processEvents(request, documentInputStream);
//
// inputStream.close();
// documentInputStream.close();
// }
//
// /**
// * Listener for processing events from an HSSF stream.
// *
// * @author David Withers
// * @deprecated can't generalize for XSSF streams and not much advantage as all the (non
// * duplicated) data is contained in one event so memory footprint isn't much smaller
// */
// class RecordProcessor implements HSSFListener {
//
// private SpreadsheetRowProcessor spreradsheetRowProcessor;
//
// private SSTRecord sstrec;
//
// private boolean worksheetOpen = false;
//
// private int row, column;
//
// private Map<Integer, String> currentDataRow = new HashMap<Integer, String>();
//
// public RecordProcessor(SpreadsheetRowProcessor spreradsheetRowProcessor) {
// this.spreradsheetRowProcessor = spreradsheetRowProcessor;
// }
//
// public void processRecord(Record record) {
// switch (record.getSid()) {
// // the BOFRecord can represent either the beginning of a sheet or
// // the workbook
// case BOFRecord.sid:
// BOFRecord bof = (BOFRecord) record;
// if (bof.getType() == BOFRecord.TYPE_WORKSHEET) {
// worksheetOpen = true;
// }
// break;
// case EOFRecord.sid:
// if (worksheetOpen) {
// while (row < rowRange.getEnd()) {
// row++;
// if (rowRange.contains(row)) {
// for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
// processCell(row, column, null);
// }
// spreradsheetRowProcessor.processRow(row, currentDataRow);
// }
// currentDataRow = new HashMap<Integer, String>();
// }
// worksheetOpen = false;
// }
// break;
// // don't care about sheet name for now
// // case BoundSheetRecord.sid:
// // BoundSheetRecord bsr = (BoundSheetRecord) record;
// // logger.info("New sheet named: " + bsr.getSheetname());
// // break;
// case RowRecord.sid:
// // RowRecord rowRecord = (RowRecord) record;
// // if (readAllRows) {
// // int rowNumber = row.getRowNumber();
// // if (rowNumber < minRow) {
// // minRow = rowNumber;
// // currentRow = rowNumber;
// // }
// // if (rowNumber > maxRow) {
// // maxRow = rowNumber;
// // }
// // }
// // if (readAllColumns) {
// // int firstColumn = row.getFirstCol();
// // int lastColumn = row.getLastCol() - 1;
// // if (firstColumn < minColumn) {
// // minColumn = firstColumn;
// // currentColumn = firstColumn;
// // }
// // if (lastColumn > maxColumn) {
// // maxColumn = lastColumn;
// // }
// // }
//
// break;
// case NumberRecord.sid:
// NumberRecord number = (NumberRecord) record;
// row = number.getRow();
// column = number.getColumn();
// processCell(row, column, String.valueOf(number.getValue()));
// break;
// case SSTRecord.sid:
// // SSTRecords store a array of unique strings used in Excel.
// sstrec = (SSTRecord) record;
// break;
// case LabelSSTRecord.sid:
// LabelSSTRecord label = (LabelSSTRecord) record;
// row = label.getRow();
// column = label.getColumn();
// processCell(row, column, sstrec.getString(label.getSSTIndex()).getString());
// break;
// case BlankRecord.sid:
// BlankRecord blank = (BlankRecord) record;
// row = blank.getRow();
// column = blank.getColumn();
// processCell(row, column, null);
// break;
// }
//
// // Missing column
// if (record instanceof MissingCellDummyRecord) {
// MissingCellDummyRecord cell = (MissingCellDummyRecord) record;
// row = cell.getRow();
// column = cell.getColumn();
// processCell(row, column, null);
// }
//
// // Missing row
// if (record instanceof MissingRowDummyRecord) {
// MissingRowDummyRecord missingRow = (MissingRowDummyRecord) record;
// row = missingRow.getRowNumber();
// if (rowRange.contains(row)) {
// for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) {
// processCell(row, column, null);
// }
// spreradsheetRowProcessor.processRow(row, currentDataRow);
// }
// currentDataRow = new HashMap<Integer, String>();
// }
//
// // End of row
// if (record instanceof LastCellOfRowDummyRecord) {
// LastCellOfRowDummyRecord lastCell = (LastCellOfRowDummyRecord) record;
// row = lastCell.getRow();
// if (rowRange.contains(row)) {
// int lastColumn = lastCell.getLastColumnNumber();
// for (column = lastColumn + 1; column <= columnRange.getEnd(); column++) {
// processCell(row, column, null);
// }
// spreradsheetRowProcessor.processRow(row, currentDataRow);
// }
// currentDataRow = new HashMap<Integer, String>();
// }
// }
//
// private void processCell(int row, int column, String value) {
// if (rowRange.contains(row) && columnRange.contains(column)) {
// currentDataRow.put(column, value);
// }
// }
//
// }
}