| /******************************************************************************* |
| * Copyright (C) 2009 The University of Manchester |
| * |
| * Modifications to the initial code base are copyright of their |
| * respective authors, or their employers as appropriate. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public License |
| * as published by the Free Software Foundation; either version 2.1 of |
| * the License, or (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, but |
| * WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this program; if not, write to the Free Software |
| * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
| ******************************************************************************/ |
| package net.sf.taverna.t2.activities.spreadsheet; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.SortedMap; |
| import java.util.TreeMap; |
| |
| import org.apache.log4j.Logger; |
| import org.apache.poi.openxml4j.exceptions.InvalidFormatException; |
| import org.apache.poi.ss.usermodel.Cell; |
| import org.apache.poi.ss.usermodel.DataFormatter; |
| import org.apache.poi.ss.usermodel.DateUtil; |
| import org.apache.poi.ss.usermodel.Row; |
| import org.apache.poi.ss.usermodel.Sheet; |
| import org.apache.poi.ss.usermodel.Workbook; |
| import org.apache.poi.ss.usermodel.WorkbookFactory; |
| |
| /** |
| * Reads Excel '97 (.xls) or Excel '07 (.xlsx) spreadsheet files. |
| * |
| * @author David Withers |
| */ |
| public class ExcelSpreadsheetReader implements SpreadsheetReader { |
| |
| private static Logger logger = Logger.getLogger(ExcelSpreadsheetReader.class); |
| |
| public void read(InputStream inputStream, Range rowRange, Range columnRange, boolean ignoreBlankRows, SpreadsheetRowProcessor rowProcessor) |
| throws SpreadsheetReadException { |
| Workbook workbook; |
| try { |
| workbook = WorkbookFactory.create(inputStream); |
| } catch (InvalidFormatException e) { |
| throw new SpreadsheetReadException( |
| "The file does not have a compatible spreadsheet format", e); |
| } catch (IOException e) { |
| throw new SpreadsheetReadException("The spreadsheet stream could not be read", e); |
| } catch (IllegalArgumentException e) { |
| throw new SpreadsheetReadException("The spreadsheet stream could not be read", e); |
| } |
| |
| DataFormatter dataFormatter = new DataFormatter(); |
| |
| workbook.setMissingCellPolicy(Row.CREATE_NULL_AS_BLANK); |
| Sheet sheet = workbook.getSheetAt(0); |
| |
| if (rowRange.getEnd() < 0) { |
| rowRange.setEnd(sheet.getLastRowNum()); |
| logger.debug("No end of row range specified, setting to " + rowRange.getEnd()); |
| } |
| |
| SortedMap<Integer, String> currentDataRow = new TreeMap<Integer, String>(); |
| |
| for (int rowIndex = rowRange.getStart(); rowIndex <= rowRange.getEnd(); rowIndex++) { |
| boolean blankRow = true; |
| if (rowRange.contains(rowIndex)) { |
| Row row = sheet.getRow(rowIndex); |
| for (int columnIndex = columnRange.getStart(); columnIndex <= columnRange.getEnd(); columnIndex++) { |
| if (columnRange.contains(columnIndex)) { |
| String value = null; |
| if (row != null) { |
| Cell cell = row.getCell(columnIndex); |
| if (cell != null) { |
| value = getCellValue(cell, dataFormatter); |
| } |
| } |
| if (value != null) { |
| blankRow = false; |
| } |
| currentDataRow.put(columnIndex, value); |
| if (columnIndex == columnRange.getEnd()) { |
| if (!ignoreBlankRows || !blankRow) { |
| rowProcessor.processRow(rowIndex, currentDataRow); |
| } |
| currentDataRow = new TreeMap<Integer, String>(); |
| } |
| } |
| } |
| } |
| } |
| |
| } |
| |
| private String getCellValue(Cell cell, DataFormatter dataFormatter) { |
| String value = null; |
| switch (cell.getCellType()) { |
| case Cell.CELL_TYPE_BOOLEAN: |
| value = Boolean.toString(cell.getBooleanCellValue()); |
| break; |
| case Cell.CELL_TYPE_NUMERIC: |
| if (DateUtil.isCellDateFormatted(cell)) { |
| // value = cell.getDateCellValue().toString(); |
| value = dataFormatter.formatCellValue(cell); |
| } else { |
| value = Double.toString(cell.getNumericCellValue()); |
| } |
| break; |
| case Cell.CELL_TYPE_STRING: |
| value = cell.getStringCellValue(); |
| break; |
| case Cell.CELL_TYPE_FORMULA: |
| switch (cell.getCachedFormulaResultType()) { |
| case Cell.CELL_TYPE_BOOLEAN: |
| value = Boolean.toString(cell.getBooleanCellValue()); |
| break; |
| case Cell.CELL_TYPE_NUMERIC: |
| if (DateUtil.isCellDateFormatted(cell)) { |
| value = cell.getDateCellValue().toString(); |
| } else { |
| value = Double.toString(cell.getNumericCellValue()); |
| } |
| break; |
| case Cell.CELL_TYPE_STRING: |
| value = cell.getStringCellValue(); |
| break; |
| default: |
| break; |
| } |
| default: |
| break; |
| } |
| // value = dataFormatter.formatCellValue(cell); |
| // if ("".equals(value)) value = null; |
| return value; |
| } |
| |
| |
| // /** |
| // * Reads data from an HSSF stream. |
| // * |
| // * @param inputStream |
| // * @param spreradsheetRowProcessor |
| // * @throws IOException |
| // * @deprecated can't generalize for XSSF streams and not much advantage as all the (non |
| // * duplicated) data is contained in one event so memory footprint isn't much smaller |
| // */ |
| // public void readHSSF(InputStream inputStream, SpreadsheetRowProcessor spreradsheetRowProcessor) |
| // throws IOException { |
| // POIFSFileSystem poifs = new POIFSFileSystem(inputStream); |
| // |
| // // get the workbook part of the stream |
| // InputStream documentInputStream = poifs.createDocumentInputStream("Workbook"); |
| // |
| // RecordProcessor recordProcessor = new RecordProcessor(spreradsheetRowProcessor); |
| // MissingRecordAwareHSSFListener hssfListener = new MissingRecordAwareHSSFListener( |
| // recordProcessor); |
| // |
| // // listen for all records |
| // HSSFRequest request = new HSSFRequest(); |
| // request.addListenerForAllRecords(hssfListener); |
| // |
| // HSSFEventFactory factory = new HSSFEventFactory(); |
| // factory.processEvents(request, documentInputStream); |
| // |
| // inputStream.close(); |
| // documentInputStream.close(); |
| // } |
| // |
| // /** |
| // * Listener for processing events from an HSSF stream. |
| // * |
| // * @author David Withers |
| // * @deprecated can't generalize for XSSF streams and not much advantage as all the (non |
| // * duplicated) data is contained in one event so memory footprint isn't much smaller |
| // */ |
| // class RecordProcessor implements HSSFListener { |
| // |
| // private SpreadsheetRowProcessor spreradsheetRowProcessor; |
| // |
| // private SSTRecord sstrec; |
| // |
| // private boolean worksheetOpen = false; |
| // |
| // private int row, column; |
| // |
| // private Map<Integer, String> currentDataRow = new HashMap<Integer, String>(); |
| // |
| // public RecordProcessor(SpreadsheetRowProcessor spreradsheetRowProcessor) { |
| // this.spreradsheetRowProcessor = spreradsheetRowProcessor; |
| // } |
| // |
| // public void processRecord(Record record) { |
| // switch (record.getSid()) { |
| // // the BOFRecord can represent either the beginning of a sheet or |
| // // the workbook |
| // case BOFRecord.sid: |
| // BOFRecord bof = (BOFRecord) record; |
| // if (bof.getType() == BOFRecord.TYPE_WORKSHEET) { |
| // worksheetOpen = true; |
| // } |
| // break; |
| // case EOFRecord.sid: |
| // if (worksheetOpen) { |
| // while (row < rowRange.getEnd()) { |
| // row++; |
| // if (rowRange.contains(row)) { |
| // for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) { |
| // processCell(row, column, null); |
| // } |
| // spreradsheetRowProcessor.processRow(row, currentDataRow); |
| // } |
| // currentDataRow = new HashMap<Integer, String>(); |
| // } |
| // worksheetOpen = false; |
| // } |
| // break; |
| // // don't care about sheet name for now |
| // // case BoundSheetRecord.sid: |
| // // BoundSheetRecord bsr = (BoundSheetRecord) record; |
| // // logger.info("New sheet named: " + bsr.getSheetname()); |
| // // break; |
| // case RowRecord.sid: |
| // // RowRecord rowRecord = (RowRecord) record; |
| // // if (readAllRows) { |
| // // int rowNumber = row.getRowNumber(); |
| // // if (rowNumber < minRow) { |
| // // minRow = rowNumber; |
| // // currentRow = rowNumber; |
| // // } |
| // // if (rowNumber > maxRow) { |
| // // maxRow = rowNumber; |
| // // } |
| // // } |
| // // if (readAllColumns) { |
| // // int firstColumn = row.getFirstCol(); |
| // // int lastColumn = row.getLastCol() - 1; |
| // // if (firstColumn < minColumn) { |
| // // minColumn = firstColumn; |
| // // currentColumn = firstColumn; |
| // // } |
| // // if (lastColumn > maxColumn) { |
| // // maxColumn = lastColumn; |
| // // } |
| // // } |
| // |
| // break; |
| // case NumberRecord.sid: |
| // NumberRecord number = (NumberRecord) record; |
| // row = number.getRow(); |
| // column = number.getColumn(); |
| // processCell(row, column, String.valueOf(number.getValue())); |
| // break; |
| // case SSTRecord.sid: |
| // // SSTRecords store a array of unique strings used in Excel. |
| // sstrec = (SSTRecord) record; |
| // break; |
| // case LabelSSTRecord.sid: |
| // LabelSSTRecord label = (LabelSSTRecord) record; |
| // row = label.getRow(); |
| // column = label.getColumn(); |
| // processCell(row, column, sstrec.getString(label.getSSTIndex()).getString()); |
| // break; |
| // case BlankRecord.sid: |
| // BlankRecord blank = (BlankRecord) record; |
| // row = blank.getRow(); |
| // column = blank.getColumn(); |
| // processCell(row, column, null); |
| // break; |
| // } |
| // |
| // // Missing column |
| // if (record instanceof MissingCellDummyRecord) { |
| // MissingCellDummyRecord cell = (MissingCellDummyRecord) record; |
| // row = cell.getRow(); |
| // column = cell.getColumn(); |
| // processCell(row, column, null); |
| // } |
| // |
| // // Missing row |
| // if (record instanceof MissingRowDummyRecord) { |
| // MissingRowDummyRecord missingRow = (MissingRowDummyRecord) record; |
| // row = missingRow.getRowNumber(); |
| // if (rowRange.contains(row)) { |
| // for (column = columnRange.getStart(); column <= columnRange.getEnd(); column++) { |
| // processCell(row, column, null); |
| // } |
| // spreradsheetRowProcessor.processRow(row, currentDataRow); |
| // } |
| // currentDataRow = new HashMap<Integer, String>(); |
| // } |
| // |
| // // End of row |
| // if (record instanceof LastCellOfRowDummyRecord) { |
| // LastCellOfRowDummyRecord lastCell = (LastCellOfRowDummyRecord) record; |
| // row = lastCell.getRow(); |
| // if (rowRange.contains(row)) { |
| // int lastColumn = lastCell.getLastColumnNumber(); |
| // for (column = lastColumn + 1; column <= columnRange.getEnd(); column++) { |
| // processCell(row, column, null); |
| // } |
| // spreradsheetRowProcessor.processRow(row, currentDataRow); |
| // } |
| // currentDataRow = new HashMap<Integer, String>(); |
| // } |
| // } |
| // |
| // private void processCell(int row, int column, String value) { |
| // if (rowRange.contains(row) && columnRange.contains(column)) { |
| // currentDataRow.put(column, value); |
| // } |
| // } |
| // |
| // } |
| |
| } |