| /* ==================================================================== |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==================================================================== */ |
| package org.apache.poi.hssf.usermodel; |
| |
| import java.util.HashSet; |
| |
| import org.apache.poi.hssf.record.ExtendedFormatRecord; |
| import org.apache.poi.hssf.record.FontRecord; |
| import org.apache.poi.hssf.record.StyleRecord; |
| import org.apache.poi.hssf.record.common.UnicodeString; |
| import org.apache.poi.ss.usermodel.Cell; |
| import org.apache.poi.ss.usermodel.CellType; |
| import org.apache.poi.ss.usermodel.Row; |
| |
| /** |
| * Excel can get cranky if you give it files containing too |
| * many (especially duplicate) objects, and this class can |
| * help to avoid those. |
| * In general, it's much better to make sure you don't |
| * duplicate the objects in your code, as this is likely |
| * to be much faster than creating lots and lots of |
| * excel objects+records, only to optimise them down to |
| * many fewer at a later stage. |
| * However, sometimes this is too hard / tricky to do, which |
| * is where the use of this class comes in. |
| */ |
| public class HSSFOptimiser { |
| /** |
| * Goes through the Workbook, optimising the fonts by |
| * removing duplicate ones. |
| * For now, only works on fonts used in {@link HSSFCellStyle} |
| * and {@link HSSFRichTextString}. Any other font uses |
| * (eg charts, pictures) may well end up broken! |
| * This can be a slow operation, especially if you have |
| * lots of cells, cell styles or rich text strings |
| * @param workbook The workbook in which to optimise the fonts |
| */ |
| public static void optimiseFonts(HSSFWorkbook workbook) { |
| // Where each font has ended up, and if we need to |
| // delete the record for it. Start off with no change |
| short[] newPos = |
| new short[workbook.getWorkbook().getNumberOfFontRecords()+1]; |
| boolean[] zapRecords = new boolean[newPos.length]; |
| for(int i=0; i<newPos.length; i++) { |
| newPos[i] = (short)i; |
| zapRecords[i] = false; |
| } |
| |
| // Get each font record, so we can do deletes |
| // without getting confused |
| FontRecord[] frecs = new FontRecord[newPos.length]; |
| for(int i=0; i<newPos.length; i++) { |
| // There is no 4! |
| if(i == 4) continue; |
| |
| frecs[i] = workbook.getWorkbook().getFontRecordAt(i); |
| } |
| |
| // Loop over each font, seeing if it is the same |
| // as an earlier one. If it is, point users of the |
| // later duplicate copy to the earlier one, and |
| // mark the later one as needing deleting |
| // Note - don't change built in fonts (those before 5) |
| for(int i=5; i<newPos.length; i++) { |
| // Check this one for being a duplicate |
| // of an earlier one |
| int earlierDuplicate = -1; |
| for(int j=0; j<i && earlierDuplicate == -1; j++) { |
| if(j == 4) continue; |
| |
| FontRecord frCheck = workbook.getWorkbook().getFontRecordAt(j); |
| if(frCheck.sameProperties(frecs[i])) { |
| earlierDuplicate = j; |
| } |
| } |
| |
| // If we got a duplicate, mark it as such |
| if(earlierDuplicate != -1) { |
| newPos[i] = (short)earlierDuplicate; |
| zapRecords[i] = true; |
| } |
| } |
| |
| // Update the new positions based on |
| // deletes that have occurred between |
| // the start and them |
| // Only need to worry about user fonts |
| for(int i=5; i<newPos.length; i++) { |
| // Find the number deleted to that |
| // point, and adjust |
| short preDeletePos = newPos[i]; |
| short newPosition = preDeletePos; |
| for(int j=0; j<preDeletePos; j++) { |
| if(zapRecords[j]) newPosition--; |
| } |
| |
| // Update the new position |
| newPos[i] = newPosition; |
| } |
| |
| // Zap the un-needed user font records |
| for(int i=5; i<newPos.length; i++) { |
| if(zapRecords[i]) { |
| workbook.getWorkbook().removeFontRecord( |
| frecs[i] |
| ); |
| } |
| } |
| |
| // Tell HSSFWorkbook that it needs to |
| // re-start its HSSFFontCache |
| workbook.resetFontCache(); |
| |
| // Update the cell styles to point at the |
| // new locations of the fonts |
| for(int i=0; i<workbook.getWorkbook().getNumExFormats(); i++) { |
| ExtendedFormatRecord xfr = workbook.getWorkbook().getExFormatAt(i); |
| xfr.setFontIndex( |
| newPos[ xfr.getFontIndex() ] |
| ); |
| } |
| |
| // Update the rich text strings to point at |
| // the new locations of the fonts |
| // Remember that one underlying unicode string |
| // may be shared by multiple RichTextStrings! |
| HashSet<UnicodeString> doneUnicodeStrings = new HashSet<>(); |
| for(int sheetNum=0; sheetNum<workbook.getNumberOfSheets(); sheetNum++) { |
| HSSFSheet s = workbook.getSheetAt(sheetNum); |
| for (Row row : s) { |
| for (Cell cell : row) { |
| if(cell.getCellType() == CellType.STRING) { |
| HSSFRichTextString rtr = (HSSFRichTextString)cell.getRichStringCellValue(); |
| UnicodeString u = rtr.getRawUnicodeString(); |
| |
| // Have we done this string already? |
| if(! doneUnicodeStrings.contains(u)) { |
| // Update for each new position |
| for(short i=5; i<newPos.length; i++) { |
| if(i != newPos[i]) { |
| u.swapFontUse(i, newPos[i]); |
| } |
| } |
| |
| // Mark as done |
| doneUnicodeStrings.add(u); |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Goes through the Wokrbook, optimising the cell styles |
| * by removing duplicate ones, and ones that aren't used. |
| * For best results, optimise the fonts via a call to |
| * {@link #optimiseFonts(HSSFWorkbook)} first. |
| * @param workbook The workbook in which to optimise the cell styles |
| */ |
| public static void optimiseCellStyles(HSSFWorkbook workbook) { |
| // Where each style has ended up, and if we need to |
| // delete the record for it. Start off with no change |
| short[] newPos = new short[workbook.getWorkbook().getNumExFormats()]; |
| boolean[] isUsed = new boolean[newPos.length]; |
| boolean[] zapRecords = new boolean[newPos.length]; |
| |
| // to speed up the optimisation for workbooks with a large number of |
| // styles we perform the isUserDefined() check only once as it is |
| // costly according to some profiling |
| boolean[] userDefined = new boolean[newPos.length]; |
| |
| // Get each style record, so we can do deletes |
| // without getting confused |
| ExtendedFormatRecord[] xfrs = new ExtendedFormatRecord[newPos.length]; |
| |
| for(int i=0; i<newPos.length; i++) { |
| isUsed[i] = false; |
| newPos[i] = (short)i; |
| zapRecords[i] = false; |
| |
| userDefined[i] = isUserDefined(workbook, i); |
| |
| xfrs[i] = workbook.getWorkbook().getExFormatAt(i); |
| } |
| |
| // Loop over each style, seeing if it is the same |
| // as an earlier one. If it is, point users of the |
| // later duplicate copy to the earlier one, and |
| // mark the later one as needing deleting |
| // Only work on user added ones, which come after 20 |
| for (int i = 21; i < newPos.length; i++) { |
| // Check this one for being a duplicate |
| // of an earlier one |
| int earlierDuplicate = -1; |
| for (int j = 0; j < i; j++) { |
| ExtendedFormatRecord xfCheck = workbook.getWorkbook().getExFormatAt(j); |
| if (xfCheck.equals(xfrs[i]) && |
| // never duplicate user defined styles |
| !userDefined[j]) { |
| earlierDuplicate = j; |
| break; |
| } |
| } |
| |
| // If we got a duplicate, mark it as such |
| if(earlierDuplicate != -1) { |
| newPos[i] = (short)earlierDuplicate; |
| zapRecords[i] = true; |
| } |
| } |
| |
| // Loop over all the cells in the file, and identify any user defined |
| // styles aren't actually being used (don't touch built-in ones) |
| for (int sheetNum = 0; sheetNum < workbook.getNumberOfSheets(); sheetNum++) { |
| HSSFSheet s = workbook.getSheetAt(sheetNum); |
| for (Row row : s) { |
| for (Cell cellI : row) { |
| HSSFCell cell = (HSSFCell) cellI; |
| short oldXf = cell.getCellValueRecord().getXFIndex(); |
| // some documents contain invalid values here |
| if(oldXf < newPos.length) { |
| isUsed[oldXf] = true; |
| } |
| } |
| |
| // also mark row style as being used |
| short oldXf = ((HSSFRow) row).getRowRecord().getXFIndex(); |
| // some documents contain invalid values here |
| if(oldXf < newPos.length) { |
| isUsed[oldXf] = true; |
| } |
| } |
| |
| // also mark column styles as being used |
| for (int col = s.getSheet().getMinColumnIndex(); col <= s.getSheet().getMaxColumnIndex(); col++) { |
| short oldXf = s.getSheet().getXFIndexForColAt((short) col); |
| // some documents contain invalid values here |
| if(oldXf < newPos.length) { |
| isUsed[oldXf] = true; |
| } |
| } |
| } |
| |
| // Propagate isUsed for duplicates and always set user styles to being used to never optimize them away |
| for (int i = 21; i < isUsed.length; i++) { |
| // user defined styles are always "used" |
| if (isUserDefined(workbook, i)) { |
| isUsed[i] = true; |
| } |
| |
| // If we got a duplicate which is used, mark the one we're keeping as used |
| if(newPos[i] != i && isUsed[i]) { |
| isUsed[newPos[i]] = true; |
| } |
| } |
| |
| // Mark any that aren't used as needing zapping |
| for (int i=21; i<isUsed.length; i++) { |
| if (! isUsed[i]) { |
| // Un-used style, can be removed |
| zapRecords[i] = true; |
| newPos[i] = 0; |
| } |
| } |
| |
| // Update the new positions based on |
| // deletes that have occurred between |
| // the start and them |
| // Only work on user added ones, which come after 20 |
| for(int i=21; i<newPos.length; i++) { |
| // Find the number deleted to that |
| // point, and adjust |
| short preDeletePos = newPos[i]; |
| short newPosition = preDeletePos; |
| for(int j=0; j<preDeletePos; j++) { |
| if(zapRecords[j]) newPosition--; |
| } |
| |
| // Update the new position |
| newPos[i] = newPosition; |
| // also update StyleRecord and Parent-link |
| if (i != newPosition && newPosition != 0) { |
| workbook.getWorkbook().updateStyleRecord(i, newPosition); |
| |
| ExtendedFormatRecord exFormat = workbook.getWorkbook().getExFormatAt(i); |
| short oldParent = exFormat.getParentIndex(); |
| // some documents contain invalid values here |
| if(oldParent < newPos.length) { |
| short newParent = newPos[oldParent]; |
| exFormat.setParentIndex(newParent); |
| } |
| } |
| } |
| |
| // Zap the un-needed user style records |
| // removing by index, because removing by object may delete |
| // styles we did not intend to (the ones that _were_ duplicated and not the duplicates) |
| int max = newPos.length; |
| int removed = 0; // to adjust index after deletion |
| for(int i=21; i<max; i++) { |
| if(zapRecords[i + removed]) { |
| workbook.getWorkbook().removeExFormatRecord(i); |
| i--; |
| max--; |
| removed++; |
| } |
| } |
| |
| // Finally, update the cells to point at their new extended format records |
| for (int sheetNum = 0; sheetNum < workbook.getNumberOfSheets(); sheetNum++) { |
| HSSFSheet s = workbook.getSheetAt(sheetNum); |
| for (Row row : s) { |
| for (Cell cell : row) { |
| short oldXf = ((HSSFCell) cell).getCellValueRecord().getXFIndex(); |
| // some documents contain invalid values here |
| if(oldXf >= newPos.length) { |
| continue; |
| } |
| HSSFCellStyle newStyle = workbook.getCellStyleAt(newPos[oldXf]); |
| cell.setCellStyle(newStyle); |
| } |
| |
| // adjust row column style |
| short oldXf = ((HSSFRow) row).getRowRecord().getXFIndex(); |
| // some documents contain invalid values here |
| if(oldXf >= newPos.length) { |
| continue; |
| } |
| HSSFCellStyle newStyle = workbook.getCellStyleAt(newPos[oldXf]); |
| row.setRowStyle(newStyle); |
| } |
| |
| // adjust cell column style |
| for (int col = s.getSheet().getMinColumnIndex(); col <= s.getSheet().getMaxColumnIndex(); col++) { |
| short oldXf = s.getSheet().getXFIndexForColAt((short) col); |
| // some documents contain invalid values here |
| if(oldXf >= newPos.length) { |
| continue; |
| } |
| HSSFCellStyle newStyle = workbook.getCellStyleAt(newPos[oldXf]); |
| s.setDefaultColumnStyle(col, newStyle); |
| } |
| } |
| } |
| |
| private static boolean isUserDefined(HSSFWorkbook workbook, int index) { |
| StyleRecord styleRecord = workbook.getWorkbook().getStyleRecord(index); |
| return styleRecord != null && |
| !styleRecord.isBuiltin() && |
| styleRecord.getName() != null; |
| } |
| } |