/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.tika.parser.microsoft; |
| |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.apache.poi.hwpf.converter.NumberFormatter; |
| |
| public abstract class AbstractListManager { |
| private final static String BULLET = "\u00b7"; |
| |
| protected Map<Integer, ParagraphLevelCounter> listLevelMap = |
| new HashMap<>(); |
| protected Map<Integer, LevelTuple[]> overrideTupleMap = new HashMap<>(); |
| |
| //helper class that is docx/doc format agnostic |
| protected static class ParagraphLevelCounter { |
| |
| //counts can == 0 if the format is decimal, make sure |
| //that flag values are < 0 |
| private final Integer NOT_SEEN_YET = -1; |
| private final Integer FIRST_SKIPPED = -2; |
| private final LevelTuple[] levelTuples; |
| Pattern LEVEL_INTERPOLATOR = Pattern.compile("%(\\d+)"); |
| private List<Integer> counts = new ArrayList<>(); |
| private int lastLevel = -1; |
| |
| public ParagraphLevelCounter(LevelTuple[] levelTuples) { |
| this.levelTuples = levelTuples; |
| } |
| |
| public int getNumberOfLevels() { |
| return levelTuples.length; |
| } |
| |
| /** |
| * Apply this to every numbered paragraph in order. |
| * |
| * @param levelNumber level number that is being incremented |
| * @return the new formatted number string for this level |
| */ |
| public String incrementLevel(int levelNumber, LevelTuple[] overrideLevelTuples) { |
| |
| for (int i = lastLevel + 1; i < levelNumber; i++) { |
| if (i >= counts.size()) { |
| int val = getStart(i, overrideLevelTuples); |
| counts.add(i, val); |
| } else { |
| int count = counts.get(i); |
| if (count == NOT_SEEN_YET) { |
| count = getStart(i, overrideLevelTuples); |
| counts.set(i, count); |
| } |
| } |
| } |
| |
| if (levelNumber < counts.size()) { |
| resetAfter(levelNumber, overrideLevelTuples); |
| int count = counts.get(levelNumber); |
| if (count == NOT_SEEN_YET) { |
| count = getStart(levelNumber, overrideLevelTuples); |
| } else { |
| count++; |
| } |
| counts.set(levelNumber, count); |
| lastLevel = levelNumber; |
| return format(levelNumber, overrideLevelTuples); |
| } |
| |
| counts.add(levelNumber, getStart(levelNumber, overrideLevelTuples)); |
| lastLevel = levelNumber; |
| return format(levelNumber, overrideLevelTuples); |
| } |
| |
| /** |
| * @param level which level to format |
| * @return the string that represents the number and the surrounding text for this paragraph |
| */ |
| private String format(int level, LevelTuple[] overrideLevelTuples) { |
| if (level < 0 || level >= levelTuples.length) { |
| //log? |
| return ""; |
| } |
| boolean isLegal = (overrideLevelTuples != null) ? overrideLevelTuples[level].isLegal : |
| levelTuples[level].isLegal; |
| //short circuit bullet |
| String numFmt = getNumFormat(level, isLegal, overrideLevelTuples); |
| if ("bullet".equals(numFmt)) { |
| return BULLET + " "; |
| } |
| |
| String lvlText = |
| (overrideLevelTuples == null || overrideLevelTuples[level].lvlText == null) ? |
| levelTuples[level].lvlText : overrideLevelTuples[level].lvlText; |
| StringBuilder sb = new StringBuilder(); |
| Matcher m = LEVEL_INTERPOLATOR.matcher(lvlText); |
| int last = 0; |
| while (m.find()) { |
| sb.append(lvlText, last, m.start()); |
| String lvlString = m.group(1); |
| int lvlNum = -1; |
| try { |
| lvlNum = Integer.parseInt(lvlString); |
| } catch (NumberFormatException e) { |
| //swallow |
| } |
| String numString = ""; |
| //need to subtract 1 because, e.g. %1 is the format |
| //for the number at array offset 0 |
| numString = formatNum(lvlNum - 1, isLegal, overrideLevelTuples); |
| |
| sb.append(numString); |
| last = m.end(); |
| } |
| sb.append(lvlText.substring(last)); |
| if (sb.length() > 0) { |
| //TODO: add in character after number |
| sb.append(" "); |
| } |
| return sb.toString(); |
| } |
| |
| //actual level number; can return empty string if numberformatter fails |
| private String formatNum(int lvlNum, boolean isLegal, LevelTuple[] overrideLevelTuples) { |
| |
| int numFmtStyle = 0; |
| String numFmt = getNumFormat(lvlNum, isLegal, overrideLevelTuples); |
| |
| int count = getCount(lvlNum); |
| if (count < 0) { |
| count = 1; |
| } |
| if ("lowerLetter".equals(numFmt)) { |
| numFmtStyle = 4; |
| } else if ("lowerRoman".equals(numFmt)) { |
| numFmtStyle = 2; |
| } else if ("decimal".equals(numFmt)) { |
| numFmtStyle = 0; |
| } else if ("upperLetter".equals(numFmt)) { |
| numFmtStyle = 3; |
| } else if ("upperRoman".equals(numFmt)) { |
| numFmtStyle = 1; |
| } else if ("bullet".equals(numFmt)) { |
| return ""; |
| //not yet handled by NumberFormatter...TODO: add to NumberFormatter? |
| } else if ("ordinal".equals(numFmt)) { |
| return ordinalize(count); |
| } else if ("decimalZero".equals(numFmt)) { |
| return "0" + NumberFormatter.getNumber(count, 0); |
| } else if ("none".equals(numFmt)) { |
| return ""; |
| } |
| try { |
| return NumberFormatter.getNumber(count, numFmtStyle); |
| } catch (IllegalArgumentException e) { |
| return ""; |
| } |
| } |
| |
| private String ordinalize(int count) { |
| //this is only good for locale == English |
| String countString = Integer.toString(count); |
| if (countString.endsWith("1")) { |
| return countString + "st"; |
| } else if (countString.endsWith("2")) { |
| return countString + "nd"; |
| } else if (countString.endsWith("3")) { |
| return countString + "rd"; |
| } |
| return countString + "th"; |
| } |
| |
| private String getNumFormat(int lvlNum, boolean isLegal, LevelTuple[] overrideLevelTuples) { |
| if (lvlNum < 0 || lvlNum >= levelTuples.length) { |
| //log? |
| return "decimal"; |
| } |
| if (isLegal) { |
| //return decimal no matter the level if isLegal is true |
| return "decimal"; |
| } |
| return (overrideLevelTuples == null || overrideLevelTuples[lvlNum].numFmt == null) ? |
| levelTuples[lvlNum].numFmt : overrideLevelTuples[lvlNum].numFmt; |
| } |
| |
| private int getCount(int lvlNum) { |
| if (lvlNum < 0 || lvlNum >= counts.size()) { |
| //log? |
| return 1; |
| } |
| return counts.get(lvlNum); |
| } |
| |
| private void resetAfter(int startlevelNumber, LevelTuple[] overrideLevelTuples) { |
| for (int levelNumber = startlevelNumber + 1; levelNumber < counts.size(); |
| levelNumber++) { |
| int cnt = counts.get(levelNumber); |
| if (cnt == NOT_SEEN_YET) { |
| //do nothing |
| } else if (cnt == FIRST_SKIPPED) { |
| //do nothing |
| } else if (levelTuples.length > levelNumber) { |
| //never reset if restarts == 0 |
| int restart = (overrideLevelTuples == null || |
| overrideLevelTuples[levelNumber].restart < 0) ? |
| levelTuples[levelNumber].restart : |
| overrideLevelTuples[levelNumber].restart; |
| if (restart == 0) { |
| return; |
| } else if (restart == -1 || startlevelNumber <= restart - 1) { |
| counts.set(levelNumber, NOT_SEEN_YET); |
| } else { |
| //do nothing/don't reset |
| } |
| } else { |
| //reset! |
| counts.set(levelNumber, NOT_SEEN_YET); |
| } |
| } |
| } |
| |
| private int getStart(int levelNumber, LevelTuple[] overrideLevelTuples) { |
| if (levelNumber >= levelTuples.length) { |
| return 1; |
| } else { |
| return (overrideLevelTuples == null || overrideLevelTuples[levelNumber].start < 0) ? |
| levelTuples[levelNumber].start : overrideLevelTuples[levelNumber].start; |
| } |
| } |
| } |
| |
| protected static class LevelTuple { |
| private final int start; |
| private final int restart; |
| private final String lvlText; |
| private final String numFmt; |
| private final boolean isLegal; |
| |
| public LevelTuple(String lvlText) { |
| this.lvlText = lvlText; |
| start = 1; |
| restart = -1; |
| numFmt = "decimal"; |
| isLegal = false; |
| } |
| |
| public LevelTuple(int start, int restart, String lvlText, String numFmt, boolean isLegal) { |
| this.start = start; |
| this.restart = restart; |
| this.lvlText = lvlText; |
| this.numFmt = numFmt; |
| this.isLegal = isLegal; |
| } |
| } |
| } |