blob: 2e5f8f21c4f2d38f9112f7510cd2f2b28e15430d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.parser.microsoft;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hwpf.converter.NumberFormatter;
/**
 * Base class for list managers. It holds the numbering state shared by
 * subclasses and the format-agnostic helpers that turn a list level into the
 * text shown in front of a numbered or bulleted paragraph.
 */
public abstract class AbstractListManager {
    private static final String BULLET = "\u00b7";

    // NOTE(review): the Integer keys are presumably list/numbering ids assigned
    // by the subclasses -- confirm against the callers.
    protected Map<Integer, ParagraphLevelCounter> listLevelMap = new HashMap<>();
    protected Map<Integer, LevelTuple[]> overrideTupleMap = new HashMap<>();

    /**
     * Helper class that is docx/doc format agnostic. It tracks the current
     * count of every level of one list and renders the label for a paragraph.
     */
    protected static class ParagraphLevelCounter {
        //counts can == 0 if the format is decimal, make sure
        //that flag values are < 0
        private static final int NOT_SEEN_YET = -1;
        private static final int FIRST_SKIPPED = -2;

        // Matches the "%N" placeholders in a level text; group 1 is the 1-based level.
        static final Pattern LEVEL_INTERPOLATOR = Pattern.compile("%(\\d+)");

        private final LevelTuple[] levelTuples;
        // Current count per level, indexed by 0-based level number.
        private final List<Integer> counts = new ArrayList<>();
        private int lastLevel = -1;

        public ParagraphLevelCounter(LevelTuple[] levelTuples) {
            this.levelTuples = levelTuples;
        }

        public int getNumberOfLevels() {
            return levelTuples.length;
        }

        /**
         * Apply this to every numbered paragraph in order.
         *
         * @param levelNumber         level number that is being incremented
         * @param overrideLevelTuples per-level overrides, or {@code null} to use
         *                            only the tuples given to the constructor
         * @return the new formatted number string for this level
         */
        public String incrementLevel(int levelNumber, LevelTuple[] overrideLevelTuples) {
            // Levels between the last one seen and this one were skipped:
            // give each of them its start value so that "%N" references resolve.
            for (int i = lastLevel + 1; i < levelNumber; i++) {
                if (i >= counts.size()) {
                    counts.add(i, getStart(i, overrideLevelTuples));
                } else if (counts.get(i) == NOT_SEEN_YET) {
                    counts.set(i, getStart(i, overrideLevelTuples));
                }
            }

            if (levelNumber < counts.size()) {
                resetAfter(levelNumber, overrideLevelTuples);
                int current = counts.get(levelNumber);
                int next = (current == NOT_SEEN_YET)
                        ? getStart(levelNumber, overrideLevelTuples)
                        : current + 1;
                counts.set(levelNumber, next);
            } else {
                counts.add(levelNumber, getStart(levelNumber, overrideLevelTuples));
            }
            lastLevel = levelNumber;
            return format(levelNumber, overrideLevelTuples);
        }

        /**
         * @param level               which level to format
         * @param overrideLevelTuples per-level overrides, may be {@code null}
         * @return the string that represents the number and the surrounding text
         * for this paragraph; empty if the level is out of range or has no level text
         */
        private String format(int level, LevelTuple[] overrideLevelTuples) {
            if (level < 0 || level >= levelTuples.length) {
                //log?
                return "";
            }
            boolean isLegal = (overrideLevelTuples != null)
                    ? overrideLevelTuples[level].isLegal
                    : levelTuples[level].isLegal;

            //short circuit bullet
            String numFmt = getNumFormat(level, isLegal, overrideLevelTuples);
            if ("bullet".equals(numFmt)) {
                return BULLET + " ";
            }

            String lvlText =
                    (overrideLevelTuples == null || overrideLevelTuples[level].lvlText == null)
                            ? levelTuples[level].lvlText
                            : overrideLevelTuples[level].lvlText;
            if (lvlText == null) {
                // No level text at all: same result as an empty level text,
                // instead of a NullPointerException from the matcher.
                return "";
            }

            StringBuilder sb = new StringBuilder();
            Matcher m = LEVEL_INTERPOLATOR.matcher(lvlText);
            int last = 0;
            while (m.find()) {
                sb.append(lvlText, last, m.start());
                int lvlNum = -1;
                try {
                    lvlNum = Integer.parseInt(m.group(1));
                } catch (NumberFormatException ignored) {
                    // Only reachable when the digits overflow an int; lvlNum stays -1
                    // and the out-of-range defaults of formatNum apply.
                }
                //need to subtract 1 because, e.g. %1 is the format
                //for the number at array offset 0
                sb.append(formatNum(lvlNum - 1, isLegal, overrideLevelTuples));
                last = m.end();
            }
            sb.append(lvlText.substring(last));
            if (sb.length() > 0) {
                //TODO: add in character after number
                sb.append(" ");
            }
            return sb.toString();
        }

        /**
         * Renders the count of one level in that level's number format.
         *
         * @param lvlNum              0-based level whose count is rendered
         * @param isLegal             if true, the level is rendered as decimal
         * @param overrideLevelTuples per-level overrides, may be {@code null}
         * @return actual level number; can be the empty string if the format is
         * "bullet" or "none", or if NumberFormatter fails
         */
        private String formatNum(int lvlNum, boolean isLegal, LevelTuple[] overrideLevelTuples) {
            String numFmt = getNumFormat(lvlNum, isLegal, overrideLevelTuples);
            int count = getCount(lvlNum);
            if (count < 0) {
                // A flag value (not yet seen/skipped) is rendered as 1.
                count = 1;
            }

            if ("bullet".equals(numFmt) || "none".equals(numFmt)) {
                return "";
            }
            //not yet handled by NumberFormatter...TODO: add to NumberFormatter?
            if ("ordinal".equals(numFmt)) {
                return ordinalize(count);
            }
            if ("decimalZero".equals(numFmt)) {
                // decimalZero pads single-digit numbers only: 01..09, 10, 11, ...
                String decimal = NumberFormatter.getNumber(count, 0);
                return (count < 10) ? "0" + decimal : decimal;
            }

            // Style codes passed to NumberFormatter.getNumber; anything
            // unrecognized (including "decimal") uses style 0.
            int numFmtStyle = 0;
            if ("upperRoman".equals(numFmt)) {
                numFmtStyle = 1;
            } else if ("lowerRoman".equals(numFmt)) {
                numFmtStyle = 2;
            } else if ("upperLetter".equals(numFmt)) {
                numFmtStyle = 3;
            } else if ("lowerLetter".equals(numFmt)) {
                numFmtStyle = 4;
            }
            try {
                return NumberFormatter.getNumber(count, numFmtStyle);
            } catch (IllegalArgumentException e) {
                return "";
            }
        }

        /**
         * Appends the English ordinal suffix to a number, e.g. 1st, 2nd, 3rd,
         * 4th, 11th, 12th, 13th, 21st.
         */
        private String ordinalize(int count) {
            //this is only good for locale == English
            String countString = Integer.toString(count);
            // The teens are irregular: 11th/12th/13th (and 111th, 212th, ...).
            if (countString.endsWith("11") || countString.endsWith("12") ||
                    countString.endsWith("13")) {
                return countString + "th";
            }
            if (countString.endsWith("1")) {
                return countString + "st";
            }
            if (countString.endsWith("2")) {
                return countString + "nd";
            }
            if (countString.endsWith("3")) {
                return countString + "rd";
            }
            return countString + "th";
        }

        /**
         * @return the number format name for a level; "decimal" if the level is
         * out of range or {@code isLegal} is set, otherwise the override's format
         * when present, else the base tuple's format
         */
        private String getNumFormat(int lvlNum, boolean isLegal, LevelTuple[] overrideLevelTuples) {
            if (lvlNum < 0 || lvlNum >= levelTuples.length) {
                //log?
                return "decimal";
            }
            if (isLegal) {
                //return decimal no matter the level if isLegal is true
                return "decimal";
            }
            if (overrideLevelTuples == null || overrideLevelTuples[lvlNum].numFmt == null) {
                return levelTuples[lvlNum].numFmt;
            }
            return overrideLevelTuples[lvlNum].numFmt;
        }

        /**
         * @return the stored count for a level (which may be a negative flag
         * value), or 1 if the level has no entry
         */
        private int getCount(int lvlNum) {
            if (lvlNum < 0 || lvlNum >= counts.size()) {
                //log?
                return 1;
            }
            return counts.get(lvlNum);
        }

        /**
         * Marks the levels deeper than {@code startlevelNumber} as not yet seen,
         * subject to each level's restart setting, so that they begin again at
         * their start value the next time they are incremented.
         */
        private void resetAfter(int startlevelNumber, LevelTuple[] overrideLevelTuples) {
            for (int levelNumber = startlevelNumber + 1; levelNumber < counts.size();
                    levelNumber++) {
                int cnt = counts.get(levelNumber);
                if (cnt == NOT_SEEN_YET || cnt == FIRST_SKIPPED) {
                    // nothing to reset
                    continue;
                }
                if (levelNumber >= levelTuples.length) {
                    // no tuple describes this level: always reset
                    counts.set(levelNumber, NOT_SEEN_YET);
                    continue;
                }
                int restart = (overrideLevelTuples == null ||
                        overrideLevelTuples[levelNumber].restart < 0)
                        ? levelTuples[levelNumber].restart
                        : overrideLevelTuples[levelNumber].restart;
                if (restart == 0) {
                    //never reset if restarts == 0
                    // NOTE(review): this return also leaves every deeper level
                    // untouched; confirm that is intended rather than "continue".
                    return;
                }
                if (restart == -1 || startlevelNumber <= restart - 1) {
                    counts.set(levelNumber, NOT_SEEN_YET);
                }
                // otherwise: don't reset
            }
        }

        /**
         * @return the first number for a level: 1 if there is no tuple for the
         * level, otherwise the override's start when it is non-negative, else the
         * base tuple's start
         */
        private int getStart(int levelNumber, LevelTuple[] overrideLevelTuples) {
            if (levelNumber >= levelTuples.length) {
                return 1;
            }
            if (overrideLevelTuples == null || overrideLevelTuples[levelNumber].start < 0) {
                return levelTuples[levelNumber].start;
            }
            return overrideLevelTuples[levelNumber].start;
        }
    }

    /**
     * Immutable description of one list level: where counting starts, when it
     * restarts, the level text containing "%N" placeholders, the number format
     * name and the legal-numbering flag.
     */
    protected static class LevelTuple {
        private final int start;
        // 0: never reset; -1: reset whenever a shallower level is incremented;
        // otherwise reset when the incremented 0-based level is <= restart - 1.
        private final int restart;
        private final String lvlText;
        private final String numFmt;
        private final boolean isLegal;

        /**
         * Creates a decimal level that starts at 1, with restart -1 and
         * isLegal false.
         *
         * @param lvlText level text for this level
         */
        public LevelTuple(String lvlText) {
            this(1, -1, lvlText, "decimal", false);
        }

        public LevelTuple(int start, int restart, String lvlText, String numFmt, boolean isLegal) {
            this.start = start;
            this.restart = restart;
            this.lvlText = lvlText;
            this.numFmt = numFmt;
            this.isLegal = isLegal;
        }
    }
}