| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.uhighlight; |
| |
| import java.text.BreakIterator; |
| import java.text.CharacterIterator; |
| import java.text.StringCharacterIterator; |
| |
/**
 * Virtually slices the text on both sides of every occurrence of the specified character. If the
 * slice is 0-length, which happens for adjacent slice characters or when they are at the beginning
 * or end, that character is reported as a boundary. Every slice between the specified characters is
 * further processed with the supplied BreakIterator. A consequence is that the enclosed
 * BreakIterator will never "see" the splitting character.
 *
 * <p>Reported boundaries are offsets into the full text: each boundary the enclosed iterator
 * reports for a slice is shifted by the offset at which that slice starts.
 *
 * <p>{@link #setText(String)} must be called before any navigation method is used.
 * <br>
 * <em>Note: {@link #setText(CharacterIterator)} is unsupported. Use the string version.</em>
 *
 * @lucene.experimental
 */
public class SplittingBreakIterator extends BreakIterator {
  /** Iterator applied to the current slice; its text is re-set whenever a non-empty slice is entered. */
  private final BreakIterator baseIter;
  /** The character on which the text is sliced. */
  private final char sliceChar;

  /** The full text, as given to {@link #setText(String)}. */
  private String text;
  /** Start offset (inclusive) of the current slice within {@link #text}. */
  private int sliceStartIdx;
  /** End offset (exclusive) of the current slice; equals sliceStartIdx for a 0-length slice. */
  private int sliceEndIdx;
  /** The current boundary, as an offset into {@link #text}. */
  private int current;

  /**
   * @param baseIter the iterator used to find boundaries inside each slice
   * @param sliceChar the character at which the text is sliced
   */
  public SplittingBreakIterator(BreakIterator baseIter, char sliceChar) {
    this.baseIter = baseIter;
    this.sliceChar = sliceChar;
  }

  /**
   * Unsupported; use {@link #setText(String)}.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void setText(CharacterIterator newText) {
    throw new UnsupportedOperationException("unexpected");
  }

  /** Sets the text to iterate over and positions this iterator at the first boundary. */
  @Override
  public void setText(String newText) {
    this.text = newText;
    first();
  }

  /** Returns a new iterator over the full text, positioned at {@link #current()}. */
  @Override
  public CharacterIterator getText() {
    StringCharacterIterator charIter = new StringCharacterIterator(text);
    // API doesn't say what the state should be but it should probably be at the current index.
    charIter.setIndex(current());
    return charIter;
  }

  /** Returns the boundary most recently reported by a navigation method. */
  @Override
  public int current() {
    assert current != DONE;
    return current; // MUST be updated by the other methods when result isn't DONE.
  }

  /** Moves to the first slice and returns its first boundary. */
  @Override
  public int first() {
    sliceStartIdx = 0;
    sliceEndIdx = text.indexOf(sliceChar);
    if (sliceEndIdx == -1) {
      sliceEndIdx = text.length();
    }
    if (sliceStartIdx == sliceEndIdx) {
      // 0-length slice (empty text or leading separator): the slice itself is the boundary
      return current = sliceStartIdx;
    }
    baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx));
    // since setText() sets to first(), just grab current()
    return current = sliceStartIdx + baseIter.current();
  }

  /** Moves to the last slice and returns its last boundary. */
  @Override
  public int last() {
    sliceEndIdx = text.length();
    sliceStartIdx = text.lastIndexOf(sliceChar);
    if (sliceStartIdx == -1) {
      sliceStartIdx = 0;
    } else {
      sliceStartIdx++; // past sliceChar
    }
    if (sliceEndIdx == sliceStartIdx) {
      // 0-length slice (empty text or trailing separator)
      return current = sliceEndIdx;
    }
    baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx));
    return current = sliceStartIdx + baseIter.last();
  }

  /**
   * Advances to the next boundary, moving on to the following slice once the current one is
   * exhausted.
   *
   * @return the next boundary, or {@link #DONE} if there is none; in that case {@link #current()}
   *     is left unchanged
   */
  @Override
  public int next() {
    int prevCurrent = current;
    // a 0-length slice has no base-iterator state of its own, so treat it as exhausted
    current = sliceStartIdx == sliceEndIdx ? DONE : baseIter.next();
    if (current != DONE) {
      return current = current + sliceStartIdx;
    }
    if (sliceEndIdx >= text.length()) {
      current = prevCurrent; // keep current where it is
      return DONE;
    }
    // move to the slice that starts right after the separator at sliceEndIdx
    sliceStartIdx = sliceEndIdx + 1;
    sliceEndIdx = text.indexOf(sliceChar, sliceStartIdx);
    if (sliceEndIdx == -1) {
      sliceEndIdx = text.length();
    }
    if (sliceStartIdx == sliceEndIdx) {
      return current = sliceStartIdx;
    }
    baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx));
    // use current() since at first() already
    return current = sliceStartIdx + baseIter.current();
  }

  /**
   * Moves back to the previous boundary, moving on to the preceding slice once the current one is
   * exhausted.
   *
   * @return the previous boundary, or {@link #DONE} if there is none; in that case {@link
   *     #current()} is left unchanged
   */
  @Override
  public int previous() { // note: closely follows next() but reversed
    int prevCurrent = current;
    current = sliceStartIdx == sliceEndIdx ? DONE : baseIter.previous();
    if (current != DONE) {
      return current = current + sliceStartIdx;
    }
    if (sliceStartIdx == 0) {
      current = prevCurrent; // keep current where it is
      return DONE;
    }
    // move to the slice that ends at the separator just before sliceStartIdx
    sliceEndIdx = sliceStartIdx - 1;
    sliceStartIdx = text.lastIndexOf(sliceChar, sliceEndIdx - 1);
    if (sliceStartIdx == -1) {
      sliceStartIdx = 0;
    } else {
      sliceStartIdx++; // past sliceChar
    }
    if (sliceStartIdx == sliceEndIdx) {
      return current = sliceStartIdx;
    }
    baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx));
    return current = sliceStartIdx + baseIter.last();
  }

  /**
   * Returns the first boundary after the given offset and makes it the current boundary.
   *
   * @param offset an offset into the full text
   * @return the first boundary greater than {@code offset}, or {@link #DONE} if {@code offset} is
   *     the text length (the iterator is then positioned at the last boundary)
   */
  @Override
  public int following(int offset) {
    // if the offset is not in this slice, update the slice
    if (offset + 1 < sliceStartIdx || offset + 1 > sliceEndIdx) {
      if (offset == text.length()) { // DONE condition
        last(); // because https://bugs.openjdk.java.net/browse/JDK-8015110
        return DONE;
      }
      sliceStartIdx = text.lastIndexOf(sliceChar, offset); // no +1
      if (sliceStartIdx == -1) {
        sliceStartIdx = 0;
      } else {
        sliceStartIdx++; // move past separator
      }
      sliceEndIdx = text.indexOf(sliceChar, Math.max(offset + 1, sliceStartIdx));
      if (sliceEndIdx == -1) {
        sliceEndIdx = text.length();
      }
      if (sliceStartIdx != sliceEndIdx) { // otherwise, adjacent separator or separator at end
        baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx));
      }
    }

    // lookup following() in this slice:
    if (sliceStartIdx == sliceEndIdx) {
      return current = offset + 1;
    } else {
      // note: following() can never be first() if the first character is a boundary (it usually
      // is). So we have to check if we should call first() instead of following():
      if (offset == sliceStartIdx - 1) {
        // the first boundary following this offset is the very first boundary in this slice
        return current = sliceStartIdx + baseIter.first();
      } else {
        return current = sliceStartIdx + baseIter.following(offset - sliceStartIdx);
      }
    }
  }

  /**
   * Returns the last boundary before the given offset and makes it the current boundary.
   *
   * @param offset an offset into the full text
   * @return the last boundary less than {@code offset}, or {@link #DONE} if {@code offset} is 0
   *     (the iterator is then positioned at the first boundary)
   */
  @Override
  public int preceding(int offset) { // note: closely follows following() but reversed
    // if the offset is not in this slice, update the slice
    if (offset - 1 < sliceStartIdx || offset - 1 > sliceEndIdx) {
      if (offset == 0) { // DONE condition
        first(); // because https://bugs.openjdk.java.net/browse/JDK-8015110
        return DONE;
      }
      // The slice holding the preceding boundary is decided by the character just before the
      // offset (index offset - 1): if that character is the separator, the slice ENDS there.
      // Searching from 'offset' instead would select the slice that starts at 'offset' and leave
      // the slice state on the wrong side of the separator for later next()/previous() calls.
      sliceEndIdx = text.indexOf(sliceChar, offset - 1);
      if (sliceEndIdx == -1) {
        sliceEndIdx = text.length();
      }
      // the slice starts right after the closest separator before its end, if any
      sliceStartIdx = text.lastIndexOf(sliceChar, sliceEndIdx - 1);
      if (sliceStartIdx == -1) {
        sliceStartIdx = 0;
      } else {
        sliceStartIdx++; // move past separator
      }
      if (sliceStartIdx != sliceEndIdx) { // otherwise, adjacent separator or separator at start
        baseIter.setText(text.substring(sliceStartIdx, sliceEndIdx));
      }
    }

    // lookup preceding() in this slice:
    if (sliceStartIdx == sliceEndIdx) {
      return current = offset - 1;
    } else {
      // note: preceding() can never be last() if the last character is a boundary (it usually
      // is). So we have to check if we should call last() instead of preceding():
      if (offset == sliceEndIdx + 1) {
        // the last boundary preceding this offset is the very last boundary in this slice
        return current = sliceStartIdx + baseIter.last();
      } else {
        return current = sliceStartIdx + baseIter.preceding(offset - sliceStartIdx);
      }
    }
  }

  /**
   * Moves {@code n} boundaries forward, or {@code -n} boundaries backward when {@code n} is
   * negative, by repeatedly calling {@link #next()} or {@link #previous()}.
   *
   * @return the resulting boundary, or {@link #DONE} as soon as a step runs off either end
   */
  @Override
  public int next(int n) {
    if (n < 0) {
      for (int i = 0; i < -n; i++) {
        if (previous() == DONE) {
          return DONE;
        }
      }
    } else {
      for (int i = 0; i < n; i++) {
        if (next() == DONE) {
          return DONE;
        }
      }
    }
    return current();
  }
}