blob: 9310a0429e132dedb02f60b2b2299dc9290448a8 [file] [log] [blame]
/*
* This software was produced for the U. S. Government
* under Contract No. W15P7T-11-C-F600, and is
* subject to the Rights in Noncommercial Computer Software
* and Noncommercial Computer Software Documentation
* Clause 252.227-7014 (JUN 1995)
*
* Copyright 2013 The MITRE Corporation. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.tagger;
/**
 * Strategy for pruning a cluster of tags, held as a doubly linked list of {@link TagLL} nodes,
 * down to the tags that should be emitted.
 */
public interface TagClusterReducer {

  /**
   * Reduces the linked-list to only those tags that should be emitted.
   *
   * @param head not null; a 1-element array holding the head of the list, which is itself
   *     not null
   */
  void reduce(TagLL[] head);

  /** Keeps every tag: the list is left exactly as it was passed in. */
  TagClusterReducer ALL = new TagClusterReducer() {
    @Override
    public void reduce(TagLL[] head) {
      // nothing to prune
    }
  };

  /**
   * Drops tags whose span lies within the span of another tag in the list.
   *
   * <p>Each tag after the head is compared against the tags before it, walking backwards.
   */
  TagClusterReducer NO_SUB = new TagClusterReducer() {
    @Override
    public void reduce(TagLL[] head) {
      // The head has nothing before it, so the forward walk starts at the second tag.
      TagLL current = head[0].nextTag;
      while (current != null) {
        // Walk backwards over everything that precedes 'current'.
        TagLL earlier = current.prevTag;
        while (earlier != null) {
          assert earlier.startOffset <= current.startOffset;
          if (earlier.endOffset >= current.endOffset) {
            // 'earlier' starts no later (asserted above) and ends no sooner than 'current',
            // so 'current' is the contained one and gets removed.
            current.removeLL();
            break;
          }
          // From here on 'earlier' ends before 'current' does.
          if (earlier.startOffset == current.startOffset) {
            // Same start but shorter: 'earlier' is the contained one. 'current' stays valid.
            earlier.removeLL();
          }
          // NOTE(review): this reads the link of a node that may just have been removed;
          // it relies on removeLL() leaving the removed node's own links intact - confirm.
          earlier = earlier.prevTag;
        }
        // NOTE(review): same reliance as above when 'current' itself was removed - confirm.
        current = current.nextTag;
      }
    }
  };

  /**
   * Repeatedly selects the longest not-yet-marked tag (by {@code charLen()}), marks it, and
   * removes every unmarked tag that overlaps it, until no unmarked tag is left.
   *
   * <p>When several unmarked tags share the greatest length, the one furthest along the list
   * is selected. A list consisting of a single tag is returned immediately, without marking.
   */
  TagClusterReducer LONGEST_DOMINANT_RIGHT = new TagClusterReducer() {
    @Override
    public void reduce(TagLL[] head) {
      // Common case: a lone tag needs no work.
      if (head[0].nextTag == null) {
        return;
      }

      TagLL longest;
      while ((longest = longestUnmarked(head)) != null) {
        // Marking keeps the winner out of later selection rounds and protects it from removal.
        longest.mark = true;

        // Remove the unmarked tags that overlap the winner.
        TagLL candidate = head[0];
        while (candidate != null) {
          if (!candidate.mark) {
            if (candidate.overlaps(longest)) {
              candidate.removeLL();
            } else if (candidate.startOffset >= longest.endOffset) {
              // No subsequent tag can possibly overlap.
              // (presumably because the list is ordered by startOffset - confirm)
              break;
            }
          }
          candidate = candidate.nextTag;
        }
      }

      // Everything still in the list should be marked by now.
      assert head[0].mark;
    }

    /**
     * Scans the list from {@code head[0]} and returns the unmarked tag with the greatest
     * {@code charLen()}; on ties the later tag wins. Returns null when every tag is marked.
     */
    private TagLL longestUnmarked(TagLL[] head) {
      TagLL best = null;
      TagLL node = head[0];
      while (node != null) {
        if (!node.mark && (best == null || node.charLen() >= best.charLen())) {
          best = node;
        }
        node = node.nextTag;
      }
      return best;
    }
  };
}