| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.ruta.textruler.learner.whisk.token; |
| |
| import org.apache.uima.ruta.textruler.core.TextRulerBasicLearner; |
| import org.apache.uima.ruta.textruler.core.TextRulerExample; |
| import org.apache.uima.ruta.textruler.core.TextRulerMultiSlotRule; |
| import org.apache.uima.ruta.textruler.core.TextRulerRuleItem; |
| import org.apache.uima.ruta.textruler.core.TextRulerRulePattern; |
| import org.apache.uima.ruta.textruler.core.TextRulerSlotPattern; |
| import org.apache.uima.ruta.textruler.core.TextRulerStatisticsCollector; |
| import org.apache.uima.ruta.textruler.core.TextRulerTarget; |
| import org.apache.uima.ruta.textruler.core.TextRulerToolkit; |
| |
| public class WhiskRule extends TextRulerMultiSlotRule { |
| |
| TextRulerExample seedExample; |
| |
| public WhiskRule(WhiskRule copyFrom) { |
| super(copyFrom); |
| seedExample = copyFrom.seedExample; |
| } |
| |
| public WhiskRule(TextRulerBasicLearner parentAlgorithm, TextRulerTarget target, |
| TextRulerExample seedExample) { |
| super(parentAlgorithm, target); |
| this.seedExample = seedExample; |
| } |
| |
| @Override |
| public WhiskRule copy() { |
| return new WhiskRule(this); |
| } |
| |
| public double getLaplacian() { |
| int e = 0; |
| int n = 0; |
| |
| if (coveringStatistics != null) { |
| e = coveringStatistics.getCoveredNegativesCount(); |
| n = coveringStatistics.getCoveredNegativesCount() |
| + coveringStatistics.getCoveredPositivesCount(); |
| } |
| return ((double) e + 1) / ((double) n + 1); |
| } |
| |
| public TextRulerExample getSeedExample() { |
| return seedExample; |
| } |
| |
| @Override |
| public void setCoveringStatistics(TextRulerStatisticsCollector c) { |
| super.setCoveringStatistics(c); |
| if (TextRulerToolkit.DEBUG && c != null) { |
| if (!c.getCoveredPositiveExamples().contains(seedExample)) { |
| TextRulerToolkit.log("ERROR, A WHISK RULE MUST COVER AT LEAST ITS SEED EXAMPLE!"); |
| TextRulerToolkit.log("\tRULE: " + getRuleString()); |
| } |
| } |
| } |
| |
| public boolean containsTerm(WhiskRuleItem term) { |
| for (TextRulerSlotPattern sp : slotPatterns) { |
| for (TextRulerRuleItem i : sp.preFillerPattern) |
| if (i.equals(term)) |
| return true; |
| for (TextRulerRuleItem i : sp.fillerPattern) |
| if (i.equals(term)) |
| return true; |
| for (TextRulerRuleItem i : sp.postFillerPattern) |
| if (i.equals(term)) |
| return true; |
| } |
| return false; |
| } |
| |
| public WhiskRuleItem searchItemWithTermNumber(int no) { |
| for (TextRulerSlotPattern sp : slotPatterns) { |
| for (TextRulerRuleItem i : sp.preFillerPattern) { |
| if (((WhiskRuleItem) i).getTermNumberInExample() == no) { |
| return (WhiskRuleItem) i; |
| } |
| } |
| for (TextRulerRuleItem i : sp.fillerPattern) { |
| if (((WhiskRuleItem) i).getTermNumberInExample() == no) { |
| return (WhiskRuleItem) i; |
| } |
| } |
| for (TextRulerRuleItem i : sp.postFillerPattern) { |
| if (((WhiskRuleItem) i).getTermNumberInExample() == no) { |
| return (WhiskRuleItem) i; |
| } |
| } |
| } |
| return null; |
| } |
| |
| // TODO this could be moved to the core framework (TextRulerMultiSlotRule) |
| public WhiskRuleItem searchNeighborOfItem(WhiskRuleItem item, boolean goToLeft) { |
| int slotIndex = -1; |
| int patternIndex = -1; |
| int slotI = 0; |
| for (TextRulerSlotPattern sp : slotPatterns) { |
| for (TextRulerRuleItem it : sp.preFillerPattern) { |
| if (it == item) { |
| slotIndex = slotI; |
| patternIndex = 0; // 0=preFiller |
| break; |
| } |
| } |
| if (slotIndex < 0) { |
| for (TextRulerRuleItem it : sp.fillerPattern) { |
| if (it == item) { |
| slotIndex = slotI; |
| patternIndex = 1; // 1=filler |
| break; |
| } |
| } |
| } |
| if (slotIndex < 0) { |
| for (TextRulerRuleItem it : sp.postFillerPattern) { |
| if (it == item) { |
| slotIndex = slotI; |
| patternIndex = 2; // 2=postFiller |
| break; |
| } |
| } |
| } |
| if (slotIndex >= 0) { |
| break; |
| } |
| } |
| if (slotIndex < 0) // we didn't even find the item in our rule ?! how |
| // can this happen ? |
| return null; |
| |
| TextRulerRulePattern currentPattern = getPattern(slotIndex, patternIndex); |
| while (currentPattern != null) { |
| int startIndex = currentPattern.indexOf(item); // this is only >= 0 |
| // for the first |
| // pattern... |
| if (!goToLeft) // walk forward... |
| { |
| int startSearchFromIndex = startIndex + 1; |
| if (startSearchFromIndex < currentPattern.size()) |
| return (WhiskRuleItem) currentPattern.get(startSearchFromIndex); |
| else // skip to next pattern |
| { |
| patternIndex++; |
| if (patternIndex > 2) { |
| patternIndex = 0; |
| slotIndex++; |
| if (slotIndex >= slotPatterns.size()) |
| return null; // not found! |
| } |
| currentPattern = getPattern(slotIndex, patternIndex); |
| } |
| } else { |
| int startSearchFromIndex = startIndex >= 0 ? startIndex - 1 : currentPattern.size() - 1; |
| if (startSearchFromIndex >= 0 && currentPattern.size() > 0) |
| return (WhiskRuleItem) currentPattern.get(startSearchFromIndex); |
| else // skip to previous pattern |
| { |
| patternIndex--; |
| if (patternIndex < 0) { |
| patternIndex = 2; |
| slotIndex--; |
| if (slotIndex < 0) |
| return null; // not found! |
| } |
| currentPattern = getPattern(slotIndex, patternIndex); |
| } |
| } |
| } |
| return null; |
| } |
| |
| private TextRulerRulePattern getPattern(int slotIndex, int patternIndex) { |
| TextRulerSlotPattern sp = slotPatterns.get(slotIndex); |
| if (patternIndex == 0) |
| return sp.preFillerPattern; |
| else if (patternIndex == 1) |
| return sp.fillerPattern; |
| else if (patternIndex == 2) |
| return sp.postFillerPattern; |
| else |
| return null; |
| } |
| |
| public int totalConstraintPoints() { |
| int result = 0; |
| for (TextRulerSlotPattern sl : slotPatterns) { |
| for (TextRulerRuleItem i : sl.preFillerPattern) { |
| result += ((WhiskRuleItem) i).constraintPoints(); |
| } |
| for (TextRulerRuleItem i : sl.fillerPattern) { |
| result += ((WhiskRuleItem) i).constraintPoints(); |
| } |
| for (TextRulerRuleItem i : sl.postFillerPattern) { |
| result += ((WhiskRuleItem) i).constraintPoints(); |
| } |
| } |
| return result; |
| } |
| |
| } |