| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package opennlp.tools.jsmlearning; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| |
| import org.apache.commons.collections.ListUtils; |
| |
| import opennlp.tools.parse_thicket.pattern_structure.LinguisticPatternStructure; |
| import opennlp.tools.similarity.apps.utils.Pair; |
| import opennlp.tools.textsimilarity.ParseTreeChunk; |
| |
| public class JSMLearnerOnLatticeWithDeduction extends JSMLearnerOnLatticeBase{ |
| final List<JSMDecision> accumulatedJSMResults = new ArrayList<>(); |
| |
| |
| |
| public JSMDecision buildLearningModel(List<String> posTexts, List<String> negTexts, |
| String unknown, String[] separationKeywords){ |
| psPos = new LinguisticPatternStructure(0,0); psNeg = new LinguisticPatternStructure(0,0); |
| if (separationKeywords!=null){ // re-sort by occurrence of separation keyword |
| Pair<List<String>, List<String>> pair = reGroupByOccurrenceOfSeparationKeyword(posTexts, negTexts, separationKeywords ); |
| posTexts = pair.getFirst(); negTexts = pair.getSecond(); |
| } |
| |
| List<List<List<ParseTreeChunk>>> lingRepsPos = new ArrayList<>(), |
| lingRepsNeg = new ArrayList<>(); |
| for(String text: posTexts) |
| lingRepsPos.add(chunk_maker.formGroupedPhrasesFromChunksForPara(text)); |
| |
| for(String text: negTexts) |
| lingRepsNeg.add(chunk_maker.formGroupedPhrasesFromChunksForPara(text)); |
| |
| LinkedHashSet<Integer> obj; |
| int i=0; |
| for(List<List<ParseTreeChunk>> chunk: lingRepsPos){ |
| obj = new LinkedHashSet<>(); |
| obj.add(i); |
| psPos.AddIntent(chunk, obj, 0); |
| i++; |
| } |
| i=0; |
| for(List<List<ParseTreeChunk>> chunk: lingRepsNeg){ |
| obj = new LinkedHashSet<>(); |
| obj.add(i); |
| psNeg.AddIntent(chunk, obj, 0); |
| i++; |
| } |
| |
| List<List<ParseTreeChunk>> chunksUnknown = chunk_maker.formGroupedPhrasesFromChunksForPara(unknown); |
| List<List<List<ParseTreeChunk>>> posIntersections = new ArrayList<>(), |
| negIntersections = new ArrayList<>(); |
| List<List<ParseTreeChunk>> intersection; |
| for(int iConcept = 0; iConcept<psPos.conceptList.size(); iConcept++){ |
| if (psPos.conceptList.get(iConcept).intent!=null && psPos.conceptList.get(iConcept).intent.size()>0){ |
| intersection = computeIntersectionWithIntentExtendedByDeduction(psPos, iConcept, chunksUnknown); |
| if (reduceList(intersection).size()>0) |
| posIntersections.add(reduceList(intersection)); |
| } |
| } |
| for(int iConcept = 0; iConcept<psNeg.conceptList.size(); iConcept++){ |
| if (psNeg.conceptList.get(iConcept).intent!=null && psNeg.conceptList.get(iConcept).intent.size()>0){ |
| intersection = computeIntersectionWithIntentExtendedByDeduction(psNeg, iConcept, chunksUnknown); |
| if (reduceList(intersection).size()>0) |
| negIntersections.add(reduceList(intersection)); |
| } |
| } |
| |
| Pair<List<List<List<ParseTreeChunk>>>, List<List<List<ParseTreeChunk>>>> pair = |
| removeInconsistenciesFromPosNegIntersections( posIntersections, |
| negIntersections); |
| |
| posIntersections = pair.getFirst(); |
| negIntersections = pair.getSecond(); |
| |
| List<List<List<ParseTreeChunk>>> posIntersectionsUnderNeg = new ArrayList<>(), |
| negIntersectionsUnderPos = new ArrayList<>(); |
| |
| for(int iConcept = 0; iConcept<psNeg.conceptList.size(); iConcept++){ |
| for (List<List<ParseTreeChunk>> negIntersection : negIntersections) { |
| intersection = md |
| .matchTwoSentencesGroupedChunksDeterministic(psNeg.conceptList.get(iConcept).intent, negIntersection); |
| if (reduceList(intersection).size() > 0) |
| posIntersectionsUnderNeg.add(reduceList(intersection)); |
| } |
| } |
| |
| for(int iConcept = 0; iConcept<psPos.conceptList.size(); iConcept++){ |
| for (List<List<ParseTreeChunk>> posIntersection : posIntersections) { |
| intersection = md |
| .matchTwoSentencesGroupedChunksDeterministic(psPos.conceptList.get(iConcept).intent, posIntersection); |
| if (reduceList(intersection).size() > 0) |
| negIntersectionsUnderPos.add(reduceList(intersection)); |
| } |
| } |
| |
| List<ParseTreeChunk>posIntersectionsUnderNegLst = flattenParseTreeChunkLst(posIntersectionsUnderNeg); |
| List<ParseTreeChunk>negIntersectionsUnderPosLst=flattenParseTreeChunkLst(negIntersectionsUnderPos); |
| |
| posIntersectionsUnderNegLst = subtract(posIntersectionsUnderNegLst, negIntersectionsUnderPosLst); |
| negIntersectionsUnderPosLst= subtract(negIntersectionsUnderPosLst, posIntersectionsUnderNegLst); |
| |
| System.out.println("Pos - neg inters = "+posIntersectionsUnderNegLst); |
| System.out.println("Neg - pos inters = "+negIntersectionsUnderPosLst); |
| |
| Boolean bPositiveClass = (float)posIntersectionsUnderNegLst.size()/(float)negIntersectionsUnderPosLst.size() > 1f; |
| |
| JSMDecision decision = new JSMDecision("keywordClassName", bPositiveClass, |
| posIntersections , negIntersections, |
| posIntersectionsUnderNeg, |
| negIntersectionsUnderPos, separationKeywords); |
| |
| accumulatedJSMResults.add(decision); |
| |
| return decision; |
| |
| } |
| |
| private List<List<ParseTreeChunk>> computeIntersectionWithIntentExtendedByDeduction( |
| LinguisticPatternStructure psPos, int iConcept, |
| List<List<ParseTreeChunk>> chunksUnknown) { |
| |
| List<List<ParseTreeChunk>> intent = psPos.conceptList.get(iConcept).intent, |
| intentExtendedByDeduction = new ArrayList<>(); |
| |
| |
| for( List<ParseTreeChunk> group: intent){ |
| List<ParseTreeChunk> newGroup = new ArrayList<>(); |
| for(ParseTreeChunk ch: group){ |
| newGroup.add(ch); |
| List<String> lemmas = ch.getLemmas(); |
| List<List<List<ParseTreeChunk>>> clausesBodiesToAdd = findClausesForListOfLemmas(lemmas); |
| if (clausesBodiesToAdd!=null && clausesBodiesToAdd.size()>0) |
| intentExtendedByDeduction .add(flattenParseTreeChunkLst(clausesBodiesToAdd)); |
| } |
| intentExtendedByDeduction .add(newGroup); |
| } |
| return md |
| .matchTwoSentencesGroupedChunksDeterministic(intentExtendedByDeduction, chunksUnknown); |
| |
| } |
| |
| // for list of words in a phrase, identify if it includes a separation word/multiword and get respective clause body |
| //(to add to existing intent by the calling func) |
| private List<List<List<ParseTreeChunk>>> findClausesForListOfLemmas( |
| List<String> lemmas) { |
| for(String lemma: lemmas){ |
| for(JSMDecision dec: accumulatedJSMResults ){ |
| String[] sepKeywords = dec.getSeparationKeywords(); |
| // if all separation keywords occur in this phrase |
| if (ListUtils.intersection(lemmas, Arrays.asList(sepKeywords)).size() == sepKeywords.length){ |
| return dec.getPosHypotheses(); |
| } |
| } |
| } |
| return null; |
| } |
| |
| public Pair<List<String>, List<String>> reGroupByOccurrenceOfSeparationKeyword(List<String> posTexts, List<String> negTexts, String[] keywords){ |
| List<String> posTextsNew = new ArrayList<>(), negTextsNew = new ArrayList<>(); |
| for(String posText:posTexts){ |
| boolean multiwordOccurs = true; |
| for(String keyword: keywords){ |
| if (!posText.contains(keyword)) |
| multiwordOccurs = false; |
| break; |
| } |
| if (multiwordOccurs) |
| posTextsNew.add(posText); |
| else |
| negTextsNew.add(posText); |
| } |
| for(String negText:negTexts){ |
| boolean multiwordOccurs = true; |
| for(String keyword: keywords){ |
| if (!negText.contains(keyword)) |
| multiwordOccurs = false; |
| break; |
| } |
| if (multiwordOccurs) |
| posTextsNew.add(negText); |
| else |
| negTextsNew.add(negText); |
| } |
| |
| |
| return new Pair<>(posTextsNew, negTextsNew); |
| } |
| |
| |
| |
| public static void main (String[] args) { |
| |
| String[] posArr = new String[] {"I rent an office space. This office is for my business. I can deduct office rental expense from my business profit to calculate net income. ", |
| "To run my business, I have to rent an office. The net business profit is calculated as follows. Rental expense needs to be subtracted from revenue. ", |
| "To store goods for my retail business I rent some space. When I calculate the net income, I take revenue and subtract business expenses such as office rent. ", |
| "I rent some space for my business. To calculate my net income, I subtract from revenue my rental business expense."}; |
| |
| String[] negArr = new String[] {"I rent out a first floor unit of my house to a travel business. I need to add the rental income to my profit. However, when I repair my house, I can deduct the repair expense from my rental income. ", |
| "I receive rental income from my office. I have to claim it as a profit in my tax forms. I need to add my rental income to my profits, but subtract rental expenses such as repair from it. ", |
| "I advertised my property as a business rental. Advertisement and repair expenses can be subtracted from the rental income. Remaining rental income needs to be added to my profit and be reported as taxable profit. ", |
| "I showed my property to a business owner to rent. Expenses on my time spent on advertisement are subtracted from the rental income. My rental profits are added to my taxable income. "}; |
| |
| String unknown = "I do not want to rent anything to anyone. I just want to rent a space for myself. I neither calculate deduction of individual or business tax. I subtract my tax from my income"; |
| JSMLearnerOnLatticeWithDeduction jsm = new JSMLearnerOnLatticeWithDeduction(); |
| JSMDecision dec1 = // may be determined by 'subtract' |
| jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"subtract"}); |
| JSMDecision dec2 = // may be determined by ... |
| jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"business"}); |
| JSMDecision dec3 = // may be determined by ... |
| jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"property"}); |
| // Finally, do prediction |
| JSMDecision dec = // may be determined by ... |
| jsm.buildLearningModel(Arrays.asList(posArr), Arrays.asList(negArr), unknown , new String[]{"property"}); |
| |
| |
| |
| |
| |
| } |
| } |