// blob: 86cd2dc1d4cd3d1a5217b5431571162ec883358b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.opinion_processor;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import opennlp.tools.apps.relevanceVocabs.SentimentVocab;
import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.ParseThicket;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.parse_thicket.VerbNetProcessor;
import opennlp.tools.parse_thicket.kernel_interface.DescriptiveParagraphFromDocExtractor;
import opennlp.tools.parse_thicket.matching.Matcher;
import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;
import opennlp.tools.similarity.apps.utils.Pair;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import opennlp.tools.textsimilarity.TextProcessor;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
/**
 * Finds the phrases of a paragraph that contain a given word.
 *
 * <p>The paragraph is turned into a {@link ParseThicket} by the shared {@link Matcher}; every
 * phrase of that thicket which contains the keyword (compared case-insensitively) is collected
 * into an {@link EntityExtractionResult}.
 */
public class SentencePhraseGivenAWordGetter {
  /** Shared matcher used to build the parse thicket from raw text. */
  protected static Matcher matcher;
  /** Not referenced by the code of this class itself. */
  protected ArrayList<File> queue = new ArrayList<File>();
  /** Shared phrase builder; created once in the static initializer below. */
  protected static PT2ThicketPhraseBuilder phraseBuilder;

  static {
    synchronized (SentencePhraseGivenAWordGetter.class) {
      matcher = new Matcher();
      phraseBuilder = new PT2ThicketPhraseBuilder();
    }
  }

  public SentencePhraseGivenAWordGetter() {
  }

  /**
   * Collects every phrase of {@code para} that contains {@code keyword}.
   *
   * <p>Words are compared with {@link String#equalsIgnoreCase(String)}, so the match does not
   * depend on the JVM's default locale. Each matching phrase is added once, in the order in
   * which the parse thicket returns its phrases.
   *
   * @param para    text to parse
   * @param keyword word to look for; when {@code null} nothing can match and an empty result is
   *                returned without parsing {@code para}
   * @return a result whose extracted-sentiment-phrases list holds the matching phrases
   *         (possibly empty)
   */
  public EntityExtractionResult extractEntities(String para, String keyword) {
    List<List<ParseTreeNode>> extractedPhrases = new ArrayList<List<ParseTreeNode>>();
    EntityExtractionResult result = new EntityExtractionResult();

    if (keyword != null) {
      ParseThicket pt = matcher.buildParseThicketFromTextWithRST(para);
      for (List<ParseTreeNode> phrase : pt.getPhrases()) {
        if (containsWord(phrase, keyword)) {
          extractedPhrases.add(phrase);
        }
      }
    }

    result.setExtractedSentimentPhrases(extractedPhrases);
    return result;
  }

  /**
   * Tells whether any node of {@code phrase} carries {@code keyword}, ignoring case.
   * Null phrases, null nodes and nodes without a word are skipped rather than aborting the scan.
   */
  private static boolean containsWord(List<ParseTreeNode> phrase, String keyword) {
    if (phrase == null) {
      return false;
    }
    for (ParseTreeNode node : phrase) {
      if (node == null) {
        continue;
      }
      String word = node.getWord();
      if (word != null && word.equalsIgnoreCase(keyword)) {
        return true;
      }
    }
    return false;
  }

  /** Small demo: prints the phrases of a sample sentence that contain the word "panel". */
  public static void main(String[] args) {
    SentencePhraseGivenAWordGetter self = new SentencePhraseGivenAWordGetter();
    EntityExtractionResult result = self.extractEntities(
        "However i put a foam panel inside the main case if i do not have my headphones or an iPad to brace the mac book",
        "panel");
    System.out.println(result.getExtractedSentimentPhrases());
  }
}
/*
Sample output of main() above: 3 phrases containing the keyword "panel" are given as a result
(an SBAR, a VP and an NP phrase)
*
[[<2>SBAR'i':FW, <3>SBAR'put':VBD, <4>SBAR'a':DT, <5>SBAR'foam':NN, <6>SBAR'panel':NN, <7>SBAR'inside':IN, <8>SBAR'the':DT, <9>SBAR'main':JJ, <10>SBAR'case':NN, <11>SBAR'if':IN, <12>SBAR'i':FW,
<13>SBAR'do':VBP, <14>SBAR'not':RB, <15>SBAR'have':VB, <16>SBAR'my':PRP$, <17>SBAR'headphones':NNS, <18>SBAR'or':CC, <19>SBAR'an':DT, <20>SBAR'iPad':NN, <21>SBAR'to':TO,
<22>SBAR'brace':VB, <23>SBAR'the':DT, <24>SBAR'mac':NN, <25>SBAR'book':NN],
[<3>VP'put':VBD, <4>VP'a':DT, <5>VP'foam':NN, <6>VP'panel':NN, <7>VP'inside':IN, <8>VP'the':DT, <9>VP'main':JJ, <10>VP'case':NN, <11>VP'if':IN, <12>VP'i':FW, <13>VP'do':VBP,
<14>VP'not':RB, <15>VP'have':VB, <16>VP'my':PRP$, <17>VP'headphones':NNS, <18>VP'or':CC, <19>VP'an':DT, <20>VP'iPad':NN, <21>VP'to':TO, <22>VP'brace':VB, <23>VP'the':DT,
<24>VP'mac':NN, <25>VP'book':NN],
[<4>NP'a':DT, <5>NP'foam':NN, <6>NP'panel':NN]]
*/