| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package opennlp.tools.textsimilarity.chunker2matcher; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.util.ArrayList; |
| import java.util.List; |
| import java.util.logging.Level; |
| import java.util.logging.Logger; |
| |
| import opennlp.tools.chunker.ChunkerME; |
| import opennlp.tools.chunker.ChunkerModel; |
| import opennlp.tools.cmdline.parser.ParserTool; |
| import opennlp.tools.parser.AbstractBottomUpParser; |
| import opennlp.tools.parser.Parse; |
| import opennlp.tools.parser.Parser; |
| import opennlp.tools.parser.ParserFactory; |
| import opennlp.tools.parser.ParserModel; |
| import opennlp.tools.postag.POSModel; |
| import opennlp.tools.postag.POSTagger; |
| import opennlp.tools.postag.POSTaggerME; |
| import opennlp.tools.sentdetect.SentenceDetector; |
| import opennlp.tools.sentdetect.SentenceDetectorME; |
| import opennlp.tools.sentdetect.SentenceModel; |
| import opennlp.tools.textsimilarity.LemmaPair; |
| import opennlp.tools.textsimilarity.ParseTreeChunk; |
| import opennlp.tools.textsimilarity.ParseTreeMatcherDeterministic; |
| import opennlp.tools.textsimilarity.SentencePairMatchResult; |
| import opennlp.tools.textsimilarity.TextProcessor; |
| import opennlp.tools.tokenize.Tokenizer; |
| import opennlp.tools.tokenize.TokenizerME; |
| import opennlp.tools.tokenize.TokenizerModel; |
| import opennlp.tools.util.Span; |
| |
| |
| public class ParserChunker2MatcherProcessor { |
| protected static final int MIN_SENTENCE_LENGTH = 10; |
| private static final String MODEL_DIR_KEY = "nlp.models.dir"; |
| // TODO config |
| // this is where resources shoudl live |
| private static String MODEL_DIR = "resources/models"; |
| protected static ParserChunker2MatcherProcessor instance; |
| |
| private SentenceDetector sentenceDetector; |
| private Tokenizer tokenizer; |
| private POSTagger posTagger; |
| private Parser parser; |
| private ChunkerME chunker; |
| private final int NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS = 5; |
| private static Logger LOG = Logger.getLogger("opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor"); |
| |
/**
 * Creates the processor and loads the sentence detector, tokenizer, POS tagger,
 * parser and chunker models from the model directory.
 *
 * <p>Model loading failures are handled inside the individual initializers; this
 * constructor does not throw for them.
 */
protected ParserChunker2MatcherProcessor() {
    // Resolve the model directory against the current working directory.
    // File.getAbsolutePath() keeps dots that are part of directory names (stripping
    // "." characters from the working directory path would corrupt them) and
    // returns an already absolute path unchanged, so constructing a second
    // instance does not prepend the working directory again.
    MODEL_DIR = new File(MODEL_DIR).getAbsolutePath();
    initializeSentenceDetector();
    initializeTokenizer();
    initializePosTagger();
    initializeParser();
    initializeChunker();
}
| |
| /** |
| * singleton method of instantiating the processor |
| * @return the instance |
| */ |
| public synchronized static ParserChunker2MatcherProcessor getInstance() { |
| if (instance == null) |
| instance = new ParserChunker2MatcherProcessor(); |
| |
| return instance; |
| } |
| |
| /** |
| * General parsing function, which returns lists of parses for a portion of text |
| * @param text to be parsed |
| * @return lists of parses |
| */ |
| public List<List<Parse>> parseTextNlp(String text) { |
| if (text == null || text.trim().length() == 0) |
| return null; |
| |
| List<List<Parse>> textParses = new ArrayList<List<Parse>>(1); |
| |
| // parse paragraph by paragraph |
| String[] paragraphList = splitParagraph(text); |
| for (String paragraph : paragraphList) { |
| if (paragraph.length() == 0) |
| continue; |
| |
| List<Parse> paragraphParses = parseParagraphNlp(paragraph); |
| if (paragraphParses != null) |
| textParses.add(paragraphParses); |
| } |
| |
| return textParses; |
| } |
| |
| public List<Parse> parseParagraphNlp(String paragraph) { |
| if (paragraph == null || paragraph.trim().length() == 0) |
| return null; |
| |
| // normalize the text before parsing, otherwise, the sentences may not |
| // be |
| // separated correctly |
| |
| //paragraph = TextNormalizer.normalizeText(paragraph); |
| |
| // parse sentence by sentence |
| String[] sentences = splitSentences(paragraph); |
| List<Parse> parseList = new ArrayList<Parse>(sentences.length); |
| for (String sentence : sentences) { |
| sentence = sentence.trim(); |
| if (sentence.length() == 0) |
| continue; |
| |
| Parse sentenceParse = parseSentenceNlp(sentence, false); |
| if (sentenceParse != null) |
| parseList.add(sentenceParse); |
| } |
| |
| return parseList; |
| } |
| |
| public Parse parseSentenceNlp(String sentence) { |
| // if we parse an individual sentence, we want to normalize the text |
| // before parsing |
| return parseSentenceNlp(sentence, true); |
| } |
| |
| public synchronized Parse parseSentenceNlp(String sentence, |
| boolean normalizeText) { |
| // don't try to parse very short sentence, not much info in it anyway, |
| // most likely a heading |
| if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH) |
| return null; |
| |
| //if (normalizeText) |
| // sentence = TextNormalizer.normalizeText(sentence); |
| |
| Parse[] parseArray = null; |
| try { |
| parseArray = ParserTool.parseLine(sentence, parser, 1); |
| } catch (Throwable t) { |
| LOG.log(Level.WARNING, "failed to parse the sentence : '"+sentence, t); |
| return null; |
| } |
| |
| // Parse[] chunks = ChunkSampleStream.getInitialChunks(parseArray[0]) ; |
| |
| // there should be only one result parse |
| if (parseArray != null && parseArray.length > 0) |
| return parseArray[0]; |
| else |
| return null; |
| } |
| |
| /** |
| * |
| * @param para input text string which is assumed to be a paragraph and is split into sentences |
| * @return a list of lists of phrases with their POS tags for each phrase type (noun, verb etc.) |
| */ |
| |
| public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForPara(String para){ |
| List<List<ParseTreeChunk>> listOfChunksAccum = new ArrayList<List<ParseTreeChunk>>(); |
| String[] sentences = splitSentences(para); |
| for(String sent: sentences){ |
| List<List<ParseTreeChunk>> singleSentChunks = formGroupedPhrasesFromChunksForSentence(sent); |
| if (singleSentChunks==null) |
| continue; |
| if (listOfChunksAccum.size()<1 ){ |
| listOfChunksAccum = new ArrayList<List<ParseTreeChunk>>(singleSentChunks); |
| } else |
| for(int i= 0; i<NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS; i++){ |
| //make sure not null |
| if (singleSentChunks == null || singleSentChunks.size()!=NUMBER_OF_SECTIONS_IN_SENTENCE_CHUNKS) |
| break; |
| List<ParseTreeChunk> phraseI = singleSentChunks.get(i); |
| List<ParseTreeChunk> phraseIaccum = listOfChunksAccum.get(i); |
| phraseIaccum.addAll(phraseI); |
| listOfChunksAccum.set(i, phraseIaccum); |
| } |
| } |
| return listOfChunksAccum; |
| } |
| |
| /** |
| * |
| * @param para input text string which is assumed to be a sentence |
| * @return a list of lists of phrases with their POS tags for each phrase type (noun, verb etc.) |
| */ |
| public synchronized List<List<ParseTreeChunk>> formGroupedPhrasesFromChunksForSentence(String sentence) { |
| if (sentence == null || sentence.trim().length() < MIN_SENTENCE_LENGTH) |
| return null; |
| |
| sentence = TextProcessor.removePunctuation(sentence); |
| |
| String[] toks = tokenizer.tokenize(sentence); |
| String[] tags = new String[toks.length]; //posTagger.tag(toks); |
| SentenceNode node = parseSentenceNode(sentence); |
| if (node==null){ |
| LOG.info("Problem parsing sentence '"+sentence); |
| return null; |
| } |
| List<String> POSlist = node.getOrderedPOSList(); |
| |
| tags = POSlist.toArray(new String[0]); |
| if (toks.length != tags.length){ |
| LOG.info("disagreement between toks and tags; sent = '"+sentence + "'\n tags = "+tags + |
| "\n will now try this sentence in lower case" ); |
| node = parseSentenceNode(sentence.toLowerCase()); |
| if (node==null){ |
| LOG.info("Problem parsing sentence '"+sentence); |
| return null; |
| } |
| POSlist = node.getOrderedPOSList(); |
| tags = POSlist.toArray(new String[0]); |
| if (toks.length != tags.length){ |
| LOG.info("AGAIN: disagreement between toks and tags for lower case! "); |
| if (toks.length>tags.length){ |
| String[] newToks = new String[tags.length]; |
| for(int i = 0; i<tags.length; i++ ){ |
| newToks[i] = toks[i]; |
| } |
| toks = newToks; |
| |
| } else |
| return null; |
| } |
| } |
| String[] res = chunker.chunk(toks, tags); |
| |
| List<List<ParseTreeChunk>> listOfChunks = new ArrayList<List<ParseTreeChunk>>(); |
| List<ParseTreeChunk> nounPhr = new ArrayList<ParseTreeChunk>(), |
| prepPhr = new ArrayList<ParseTreeChunk>(), verbPhr = new ArrayList<ParseTreeChunk>(), |
| adjPhr = new ArrayList<ParseTreeChunk>(), |
| // to store the whole sentence |
| wholeSentence = new ArrayList<ParseTreeChunk>(); |
| List<String> pOSsAll = new ArrayList<String>(), lemmasAll = new ArrayList<String>(); |
| |
| for(int i = 0; i< toks.length; i++){ |
| pOSsAll.add(tags[i]); |
| lemmasAll.add(toks[i]); |
| } |
| wholeSentence.add(new ParseTreeChunk("SENTENCE", lemmasAll, pOSsAll)); |
| |
| boolean currPhraseClosed = false; |
| for(int i=0; i< res.length; i++){ |
| String bi_POS = res[i]; |
| currPhraseClosed = false; |
| if (bi_POS.startsWith("B-NP")){// beginning of a phrase |
| |
| List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>(); |
| pOSs.add(tags[i]); |
| lemmas.add(toks[i]); |
| for(int j=i+1; j<res.length; j++){ |
| if (res[j].startsWith("B-VP")){ |
| nounPhr.add(new ParseTreeChunk("NP", lemmas, pOSs)); |
| //LOG.info(i + " => " +lemmas); |
| currPhraseClosed = true; |
| break; |
| } else { |
| pOSs.add(tags[j]); |
| lemmas.add(toks[j]); |
| } |
| } |
| if (!currPhraseClosed){ |
| nounPhr.add(new ParseTreeChunk("NP", lemmas, pOSs)); |
| //LOG.fine(i + " => " + lemmas); |
| } |
| |
| } else if (bi_POS.startsWith("B-PP")){// beginning of a phrase |
| List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>(); |
| pOSs.add(tags[i]); |
| lemmas.add(toks[i]); |
| |
| for(int j=i+1; j<res.length; j++){ |
| if (res[j].startsWith("B-VP")){ |
| prepPhr.add(new ParseTreeChunk("PP", lemmas, pOSs)); |
| //LOG.fine(i + " => " + lemmas); |
| currPhraseClosed = true; |
| break; |
| } else { |
| pOSs.add(tags[j]); |
| lemmas.add(toks[j]); |
| } |
| } |
| if (!currPhraseClosed){ |
| prepPhr.add(new ParseTreeChunk("PP", lemmas, pOSs)); |
| //LOG.fine(i + " => " + lemmas); |
| } |
| } else |
| if (bi_POS.startsWith("B-VP")){// beginning of a phrase |
| List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>(); |
| pOSs.add(tags[i]); |
| lemmas.add(toks[i]); |
| |
| for(int j=i+1; j<res.length; j++){ |
| if (res[j].startsWith("B-VP")){ |
| verbPhr.add(new ParseTreeChunk("VP", lemmas, pOSs)); |
| //LOG.fine(i + " => " +lemmas); |
| currPhraseClosed = true; |
| break; |
| } else { |
| pOSs.add(tags[j]); |
| lemmas.add(toks[j]); |
| } |
| } |
| if (!currPhraseClosed){ |
| verbPhr.add(new ParseTreeChunk("VP", lemmas, pOSs)); |
| //LOG.fine(i + " => " + lemmas); |
| } |
| } else |
| if (bi_POS.startsWith("B-ADJP") ){// beginning of a phrase |
| List<String> pOSs = new ArrayList<String>(), lemmas = new ArrayList<String>(); |
| pOSs.add(tags[i]); |
| lemmas.add(toks[i]); |
| |
| for(int j=i+1; j<res.length; j++){ |
| if (res[j].startsWith("B-VP")){ |
| adjPhr.add(new ParseTreeChunk("ADJP", lemmas, pOSs)); |
| //LOG.fine(i + " => " +lemmas); |
| currPhraseClosed = true; |
| break; |
| } else { |
| pOSs.add(tags[j]); |
| lemmas.add(toks[j]); |
| } |
| } |
| if (!currPhraseClosed){ |
| adjPhr.add(new ParseTreeChunk("ADJP", lemmas, pOSs)); |
| //LOG.fine(i + " => " + lemmas); |
| } |
| } |
| } |
| listOfChunks.add(nounPhr); |
| listOfChunks.add(verbPhr); |
| listOfChunks.add(prepPhr); |
| listOfChunks.add(adjPhr); |
| listOfChunks.add(wholeSentence); |
| |
| return listOfChunks; |
| } |
| |
| public static List<List<SentenceNode>> textToSentenceNodes( |
| List<List<Parse>> textParses) { |
| if (textParses == null || textParses.size() == 0) |
| return null; |
| |
| List<List<SentenceNode>> textNodes = new ArrayList<List<SentenceNode>>( |
| textParses.size()); |
| for (List<Parse> paragraphParses : textParses) { |
| List<SentenceNode> paragraphNodes = paragraphToSentenceNodes(paragraphParses); |
| |
| // append paragraph node if any |
| if (paragraphNodes != null && paragraphNodes.size() > 0) |
| textNodes.add(paragraphNodes); |
| } |
| |
| if (textNodes.size() > 0) |
| return textNodes; |
| else |
| return null; |
| } |
| |
| public static List<SentenceNode> paragraphToSentenceNodes( |
| List<Parse> paragraphParses) { |
| if (paragraphParses == null || paragraphParses.size() == 0) |
| return null; |
| |
| List<SentenceNode> paragraphNodes = new ArrayList<SentenceNode>( |
| paragraphParses.size()); |
| for (Parse sentenceParse : paragraphParses) { |
| SentenceNode sentenceNode = null; |
| try { |
| sentenceNode = sentenceToSentenceNode(sentenceParse); |
| } catch (Exception e) { |
| // don't fail the whole paragraph when a single sentence fails |
| LOG.severe("Failed to convert sentence to node. error: " + e); |
| sentenceNode = null; |
| } |
| |
| if (sentenceNode != null) |
| paragraphNodes.add(sentenceNode); |
| } |
| |
| if (paragraphNodes.size() > 0) |
| return paragraphNodes; |
| else |
| return null; |
| } |
| |
| public static SentenceNode sentenceToSentenceNode(Parse sentenceParse) { |
| if (sentenceParse == null) |
| return null; |
| |
| // convert the OpenNLP Parse to our own tree nodes |
| SyntacticTreeNode node = toSyntacticTreeNode(sentenceParse); |
| if ((node == null)) |
| return null; |
| if (node instanceof SentenceNode) |
| return (SentenceNode)node; |
| else if (node instanceof PhraseNode){ |
| SentenceNode sn = new SentenceNode("sentence", node.getChildren()) ; |
| return sn; |
| } else return null; |
| } |
| |
| public List<List<SentenceNode>> parseTextNode(String text) { |
| List<List<Parse>> textParseList = parseTextNlp(text); |
| return textToSentenceNodes(textParseList); |
| } |
| |
| public List<SentenceNode> parseParagraphNode(String paragraph) { |
| List<Parse> paragraphParseList = parseParagraphNlp(paragraph); |
| return paragraphToSentenceNodes(paragraphParseList); |
| } |
| |
| public SentenceNode parseSentenceNode(String sentence) { |
| return parseSentenceNode(sentence, true); |
| } |
| |
| public synchronized SentenceNode parseSentenceNode(String sentence, |
| boolean normalizeText) { |
| Parse sentenceParse = parseSentenceNlp(sentence, normalizeText); |
| return sentenceToSentenceNode(sentenceParse); |
| } |
| |
| public String[] splitParagraph(String text) { |
| String[] res = text.split("\n"); |
| if (res == null || res.length<=1) |
| return new String[] {text}; |
| else |
| return res; |
| |
| } |
| |
| public String[] splitSentences(String text) { |
| if (text == null) |
| return null; |
| |
| return sentenceDetector.sentDetect(text); |
| } |
| |
| public String[] tokenizeSentence(String sentence) { |
| if (sentence == null) |
| return null; |
| |
| return tokenizer.tokenize(sentence); |
| } |
| |
| protected void initializeSentenceDetector() { |
| InputStream is = null; |
| try { |
| is = new FileInputStream( |
| MODEL_DIR + "/en-sent.bin" |
| |
| ); |
| SentenceModel model = new SentenceModel(is); |
| sentenceDetector = new SentenceDetectorME(model); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } finally { |
| if (is != null) { |
| try { |
| is.close(); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } |
| } |
| } |
| } |
| |
| protected void initializeTokenizer() { |
| InputStream is = null; |
| try { |
| is = new FileInputStream( |
| MODEL_DIR+ "/en-token.bin" |
| ); |
| TokenizerModel model = new TokenizerModel(is); |
| tokenizer = new TokenizerME(model); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } finally { |
| if (is != null) { |
| try { |
| is.close(); |
| } catch (IOException e) { |
| } |
| } |
| } |
| } |
| |
| protected void initializePosTagger() { |
| InputStream is = null; |
| try { |
| is = new FileInputStream(MODEL_DIR |
| + "/en-pos-maxent.bin"); |
| POSModel model = new POSModel(is); |
| posTagger = new POSTaggerME(model); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } finally { |
| if (is != null) { |
| try { |
| is.close(); |
| } catch (IOException e) { |
| } |
| } |
| } |
| } |
| |
| protected void initializeParser() { |
| InputStream is = null; |
| try { |
| is = new FileInputStream(MODEL_DIR |
| + "/en-parser-chunking.bin"); |
| ParserModel model = new ParserModel(is); |
| parser = ParserFactory.create(model); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } finally { |
| if (is != null) { |
| try { |
| is.close(); |
| } catch (IOException e) { |
| } |
| } |
| } |
| } |
| |
| private void initializeChunker() { |
| InputStream is = null; |
| try { |
| is = new FileInputStream(MODEL_DIR |
| + "/en-chunker.bin"); |
| ChunkerModel model = new ChunkerModel(is); |
| chunker = new ChunkerME(model); |
| } catch (IOException e) { |
| e.printStackTrace(); |
| } finally { |
| if (is != null) { |
| try { |
| is.close(); |
| } catch (IOException e) { |
| } |
| } |
| } |
| } |
| |
| /** |
| * convert an instance of Parse to SyntacticTreeNode, by filtering out the |
| * unnecessary data and assigning the word for each node |
| * |
| * @param parse |
| */ |
| private static SyntacticTreeNode toSyntacticTreeNode(Parse parse) { |
| if (parse == null) |
| return null; |
| |
| // check for junk types |
| String type = parse.getType(); |
| if (SyntacticTreeNode.isJunkType(type, parse) ) |
| return null; |
| |
| String text = parse.getText(); |
| ArrayList<SyntacticTreeNode> childrenNodeList = convertChildrenNodes(parse); |
| |
| // check sentence node, the node contained in the top node |
| if (type.equals(AbstractBottomUpParser.TOP_NODE) |
| && childrenNodeList != null && childrenNodeList.size() > 0) { |
| PhraseNode rootNode = (PhraseNode) childrenNodeList.get(0); |
| return new SentenceNode(text, rootNode.getChildren()); |
| } |
| |
| // if this node contains children nodes, then it is a phrase node |
| if (childrenNodeList != null && childrenNodeList.size() > 0) { |
| //System.out.println("Found "+ type + " phrase = "+ childrenNodeList); |
| return new PhraseNode(type, childrenNodeList); |
| |
| } |
| |
| // otherwise, it is a word node |
| Span span = parse.getSpan(); |
| String word = text.substring(span.getStart(), span.getEnd()).trim(); |
| |
| return new WordNode(type, word); |
| } |
| |
| private static ArrayList<SyntacticTreeNode> convertChildrenNodes(Parse parse) { |
| if (parse == null) |
| return null; |
| |
| Parse[] children = parse.getChildren(); |
| if (children == null || children.length == 0) |
| return null; |
| |
| ArrayList<SyntacticTreeNode> childrenNodeList = new ArrayList<SyntacticTreeNode>(); |
| for (Parse child : children) { |
| SyntacticTreeNode childNode = toSyntacticTreeNode(child); |
| if (childNode != null) |
| childrenNodeList.add(childNode); |
| } |
| |
| return childrenNodeList; |
| } |
| |
| /** |
| * The key function of similarity component which takes two portions of text and does similarity assessment by finding the set of all maximum common subtrees |
| * of the set of parse trees for each portion of text |
| * @param input text 1 |
| * @param input text 2 |
| * @return the matching results structure, which includes the similarity score |
| */ |
| public SentencePairMatchResult assessRelevance(String para1, String para2) |
| { |
| List<List<ParseTreeChunk>> sent1GrpLst = formGroupedPhrasesFromChunksForPara(para1), |
| sent2GrpLst = formGroupedPhrasesFromChunksForPara(para2); |
| |
| List<LemmaPair> origChunks1 = listListParseTreeChunk2ListLemmaPairs(sent1GrpLst); |
| |
| |
| ParseTreeMatcherDeterministic md = new ParseTreeMatcherDeterministic(); |
| List<List<ParseTreeChunk>> res = md.matchTwoSentencesGroupedChunksDeterministic(sent1GrpLst, sent2GrpLst); |
| return new SentencePairMatchResult(res, origChunks1); |
| |
| } |
| |
| protected List<LemmaPair> listListParseTreeChunk2ListLemmaPairs( |
| List<List<ParseTreeChunk>> sent1GrpLst) { |
| List<LemmaPair> results = new ArrayList<LemmaPair>(); |
| if (sent1GrpLst==null || sent1GrpLst.size() <1) |
| return results; |
| List<ParseTreeChunk> wholeSentence = sent1GrpLst.get(sent1GrpLst.size()-1); // whole sentence is last list in the list of lists |
| |
| List<String> pOSs = wholeSentence.get(0).getPOSs(); |
| List<String> lemmas = wholeSentence.get(0).getLemmas(); |
| for(int i= 0; i< lemmas.size(); i++){ |
| results.add(new LemmaPair( pOSs.get(i), lemmas.get(i), i )); |
| } |
| |
| return results; |
| } |
| |
| public void printParseTree(String phrase1){ |
| ParserChunker2MatcherProcessor p = ParserChunker2MatcherProcessor.getInstance(); |
| List<List<SentenceNode>> nodeListList = p.parseTextNode(phrase1); |
| for (List<SentenceNode> nodeList : nodeListList) { |
| for (SentenceNode node : nodeList) { |
| System.out.println(node); |
| } |
| } |
| } |
| } |