blob: c980f9f7d83f476d8835af8801d3d64cc3abb560 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.kernel_interface;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.ParseThicket;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.parse_thicket.VerbNetProcessor;
import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
import opennlp.tools.parse_thicket.matching.Matcher;
import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;
import edu.stanford.nlp.trees.Tree;
public class TreeExtenderByAnotherLinkedTree extends PT2ThicketPhraseBuilder {
private static Logger log = Logger
.getLogger("opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree");
public List<String> buildForestForCorefArcs(ParseThicket pt){
List<String> results = new ArrayList<String>();
for(WordWordInterSentenceRelationArc arc: pt.getArcs()){
//if (!arc.getArcType().getType().startsWith("coref"))
// continue;
int fromSent = arc.getCodeFrom().getFirst();
int toSent = arc.getCodeTo().getFirst();
if (fromSent <1 || toSent <1 ) // TODO problem in sentence enumeration => skip building extended trees
return results;
String wordFrom = arc.getLemmaFrom();
String wordTo = arc.getLemmaTo();
List<Tree> trees = getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent-1),
pt.getSentences().get(fromSent-1), new String[]{ wordFrom});
if (trees==null || trees.size()<1)
continue;
System.out.println(trees);
StringBuilder sb = new StringBuilder(10000);
toStringBuilderExtenderByAnotherLinkedTree1(sb, pt.getSentences().get(toSent-1), trees.get(0), new String[]{wordTo});
System.out.println(sb.toString());
results.add(sb.toString());
}
// if no arcs then orig sentences
if (results.isEmpty()){
for(Tree t: pt.getSentences()){
results.add(t.toString());
}
}
return results;
}
// sentences in pt are enumerarted starting from 0;
//this func works with Sista version of Stanford NLP and sentences are coded from 0
public List<String> buildForestForRSTArcs(ParseThicket pt){
List<String> results = new ArrayList<String>();
for(WordWordInterSentenceRelationArc arc: pt.getArcs()){
// TODO - uncomment
//if (!arc.getArcType().getType().startsWith("rst"))
// continue;
int fromSent = arc.getCodeFrom().getFirst();
int toSent = arc.getCodeTo().getFirst();
String wordFrom = arc.getLemmaFrom();
String wordTo = arc.getLemmaTo();
if (wordFrom == null || wordFrom.length()<1 || wordTo == null || wordTo.length()<1)
log.severe("Empty lemmas for RST arc "+ arc);
List<Tree> trees = getASubtreeWithRootAsNodeForWord1(pt.getSentences().get(fromSent),
pt.getSentences().get(fromSent), new String[]{ wordFrom});
if (trees==null || trees.size()<1)
continue;
System.out.println(trees);
StringBuilder sb = new StringBuilder(10000);
Tree tree = trees.get(0);
// instead of phrase type for the root of the tree, we want to put the RST relation name
if (arc.getArcType().getType().startsWith("rst"))
tree.setValue(arc.getArcType().getSubtype());
toStringBuilderExtenderByAnotherLinkedTree1(sb, pt.getSentences().get(toSent), tree, new String[]{wordTo});
System.out.println(sb.toString());
results.add(sb.toString());
}
// if no arcs then orig sentences
if (results.isEmpty()){
for(Tree t: pt.getSentences()){
results.add(t.toString());
}
}
return results;
}
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree1(StringBuilder sb, Tree t, Tree treeToInsert, String[] corefWords) {
if (t.isLeaf()) {
if (t.label() != null) {
sb.append(t.label().value());
}
return sb;
} else {
sb.append('(');
if (t.label() != null) {
if (t.value() != null) {
sb.append(t.label().value());
}
}
boolean bInsertNow=false;
Tree[] kids = t.children();
if (kids != null) {
for (Tree kid : kids) {
if (corefWords!=null){
String word = corefWords[corefWords.length-1];
String phraseStr = kid.toString();
phraseStr=phraseStr.replace(")", "");
if (phraseStr.endsWith(word)){
bInsertNow=true;
}
}
}
if (bInsertNow){
for (Tree kid : kids) {
sb.append(' ');
toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, null, null);
}
sb.append(' ');
toStringBuilderExtenderByAnotherLinkedTree1(sb, treeToInsert, null, null);
} else {
for (Tree kid : kids) {
sb.append(' ');
toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, treeToInsert, corefWords);
}
}
}
return sb.append(')');
}
}
// given a parse tree and a
public List<Tree> getASubtreeWithRootAsNodeForWord1(Tree tree, Tree currentSubTree, String[] corefWords){
if (currentSubTree.isLeaf()){
return null;
}
List<Tree> result = null;
Tree[] kids = currentSubTree.children();
if (kids != null) {
boolean bFound=false;
String word = corefWords[corefWords.length-1];
for (Tree kid : kids) {
if (bFound){
result.add(kid);
} else {
String phraseStr = kid.toString();
phraseStr=phraseStr.replace(")", "");
if (phraseStr.endsWith(word)){ // found
bFound=true;
result = new ArrayList<Tree>();
}
}
}
if (bFound){
return result;
}
// if not a selected node, proceed with iteration
for (Tree kid : kids) {
List<Tree> ts = getASubtreeWithRootAsNodeForWord1(tree, kid, corefWords);
if (ts!=null)
return ts;
}
}
return null;
}
// now obsolete
public Tree[] getASubtreeWithRootAsNodeForWord(Tree tree, Tree currentSubTree, String[] corefWords){
if (currentSubTree.isLeaf()){
return null;
}
boolean bInsertNow=false;
/*List<ParseTreeNode> bigTreeNodes = parsePhrase(currentSubTree.label().value());
for(ParseTreeNode smallNode: bigTreeNodes ){
if (bigTreeNodes.get(0).getWord().equals("") )
continue;
String word = bigTreeNodes.get(0).getWord();
for(String cWord: corefWords){
if (word.equalsIgnoreCase(cWord))
bInsertNow=true;
}
} */
String nodePhraseStr = currentSubTree.toString();
System.out.println(nodePhraseStr);
for(String w: corefWords)
nodePhraseStr = nodePhraseStr.replace(w, "");
// all words are covered
if (nodePhraseStr.toUpperCase().equals(nodePhraseStr))
bInsertNow=true;
//if(bInsertNow)
// return currentSubTree;
Tree[] kids = currentSubTree.children();
if (kids != null) {
/*for (Tree kid : kids) {
List<ParseTreeNode> bigTreeNodes = parsePhrase(kid.label().value());
if (bigTreeNodes!=null && bigTreeNodes.size()>0 && bigTreeNodes.get(0)!=null &&
bigTreeNodes.get(0).getWord().equalsIgnoreCase(corefWords[0])){
bInsertNow=true;
return kids;
}
}*/
for (Tree kid : kids) {
Tree[] t = getASubtreeWithRootAsNodeForWord(tree, kid, corefWords);
if (t!=null)
return t;
}
}
return null;
}
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree(StringBuilder sb, Tree t, Tree treeToInsert) {
if (t.isLeaf()) {
if (t.label() != null) {
sb.append(t.label().value());
}
return sb;
} else {
sb.append('(');
if (t.label() != null) {
if (t.value() != null) {
sb.append(t.label().value());
}
}
boolean bInsertNow=false;
// we try match trees to find out if we are at the insertion position
if (treeToInsert!=null){
List<ParseTreeNode> bigTreeNodes = parsePhrase(t.label().value());
List<ParseTreeNode> smallTreeNodes = parsePhrase(treeToInsert.getChild(0).getChild(0).getChild(0).label().value());
System.out.println(t + " \n "+ treeToInsert+ "\n");
if (smallTreeNodes.size()>0 && bigTreeNodes.size()>0)
for(ParseTreeNode smallNode: smallTreeNodes ){
if (!bigTreeNodes.get(0).getWord().equals("")
&& bigTreeNodes.get(0).getWord().equalsIgnoreCase(smallNode.getWord()))
bInsertNow=true;
}
}
if (bInsertNow){
Tree[] kids = t.children();
if (kids != null) {
for (Tree kid : kids) {
sb.append(' ');
toStringBuilderExtenderByAnotherLinkedTree(sb, kid, null);
}
sb.append(' ');
toStringBuilderExtenderByAnotherLinkedTree(sb, treeToInsert.getChild(0).getChild(1), null);
int z=0; z++;
}
} else {
Tree[] kids = t.children();
if (kids != null) {
for (Tree kid : kids) {
sb.append(' ');
toStringBuilderExtenderByAnotherLinkedTree(sb, kid, treeToInsert);
}
}
}
return sb.append(')');
}
}
public StringBuilder toStringBuilder(StringBuilder sb, Tree t) {
if (t.isLeaf()) {
if (t.label() != null) {
sb.append(t.label().value());
}
return sb;
} else {
sb.append('(');
if (t.label() != null) {
if (t.value() != null) {
sb.append(t.label().value());
}
}
Tree[] kids = t.children();
if (kids != null) {
for (Tree kid : kids) {
sb.append(' ');
toStringBuilder(sb, kid);
}
}
return sb.append(')');
}
}
public static void main(String[] args){
VerbNetProcessor p = VerbNetProcessor.
getInstance("/Users/borisgalitsky/Documents/workspace/deepContentInspection/src/test/resources");
Matcher matcher = new Matcher();
TreeExtenderByAnotherLinkedTree extender = new TreeExtenderByAnotherLinkedTree();
ParseThicket pt = matcher.buildParseThicketFromTextWithRST(//"I went to the forest to look for a tree. I found out that it was thick and green");
"Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons. "+
"UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " +
"A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " +
"Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. ");
List<String> results = extender.buildForestForCorefArcs(pt);
System.out.println(results);
//System.exit(0);
List<Tree> forest = pt.getSentences();
List<Tree> trees = extender.getASubtreeWithRootAsNodeForWord1(forest.get(1), forest.get(1), new String[]{"its"});
System.out.println(trees);
StringBuilder sb = new StringBuilder(10000);
extender.toStringBuilderExtenderByAnotherLinkedTree1(sb, forest.get(0), trees.get(0), new String[]{"the", "forest"});
System.out.println(sb.toString());
//
//extender.toStringBuilderExtenderByAnotherLinkedTree(sb, forest.get(0), forest.get(1));
//System.out.println(sb.toString());
}
}