blob: b915543096a31049bbd8dbed64a662abd085cd6c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.external_rst;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import opennlp.tools.parse_thicket.ParseThicket;
import opennlp.tools.parse_thicket.ParseTreeNode;
import opennlp.tools.parse_thicket.WordWordInterSentenceRelationArc;
import opennlp.tools.parse_thicket.kernel_interface.TreeKernelRunner;
import opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder;
import opennlp.tools.parse_thicket.rhetoric_structure.RhetoricStructureArcsBuilder;
import org.apache.commons.io.FileUtils;
import org.jgrapht.Graph;
import org.jgrapht.graph.DefaultEdge;
import org.jgrapht.graph.SimpleGraph;
import edu.stanford.nlp.trees.Tree;
public class PT2ThicketPhraseBuilderExtrnlRST extends PT2ThicketPhraseBuilder{
private RhetoricStructureArcsBuilder rstBuilder = new RhetoricStructureArcsBuilder();
private ExternalRSTImporter externalRstBuilder = new ExternalRSTImporter();
private static Logger log = Logger
.getLogger("opennlp.tools.parse_thicket.external_rst.PT2ThicketPhraseBuilderExtrnlRST");
/*
* Building phrases takes a Parse Thicket and forms phrases for each sentence individually
* Then based on built phrases and obtained arcs, it builds arcs for RST
* Finally, based on all formed arcs, it extends phrases with thicket phrases
*/
public List<List<ParseTreeNode>> buildPT2ptPhrases(ParseThicket pt, String text, String externRSTpath) {
List<List<ParseTreeNode>> phrasesAllSent = new ArrayList<List<ParseTreeNode>> ();
Map<Integer, List<List<ParseTreeNode>>> sentNumPhrases = new HashMap<Integer, List<List<ParseTreeNode>>>();
// build regular phrases
for(int nSent=0; nSent<pt.getSentences().size(); nSent++){
List<ParseTreeNode> sentence = pt.getNodesThicket().get(nSent);
Tree ptree = pt.getSentences().get(nSent);
//ptree.pennPrint();
List<List<ParseTreeNode>> phrases = buildPT2ptPhrasesForASentence(ptree, sentence);
System.out.println(phrases);
phrasesAllSent.addAll(phrases);
sentNumPhrases.put(nSent, phrases);
}
String filename = "default.txt";
try {
filename = text.split("/n")[0]+".txt";
FileUtils.writeStringToFile(new File(externRSTpath+"/"+filename), text, "utf-8");
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// discover and add RST arcs
List<WordWordInterSentenceRelationArc> arcsRST =
rstBuilder.buildRSTArcsFromMarkersAndCorefs(pt.getArcs(), sentNumPhrases, pt);
String[] commandLine1 = new String[]{"python", "Discourse_Segmenter.py", filename}, commandLine2=
new String[]{"python", "Discourse_Parser.py", "tmp.edu"};
new TreeKernelRunner().runEXE(commandLine1, externRSTpath);
new TreeKernelRunner().runEXE(commandLine2, externRSTpath);
// TODO: code to run joty suite
List<RstNode> rstNodes = new ExternalRSTImporter().buildArrayOfRSTnodes(null, externRSTpath+"/tmp_doc.dis");
// discover and add RST arcs
List<WordWordInterSentenceRelationArc> arcsRSTexternal = externalRstBuilder.buildRSTArcsFromRSTparser( rstNodes, null, sentNumPhrases, pt );
System.out.println(arcsRST);
List<WordWordInterSentenceRelationArc> arcs = pt.getArcs();
arcs.addAll(arcsRST);
arcs.addAll(arcsRSTexternal);
pt.setArcs(arcs);
List<List<ParseTreeNode>> expandedPhrases = expandTowardsThicketPhrases(phrasesAllSent, pt.getArcs(), sentNumPhrases, pt);
return expandedPhrases;
}
}