blob: 9c1c44a3100a7d00fb3ec0d7953bacfad9b9ecae [file] [log] [blame]
package opennlp.tools.parse_thicket.kernel_interface;
import java.util.ArrayList;
import java.util.List;
import edu.stanford.nlp.trees.Tree;
import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.ParseThicket;
import opennlp.tools.parse_thicket.matching.Matcher;
import opennlp.tools.textsimilarity.ParseTreeChunk;
/**
 * Turns plain text into tree-kernel input files and hands them to a
 * {@link TreeKernelRunner}.
 *
 * <p>Text is parsed with {@link Matcher#buildParseThicketFromTextWithRST(String)};
 * every tree returned by {@link ParseThicket#getSentences()} becomes one line of
 * the form {@code <label>|BT| <tree> |ET|}, where the label is {@code " 1 "}
 * (positive), {@code " -1 "} (negative) or {@code " 0 "} (to be classified).
 *
 * <p>NOTE(review): every {@code path} argument is concatenated directly with a
 * file name, so it must already end with a path separator.
 */
public class PT2ExtendedTreeForestBuilder {
    private Matcher matcher = new Matcher();
    private TreeKernelRunner tkRunner = new TreeKernelRunner();
    private static final String modelFileName = "model.txt",
            trainingFileName = "training.txt",
            unknownFileName = "unknown.txt",
            classifierOutputFileName = "classifier_output.txt";

    // Labels prepended to each tree line; the surrounding spaces are part of the
    // emitted line and must be preserved.
    private static final String POSITIVE_LABEL = " 1 ",
            NEGATIVE_LABEL = " -1 ",
            UNKNOWN_LABEL = " 0 ";

    /**
     * Parses {@code text} and renders each resulting tree as a single-cell row
     * {@code label + "|BT| " + tree + " |ET|"}.
     *
     * @param text  text to parse
     * @param label label placed in front of every tree line
     * @return one single-element {@code String[]} per parsed tree, in parse order
     */
    private List<String[]> formTreeBankLines(String text, String label) {
        ParseThicket pt = matcher.buildParseThicketFromTextWithRST(text);
        List<Tree> forest = pt.getSentences();
        List<String[]> treeBankBuffer = new ArrayList<String[]>();
        for (Tree t : forest) {
            treeBankBuffer.add(new String[] { label + "|BT| " + t.toString() + " |ET|" });
        }
        return treeBankBuffer;
    }

    /**
     * Builds labelled training rows for a paragraph.
     *
     * @param para     text to parse
     * @param positive {@code true} to label rows {@code " 1 "}, {@code false}
     *                 for {@code " -1 "}
     * @return one row per parsed tree
     */
    private List<String[]> formTrainingSetFromText(String para, boolean positive) {
        return formTreeBankLines(para, positive ? POSITIVE_LABEL : NEGATIVE_LABEL);
    }

    /**
     * Writes a training file built from positive and negative text into
     * {@code path} and then invokes the learner on it.
     *
     * <p>Positive rows are written first, followed by negative rows. The file is
     * written as {@code path + "training.txt"}; the runner is then called with
     * {@code path}, the training file name and the model file name
     * ({@code "model.txt"}).
     *
     * @param pos  text whose trees are labelled positive
     * @param neg  text whose trees are labelled negative
     * @param path working directory, including trailing separator
     */
    public void formPosNegTrainingSet(String pos, String neg, String path) {
        List<String[]> list = formTrainingSetFromText(pos, true);
        list.addAll(formTrainingSetFromText(neg, false));
        ProfileReaderWriter.writeReport(list, path + trainingFileName, ' ');
        tkRunner.runLearner(path, trainingFileName, modelFileName);
    }

    /**
     * Writes the trees parsed from {@code sentences} to
     * {@code path + "unknown.txt"} with label {@code " 0 "} and invokes the
     * classifier on that file using the model file name {@code "model.txt"} and
     * the output file name {@code "classifier_output.txt"}.
     *
     * @param sentences text to classify
     * @param path      working directory, including trailing separator
     */
    public void classifySentences(String sentences, String path) {
        List<String[]> treeBankBuffer = formTreeBankLines(sentences, UNKNOWN_LABEL);
        ProfileReaderWriter.writeReport(treeBankBuffer, path + unknownFileName, ' ');
        tkRunner.runClassifier(path, unknownFileName, modelFileName, classifierOutputFileName);
    }

    /**
     * Demo entry point: trains on two small sets of sentences and classifies a
     * third, using a hard-coded working directory.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        PT2ExtendedTreeForestBuilder builder = new PT2ExtendedTreeForestBuilder();
        String workDir = "C:\\stanford-corenlp\\tree_kernel\\";

        // Each sentence ends with ". " so that adjacent sentences do not fuse
        // into a single token sequence (e.g. "weapons.UN") after concatenation.
        String posSents = "Iran refuses to accept the UN proposal to end its dispute over its work on nuclear weapons. " +
                "UN nuclear watchdog passes a resolution condemning Iran for developing its second uranium enrichment site in secret. " +
                "A recent IAEA report presented diagrams that suggested Iran was secretly working on nuclear weapons. " +
                "Iran envoy says its nuclear development is for peaceful purpose, and the material evidence against it has been fabricated by the US. ";

        String negSents = "Iran refuses the UN offer to end a conflict over its nuclear weapons. " +
                "UN passes a resolution prohibiting Iran from developing its uranium enrichment site. " +
                "A recent UN report presented charts saying Iran was working on nuclear weapons. " +
                "Iran envoy to UN states its nuclear development is for peaceful purpose, and the evidence against its claim is fabricated by the US. ";

        builder.formPosNegTrainingSet(posSents, negSents, workDir);

        builder.classifySentences("Iran refuses Iraq's offer to end its conflict with UN. Iran passes a resolution prohibiting UN from doing second" +
                " uranium enrichment site. Envoy to US says its nuclear development is for peaceful purposes. Material evidence againt US has been fabricated by UN.",
                workDir);
    }
}