/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.request_response_recognizer;

import java.util.ArrayList;
import java.util.List;

import opennlp.tools.parse_thicket.ParseThicket;
import opennlp.tools.parse_thicket.VerbNetProcessor;
import opennlp.tools.parse_thicket.external_rst.MatcherExternalRST;
import opennlp.tools.parse_thicket.external_rst.ParseThicketWithDiscourseTree;
import opennlp.tools.parse_thicket.kernel_interface.TreeKernelBasedClassifierMultiplePara;
/*
 * This class performs tree kernel (TK) learning over parse thickets that include only RST
 * relations, produced by the Surdeanu et al. RST parser. Sentence parsing and the rest of the
 * NLP pipeline are run through Surdeanu's wrapper of Stanford CoreNLP.
 */
public class TreeKernelBasedRecognizerOfRequest_Response extends TreeKernelBasedClassifierMultiplePara {

    private MatcherExternalRST matcherRST = new MatcherExternalRST();
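
    /*
     * Builds tree kernel training structures for each input text: every text is parsed into a
     * parse thicket with an externally computed discourse (RST) tree, the extended-tree dumps are
     * collected, and each dump is wrapped with the class flag plus the |BT| ... |ET| delimiters
     * used by the underlying tree kernel (SVM-Light-TK-style) training format.
     */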
    protected List<String> formTreeKernelStructuresMultiplePara(List<String> texts, String flag) {
        // TODO
        this.setShortRun();
        List<String> extendedTreesDumpTotal = new ArrayList<String>();
        try {
            for (String text : texts) {
                // get the parses from original documents, and form the training dataset
                try {
                    System.out.print("About to build pt with external rst from " + text + "\n...");
                    ParseThicket pt = matcherRST.buildParseThicketFromTextWithRST(text);
                    if (pt == null)
                        continue;
                    System.out.print("About to build extended forest with external rst...");
                    // use the direct discourse tree option (bDirectDT = true)
                    List<String> extendedTreesDump =
                            buildRepresentationForDiscourseTreeAndExtensions((ParseThicketWithDiscourseTree) pt, true);
                    for (String line : extendedTreesDump)
                        extendedTreesDumpTotal.add(flag + " |BT| " + line + " |ET| ");
                    System.out.println("DONE");
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return extendedTreesDumpTotal;
    }
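
    /*
     * Produces the string representations fed to the tree kernel learner. With bDirectDT = false
     * the forest is built from RST arcs via the tree extender; with bDirectDT = true the discourse
     * tree itself is dumped in several variants (plain, with POS tags, with embedded parse trees,
     * and with VerbNet information).
     */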
    private List<String> buildRepresentationForDiscourseTreeAndExtensions(ParseThicketWithDiscourseTree pt, boolean bDirectDT) {
        List<String> extendedTreesDump = new ArrayList<String>();
        if (!bDirectDT)
            // option 1: use RST relations for extended trees
            extendedTreesDump = treeExtender.buildForestForRSTArcs(pt);
        else {
            // option 2: use the discourse tree directly
            extendedTreesDump.add(pt.getDtDump());
            extendedTreesDump.add(pt.getDtDumpWithPOS());
            extendedTreesDump.add(pt.getDtDumpWithEmbeddedTrees());
            extendedTreesDump.add(pt.getDtDumpWithVerbNet());
        }
        return extendedTreesDump;
    }
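
    /*
     * Example driver. The VerbNet resource directory and the tree kernel path below are
     * machine-specific absolute paths and need to be adjusted to the local environment, as do
     * the training/negative-example directories taken from YahooAnswersTrainingSetCreator.
     */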
    public static void main(String[] args) {
        VerbNetProcessor p = VerbNetProcessor
                .getInstance("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources");
        TreeKernelBasedRecognizerOfRequest_Response proc = new TreeKernelBasedRecognizerOfRequest_Response();
        proc.setKernelPath("/Users/bgalitsky/Documents/relevance-based-on-parse-trees/src/test/resources/tree_kernel/");
        proc.trainClassifier(
                YahooAnswersTrainingSetCreator.origFilesDir,
                YahooAnswersTrainingSetCreator.origFilesDir.replace("/text", "/neg_text")
        );
    }
}