// blob: 1b2790f7f04baba672e04d7fe642ee67cecf90f0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.tools.parse_thicket.kernel_interface;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import edu.stanford.nlp.trees.Tree;

import opennlp.tools.jsmlearning.ProfileReaderWriter;
import opennlp.tools.parse_thicket.ParseThicket;
import opennlp.tools.parse_thicket.apps.MultiSentenceSearchResultsProcessor;
import opennlp.tools.parse_thicket.apps.SnippetToParagraph;
import opennlp.tools.parse_thicket.matching.Matcher;
import opennlp.tools.similarity.apps.BingQueryRunner;
import opennlp.tools.similarity.apps.HitBase;
import opennlp.tools.similarity.apps.HitBaseComparable;
import opennlp.tools.similarity.apps.WebSearchEngineResultsScraper;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
import opennlp.tools.textsimilarity.SentencePairMatchResult;
import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
public class MultiSentenceKernelBasedExtendedForestSearchResultsProcessor extends MultiSentenceKernelBasedSearchResultsProcessor{
private static Logger LOG = Logger
.getLogger("opennlp.tools.similarity.apps.MultiSentenceKernelBasedExtendedForestSearchResultsProcessor");
protected TreeExtenderByAnotherLinkedTree treeExtender = new TreeExtenderByAnotherLinkedTree();
protected List<String[]> formTreeKernelStructure(String searchResultText, int count, List<HitBase> hits) {
List<String[]> treeBankBuffer = new ArrayList<String[]> ();
try {
// get the parses from original documents, and form the training dataset
ParseThicket pt = matcher.buildParseThicketFromTextWithRST(searchResultText);
List<String> extendedTreesDump = treeExtender.buildForestForCorefArcs(pt);
// if from the first half or ranked docs, then positive, otherwise negative
String posOrNeg = null;
if (count<hits.size()/2)
posOrNeg=" 1 ";
else
posOrNeg=" -1 ";
// form the list of training samples
for(String t: extendedTreesDump){
treeBankBuffer.add(new String[] {posOrNeg+" |BT| "+t+ " |ET|"});
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return treeBankBuffer;
}
public static void main(String[] args){
String query = null;
/*" I see no meaningful distinction between complacency or complicity in the military's latest failure to uphold their own " +
"standards of conduct. Nor do I see a distinction between the service member who orchestrated this offense and the chain of " +
"command that was either oblivious to or tolerant of criminal behavior";
query = "I am now living abroad and have health insurance from Russia. How can I avoid penalty for not having health insurance in US";
query = "ECUADOR'S PRESIDENT RAFAEL CORREA SAYS U.S. VP JOE BIDEN WANTS HIM TO REFUSE WHISTLEBLOWER EDWARD SNOWDEN'S BID FOR ASYLUM";
query = "how to pay tax on foreign income from real estate";
*/
if (args!=null && args.length>0)
query = args[0];
MultiSentenceKernelBasedExtendedForestSearchResultsProcessor proc = new MultiSentenceKernelBasedExtendedForestSearchResultsProcessor();
proc.setKernelPath("C:\\stanford-corenlp\\tree_kernel\\");
proc.runSearchViaAPI(query);
}
}