opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/MyMatcher.java - opennlp-sandbox - Git at Google

 package opennlp.tools.parse_thicket.matching;

 import org.apache.commons.io.FileUtils;
 import org.apache.log4j.Logger;
 import org.deeplearning4j.berkeley.Pair;

 import opennlp.tools.textsimilarity.ParseTreeChunk;
 import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
 import opennlp.tools.textsimilarity.TextProcessor;

 import org.apache.log4j.Level;
 import org.apache.log4j.LogManager;

 import edu.stanford.nlp.util.StringUtils;

 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileReader;
 import java.util.*;

 /**
  * Interactive console tool that ranks a list of category descriptions against
  * a query typed by the user.
  *
  * <p>Categories are read from a text file, one per line. For every query the
  * categories are first narrowed down by keyword overlap with the query, and the
  * remaining ones are scored with {@code Matcher.assessRelevanceCache} and
  * {@code ParseTreeChunkListScorer}.
  *
  * Created by sanviswa on 10/29/16.
  */
 public class MyMatcher {

     /** Category file used when no path is supplied. */
     private static final String DEFAULT_CATEGORY_FILE =
             "/Users/bgalitsky/Documents/relevance-based-on-parse-trees/fidelity.txt";

     /** Minimum number of shared significant tokens for the first (strict) pass. */
     private static final int STRICT_MIN_OVERLAP = 2;

     /** Minimum number of shared significant tokens for the second (relaxed) pass. */
     private static final int RELAXED_MIN_OVERLAP = 1;

     /** The relaxed pass is used when the strict pass yields fewer results than this. */
     private static final int MIN_SHORTLIST_SIZE = 5;

     Matcher m = new Matcher();

     /**
      * Loads the categories and starts the interactive query loop.
      *
      * @param args optional; {@code args[0]} overrides the default category file path
      * @throws Exception if the category file cannot be read or matching fails
      */
     public static void main(String[] args) throws Exception {
         MyMatcher myMatcher = new MyMatcher();
         String path = (args != null && args.length > 0) ? args[0] : DEFAULT_CATEGORY_FILE;
         myMatcher.runTest(myMatcher.readFile(path));
     }

     /**
      * Reads the categories from the default category file.
      *
      * @return the non-blank lines of the file, in file order
      * @throws Exception if the file cannot be read
      */
     public List<String> readFile() throws Exception {
         return readFile(DEFAULT_CATEGORY_FILE);
     }

     /**
      * Reads the categories from the given file, one category per line.
      *
      * @param path location of the category file
      * @return the non-blank lines of the file, in file order
      * @throws Exception if the file cannot be read
      */
     public List<String> readFile(String path) throws Exception {
         // Explicit charset so the result does not depend on the platform default.
         String content = FileUtils.readFileToString(new File(path), "UTF-8");
         List<String> categories = new ArrayList<String>();
         // Accept both LF and CRLF line endings so no '\r' ends up inside a category.
         for (String line : content.split("\\r?\\n")) {
             // A blank line carries no tokens and is not a usable category.
             if (!line.trim().isEmpty()) {
                 categories.add(line);
             }
         }
         return categories;
     }

     /**
      * Prompts for queries on standard input and prints the ranked categories
      * for each one until the user enters {@code quit} or the input ends.
      *
      * @param lst categories to match every query against
      * @throws Exception if matching a query fails
      */
     public void runTest(List<String> lst) throws Exception {
         // One scanner for the whole session; it is deliberately not closed
         // because closing it would also close System.in.
         Scanner scanner = new Scanner(System.in);
         while (true) {
             System.out.println("Enter text: ");
             if (!scanner.hasNextLine()) {
                 return; // end of input
             }
             String queryStr = scanner.nextLine();
             if ("quit".equals(queryStr)) {
                 return;
             }
             checkLinguisticScores(queryStr, lst);
         }
     }

     /**
      * Scores the query against the categories and prints each scored category
      * with its score, sorted by ascending score (highest score printed last).
      *
      * <p>Only categories sharing enough significant tokens with the query are
      * scored. If the strict threshold yields too few categories the relaxed
      * threshold is used instead, and if nothing overlaps at all every category
      * is scored.
      *
      * @param q     the query text
      * @param aList the categories to consider
      * @throws Exception if the underlying matcher fails
      */
     public void checkLinguisticScores(String q, List<String> aList) throws Exception {
         Set<String> queryTokens =
                 new HashSet<String>(TextProcessor.fastTokenize(q.toLowerCase(Locale.ROOT), false));

         // Tokenize and score every category once; both passes reuse the counts.
         int[] overlaps = new int[aList.size()];
         int idx = 0;
         for (String ans : aList) {
             overlaps[idx++] = countSignificantOverlap(ans, queryTokens);
         }

         List<String> shortListedClasses = selectByOverlap(aList, overlaps, STRICT_MIN_OVERLAP);
         if (shortListedClasses.size() < MIN_SHORTLIST_SIZE) {
             // The relaxed selection is a superset of the strict one, so it replaces
             // the strict result instead of being appended (appending would list
             // the strict matches twice).
             shortListedClasses = selectByOverlap(aList, overlaps, RELAXED_MIN_OVERLAP);
         }
         // No keyword overlap at all: fall back to scoring the full list.
         if (shortListedClasses.isEmpty()) {
             shortListedClasses = aList;
         }

         ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
         List<Pair<String, Double>> pairList = new ArrayList<Pair<String, Double>>();
         for (String ans : shortListedClasses) {
             List<List<ParseTreeChunk>> res = m.assessRelevanceCache(q, ans);
             double score = parseTreeChunkListScorer.getParseTreeChunkListScoreAggregPhraseType(res);
             pairList.add(new Pair<String, Double>(ans, score));
         }

         Collections.sort(pairList, Comparator.comparing(p -> p.getSecond()));

         System.out.println("***** '" + q + "' ******* falls into the following categories: ");
         for (Pair<String, Double> score : pairList) {
             System.out.println("        " + score.getFirst() + ": " + score.getSecond());
         }
     }

     /**
      * Counts the tokens of {@code text} that also occur in the query and look
      * significant: longer than two characters and accepted by
      * {@code StringUtils.isAlpha}. A token repeated in {@code text} is counted
      * each time it occurs.
      */
     private static int countSignificantOverlap(String text, Set<String> queryTokens) {
         int count = 0;
         for (String word : TextProcessor.fastTokenize(text.toLowerCase(Locale.ROOT), false)) {
             if (queryTokens.contains(word) && word.length() > 2 && StringUtils.isAlpha(word)) {
                 count++;
             }
         }
         return count;
     }

     /**
      * Returns, in their original order, the candidates whose overlap count is
      * at least {@code minOverlap}; {@code overlaps[i]} belongs to the i-th candidate.
      */
     private static List<String> selectByOverlap(List<String> candidates, int[] overlaps, int minOverlap) {
         List<String> selected = new ArrayList<String>();
         int i = 0;
         for (String candidate : candidates) {
             if (overlaps[i++] >= minOverlap) {
                 selected.add(candidate);
             }
         }
         return selected;
     }
 }
	package opennlp.tools.parse_thicket.matching;

	import org.apache.commons.io.FileUtils;
	import org.apache.log4j.Logger;
	import org.deeplearning4j.berkeley.Pair;

	import opennlp.tools.textsimilarity.ParseTreeChunk;
	import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
	import opennlp.tools.textsimilarity.TextProcessor;

	import org.apache.log4j.Level;
	import org.apache.log4j.LogManager;

	import edu.stanford.nlp.util.StringUtils;

	import java.io.BufferedReader;
	import java.io.File;
	import java.io.FileReader;
	import java.util.*;

	/**
	 * Interactive console tool that ranks a list of category descriptions against
	 * a query typed by the user.
	 *
	 * <p>Categories are read from a text file, one per line. For every query the
	 * categories are first narrowed down by keyword overlap with the query, and the
	 * remaining ones are scored with {@code Matcher.assessRelevanceCache} and
	 * {@code ParseTreeChunkListScorer}.
	 *
	 * Created by sanviswa on 10/29/16.
	 */
	public class MyMatcher {

	    /** Category file used when no path is supplied. */
	    private static final String DEFAULT_CATEGORY_FILE =
	            "/Users/bgalitsky/Documents/relevance-based-on-parse-trees/fidelity.txt";

	    /** Minimum number of shared significant tokens for the first (strict) pass. */
	    private static final int STRICT_MIN_OVERLAP = 2;

	    /** Minimum number of shared significant tokens for the second (relaxed) pass. */
	    private static final int RELAXED_MIN_OVERLAP = 1;

	    /** The relaxed pass is used when the strict pass yields fewer results than this. */
	    private static final int MIN_SHORTLIST_SIZE = 5;

	    Matcher m = new Matcher();

	    /**
	     * Loads the categories and starts the interactive query loop.
	     *
	     * @param args optional; {@code args[0]} overrides the default category file path
	     * @throws Exception if the category file cannot be read or matching fails
	     */
	    public static void main(String[] args) throws Exception {
	        MyMatcher myMatcher = new MyMatcher();
	        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_CATEGORY_FILE;
	        myMatcher.runTest(myMatcher.readFile(path));
	    }

	    /**
	     * Reads the categories from the default category file.
	     *
	     * @return the non-blank lines of the file, in file order
	     * @throws Exception if the file cannot be read
	     */
	    public List<String> readFile() throws Exception {
	        return readFile(DEFAULT_CATEGORY_FILE);
	    }

	    /**
	     * Reads the categories from the given file, one category per line.
	     *
	     * @param path location of the category file
	     * @return the non-blank lines of the file, in file order
	     * @throws Exception if the file cannot be read
	     */
	    public List<String> readFile(String path) throws Exception {
	        // Explicit charset so the result does not depend on the platform default.
	        String content = FileUtils.readFileToString(new File(path), "UTF-8");
	        List<String> categories = new ArrayList<String>();
	        // Accept both LF and CRLF line endings so no '\r' ends up inside a category.
	        for (String line : content.split("\\r?\\n")) {
	            // A blank line carries no tokens and is not a usable category.
	            if (!line.trim().isEmpty()) {
	                categories.add(line);
	            }
	        }
	        return categories;
	    }

	    /**
	     * Prompts for queries on standard input and prints the ranked categories
	     * for each one until the user enters {@code quit} or the input ends.
	     *
	     * @param lst categories to match every query against
	     * @throws Exception if matching a query fails
	     */
	    public void runTest(List<String> lst) throws Exception {
	        // One scanner for the whole session; it is deliberately not closed
	        // because closing it would also close System.in.
	        Scanner scanner = new Scanner(System.in);
	        while (true) {
	            System.out.println("Enter text: ");
	            if (!scanner.hasNextLine()) {
	                return; // end of input
	            }
	            String queryStr = scanner.nextLine();
	            if ("quit".equals(queryStr)) {
	                return;
	            }
	            checkLinguisticScores(queryStr, lst);
	        }
	    }

	    /**
	     * Scores the query against the categories and prints each scored category
	     * with its score, sorted by ascending score (highest score printed last).
	     *
	     * <p>Only categories sharing enough significant tokens with the query are
	     * scored. If the strict threshold yields too few categories the relaxed
	     * threshold is used instead, and if nothing overlaps at all every category
	     * is scored.
	     *
	     * @param q     the query text
	     * @param aList the categories to consider
	     * @throws Exception if the underlying matcher fails
	     */
	    public void checkLinguisticScores(String q, List<String> aList) throws Exception {
	        Set<String> queryTokens =
	                new HashSet<String>(TextProcessor.fastTokenize(q.toLowerCase(Locale.ROOT), false));

	        // Tokenize and score every category once; both passes reuse the counts.
	        int[] overlaps = new int[aList.size()];
	        int idx = 0;
	        for (String ans : aList) {
	            overlaps[idx++] = countSignificantOverlap(ans, queryTokens);
	        }

	        List<String> shortListedClasses = selectByOverlap(aList, overlaps, STRICT_MIN_OVERLAP);
	        if (shortListedClasses.size() < MIN_SHORTLIST_SIZE) {
	            // The relaxed selection is a superset of the strict one, so it replaces
	            // the strict result instead of being appended (appending would list
	            // the strict matches twice).
	            shortListedClasses = selectByOverlap(aList, overlaps, RELAXED_MIN_OVERLAP);
	        }
	        // No keyword overlap at all: fall back to scoring the full list.
	        if (shortListedClasses.isEmpty()) {
	            shortListedClasses = aList;
	        }

	        ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
	        List<Pair<String, Double>> pairList = new ArrayList<Pair<String, Double>>();
	        for (String ans : shortListedClasses) {
	            List<List<ParseTreeChunk>> res = m.assessRelevanceCache(q, ans);
	            double score = parseTreeChunkListScorer.getParseTreeChunkListScoreAggregPhraseType(res);
	            pairList.add(new Pair<String, Double>(ans, score));
	        }

	        Collections.sort(pairList, Comparator.comparing(p -> p.getSecond()));

	        System.out.println("*** '" + q + "' ***** falls into the following categories: ");
	        for (Pair<String, Double> score : pairList) {
	            System.out.println(" " + score.getFirst() + ": " + score.getSecond());
	        }
	    }

	    /**
	     * Counts the tokens of {@code text} that also occur in the query and look
	     * significant: longer than two characters and accepted by
	     * {@code StringUtils.isAlpha}. A token repeated in {@code text} is counted
	     * each time it occurs.
	     */
	    private static int countSignificantOverlap(String text, Set<String> queryTokens) {
	        int count = 0;
	        for (String word : TextProcessor.fastTokenize(text.toLowerCase(Locale.ROOT), false)) {
	            if (queryTokens.contains(word) && word.length() > 2 && StringUtils.isAlpha(word)) {
	                count++;
	            }
	        }
	        return count;
	    }

	    /**
	     * Returns, in their original order, the candidates whose overlap count is
	     * at least {@code minOverlap}; {@code overlaps[i]} belongs to the i-th candidate.
	     */
	    private static List<String> selectByOverlap(List<String> candidates, int[] overlaps, int minOverlap) {
	        List<String> selected = new ArrayList<String>();
	        int i = 0;
	        for (String candidate : candidates) {
	            if (overlaps[i++] >= minOverlap) {
	                selected.add(candidate);
	            }
	        }
	        return selected;
	    }
	}