// blob: 97ab041e5149ed61c0ad125940a75a73a31ebade [file] [log] [blame]
package opennlp.tools.parse_thicket.matching;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.deeplearning4j.berkeley.Pair;
import opennlp.tools.textsimilarity.ParseTreeChunk;
import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
import opennlp.tools.textsimilarity.TextProcessor;
import org.apache.log4j.Level;
import org.apache.log4j.LogManager;
import edu.stanford.nlp.util.StringUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.*;
/**
 * Interactive console tool that reads a list of answer classes (one per line) from a
 * text file, then repeatedly prompts for a query and prints a relevance score for each
 * candidate class.
 *
 * <p>Candidates are first narrowed down by plain token overlap with the query; the
 * surviving candidates are then scored via {@link Matcher#assessRelevanceCache} and
 * {@link ParseTreeChunkListScorer#getParseTreeChunkListScoreAggregPhraseType}.
 *
 * Created by sanviswa on 10/29/16.
 */
public class MyMatcher {

    /** File read by {@link #readFile()} when no explicit path is supplied. */
    private static final String DEFAULT_CLASSES_FILE =
            "/Users/bgalitsky/Documents/relevance-based-on-parse-trees/fidelity.txt";

    /** Encoding used to decode the classes file (previously the platform default). */
    private static final String FILE_ENCODING = "UTF-8";

    /** Console input that ends the interactive session. */
    private static final String QUIT_COMMAND = "quit";

    /** Overlap required on the first (strict) shortlisting pass. */
    private static final int STRICT_MIN_OVERLAP = 2;

    /** Overlap required on the second (relaxed) shortlisting pass. */
    private static final int RELAXED_MIN_OVERLAP = 1;

    /** If the strict pass yields fewer candidates than this, the relaxed pass is used. */
    private static final int MIN_SHORTLIST_SIZE = 5;

    /** Tokens shorter than this are not counted as significant. */
    private static final int MIN_SIGNIFICANT_TOKEN_LENGTH = 3;

    Matcher m = new Matcher();

    /**
     * Entry point: loads the answer classes and starts the interactive loop.
     *
     * @param args optional; if present, {@code args[0]} is the path of the classes file,
     *             otherwise the built-in default path is used
     * @throws Exception if the file cannot be read or matching fails
     */
    public static void main(String[] args) throws Exception {
        MyMatcher myMatcher = new MyMatcher();
        List<String> classes = args.length > 0
                ? myMatcher.readFile(args[0])
                : myMatcher.readFile();
        myMatcher.runTest(classes);
    }

    /**
     * Reads the answer classes from the default file location.
     *
     * @return the lines of the default classes file
     * @throws Exception if the file cannot be read
     */
    public List<String> readFile() throws Exception {
        return readFile(DEFAULT_CLASSES_FILE);
    }

    /**
     * Reads the answer classes from the given file, one class per line.
     *
     * @param path path of the file to read
     * @return a mutable list holding the lines of the file, without line terminators
     * @throws Exception if the file cannot be read
     */
    public List<String> readFile(String path) throws Exception {
        String content = FileUtils.readFileToString(new File(path), FILE_ENCODING);
        // Accept both "\n" and "\r\n" so Windows-edited files do not leave a stray '\r'
        // at the end of every class name.
        String[] lines = content.split("\\r?\\n");
        // Copy into an ArrayList: Arrays.asList alone is a fixed-size view of the array.
        return new ArrayList<String>(Arrays.asList(lines));
    }

    /**
     * Prompts for queries on standard input and scores each one against {@code lst}
     * until the user enters {@code quit} or the input stream ends.
     *
     * @param lst candidate answer classes
     * @throws Exception if scoring a query fails
     */
    public void runTest(List<String> lst) throws Exception {
        // A single Scanner serves the whole session. Creating one per query can lose
        // input already buffered by the previous instance, and looping (rather than
        // recursing) keeps the stack flat however many queries are entered.
        // The Scanner is deliberately not closed: closing it would close System.in.
        Scanner scanner = new Scanner(System.in);
        while (true) {
            System.out.println("Enter text: ");
            if (!scanner.hasNextLine()) {
                // End of input (e.g. piped stdin exhausted): stop instead of throwing.
                return;
            }
            String queryStr = scanner.nextLine();
            if (QUIT_COMMAND.equals(queryStr)) {
                return;
            }
            checkLinguisticScores(queryStr, lst);
        }
    }

    /**
     * Scores query {@code q} against the candidate classes and prints the candidates
     * with their scores, sorted by ascending score.
     *
     * <p>Candidates are shortlisted by the number of significant tokens they share with
     * the query: first those sharing at least two; if that yields fewer than five, those
     * sharing at least one; if none share any, every candidate is scored.
     *
     * @param q     the query text
     * @param aList candidate answer classes
     * @throws Exception if the underlying matcher fails
     */
    public void checkLinguisticScores(String q, List<String> aList) throws Exception {
        // A set gives constant-time membership checks in the overlap count below.
        Set<String> queryTokens =
                new HashSet<String>(TextProcessor.fastTokenize(q.toLowerCase(), false));

        // Tokenize and score every candidate exactly once; both passes reuse the result.
        List<Integer> overlaps = new ArrayList<Integer>(aList.size());
        for (String ans : aList) {
            overlaps.add(countSignificantOverlap(queryTokens, ans));
        }

        List<String> shortListedClasses = selectByMinOverlap(aList, overlaps, STRICT_MIN_OVERLAP);
        // Too few results: redo the selection with the lower threshold. The relaxed
        // selection is a superset of the strict one, so it REPLACES the list; appending
        // would list every strict match twice.
        if (shortListedClasses.size() < MIN_SHORTLIST_SIZE) {
            shortListedClasses = selectByMinOverlap(aList, overlaps, RELAXED_MIN_OVERLAP);
        }
        // No overlap at all: fall back to scoring the full list.
        if (shortListedClasses.isEmpty()) {
            shortListedClasses = aList;
        }

        ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
        ArrayList<Pair<String, Double>> pairList = new ArrayList<Pair<String, Double>>();
        for (String ans : shortListedClasses) {
            List<List<ParseTreeChunk>> res = m.assessRelevanceCache(q, ans);
            double score = parseTreeChunkListScorer.getParseTreeChunkListScoreAggregPhraseType(res);
            pairList.add(new Pair<String, Double>(ans, score));
        }

        // Ascending order: the highest-scoring class is printed last.
        Collections.sort(pairList, Comparator.comparing(p -> p.getSecond()));

        System.out.println("***** '" + q + "' ******* falls into the following categories: ");
        for (Pair<String, Double> score : pairList) {
            System.out.println(" " + score.getFirst() + ": " + score.getSecond());
        }
    }

    /**
     * Counts the tokens of {@code candidate} that also occur in the query and are
     * significant, i.e. longer than two characters and purely alphabetic. A token that
     * occurs several times in the candidate is counted each time.
     */
    private int countSignificantOverlap(Set<String> queryTokens, String candidate) {
        int count = 0;
        for (String word : TextProcessor.fastTokenize(candidate.toLowerCase(), false)) {
            if (queryTokens.contains(word)
                    && word.length() >= MIN_SIGNIFICANT_TOKEN_LENGTH
                    && StringUtils.isAlpha(word)) {
                count++;
            }
        }
        return count;
    }

    /**
     * Returns, in their original order, the candidates whose precomputed overlap is at
     * least {@code minOverlap}. {@code overlaps} is index-aligned with {@code candidates}.
     */
    private List<String> selectByMinOverlap(List<String> candidates, List<Integer> overlaps,
            int minOverlap) {
        List<String> selected = new ArrayList<String>();
        for (int i = 0; i < candidates.size(); i++) {
            if (overlaps.get(i) >= minOverlap) {
                selected.add(candidates.get(i));
            }
        }
        return selected;
    }
}