package opennlp.tools.similarity.apps.solr; | |
import java.io.IOException; | |
import java.util.ArrayList; | |
import java.util.Collections; | |
import java.util.Comparator; | |
import java.util.HashMap; | |
import java.util.HashSet; | |
import java.util.Iterator; | |
import java.util.LinkedList; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Set; | |
import opennlp.tools.similarity.apps.HitBaseComparable; | |
import opennlp.tools.similarity.apps.utils.Pair; | |
import opennlp.tools.textsimilarity.ParseTreeChunkListScorer; | |
import opennlp.tools.textsimilarity.SentencePairMatchResult; | |
import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor; | |
import org.apache.commons.lang.ArrayUtils; | |
import org.apache.commons.lang.StringUtils; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.index.CorruptIndexException; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.queryparser.classic.ParseException; | |
import org.apache.lucene.search.BooleanClause.Occur; | |
import org.apache.lucene.search.BooleanQuery; | |
import org.apache.lucene.search.CachingWrapperFilter; | |
import org.apache.lucene.search.Collector; | |
import org.apache.lucene.search.Filter; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.QueryWrapperFilter; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.solr.common.SolrDocument; | |
import org.apache.solr.common.SolrDocumentList; | |
import org.apache.solr.common.SolrException; | |
import org.apache.solr.common.params.CommonParams; | |
import org.apache.solr.common.params.ModifiableSolrParams; | |
import org.apache.solr.common.params.ShardParams; | |
import org.apache.solr.common.params.SolrParams; | |
import org.apache.solr.common.util.NamedList; | |
import org.apache.solr.handler.RequestHandlerBase; | |
import org.apache.solr.handler.component.ResponseBuilder; | |
import org.apache.solr.handler.component.SearchComponent; | |
import org.apache.solr.handler.component.SearchHandler; | |
import org.apache.solr.handler.component.ShardHandler; | |
import org.apache.solr.handler.component.ShardRequest; | |
import org.apache.solr.handler.component.ShardResponse; | |
import org.apache.solr.request.SolrQueryRequest; | |
import org.apache.solr.response.ResultContext; | |
import org.apache.solr.response.SolrQueryResponse; | |
import org.apache.solr.schema.SchemaField; | |
import org.apache.solr.search.DocIterator; | |
import org.apache.solr.search.DocList; | |
import org.apache.solr.search.DocSlice; | |
import org.apache.solr.search.QParser; | |
import org.apache.solr.search.SolrIndexSearcher; | |
import org.apache.solr.util.RTimer; | |
import org.apache.solr.util.SolrPluginUtils; | |
public class SyntGenRequestHandler extends SearchHandler { | |
private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer(); | |
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp){ | |
try { | |
super.handleRequestBody(req, rsp); | |
} catch (Exception e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
SolrParams reqValues = req.getOriginalParams(); | |
Iterator<String> iter = reqValues.getParameterNamesIterator(); | |
while(iter.hasNext()){ | |
System.out.println(iter.next()); | |
} | |
String param = req.getParamString(); | |
//modify rsp | |
NamedList values = rsp.getValues(); | |
ResultContext c = (ResultContext) values.get("response"); | |
if (c==null) | |
return; | |
String val1 = (String)values.get("t1"); | |
String k1 = values.getName(0); | |
k1 = values.getName(1); | |
k1 = values.getName(2); | |
k1 = values.getName(3); | |
k1 = values.getName(4); | |
DocList dList = c.docs; | |
DocList dListResult=null; | |
try { | |
dListResult = filterResultsBySyntMatchReduceDocSet(dList, | |
req, req.getParams()); | |
} catch (Exception e) { | |
dListResult = dList; | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
c.docs = dListResult; | |
values.remove("response"); | |
rsp.setAllValues(values); | |
} | |
public DocList filterResultsBySyntMatchReduceDocSet(DocList docList, | |
SolrQueryRequest req, SolrParams params) { | |
//if (!docList.hasScores()) | |
// return docList; | |
int len = docList.size(); | |
if (len < 1) // do nothing | |
return docList; | |
ParserChunker2MatcherProcessor pos = ParserChunker2MatcherProcessor .getInstance(); | |
DocIterator iter = docList.iterator(); | |
float[] syntMatchScoreArr = new float[len]; | |
String requestExpression = req.getParamString(); | |
String[] exprParts = requestExpression.split("&"); | |
for(String part: exprParts){ | |
if (part.startsWith("q=")) | |
requestExpression = part; | |
} | |
String fieldNameQuery = StringUtils.substringBetween(requestExpression, "=", ":"); | |
// extract phrase query (in double-quotes) | |
String[] queryParts = requestExpression.split("\""); | |
if (queryParts.length>=2 && queryParts[1].length()>5) | |
requestExpression = queryParts[1].replace('+', ' '); | |
else if (requestExpression.indexOf(":") > -1 ) {// still field-based expression | |
requestExpression = requestExpression.replaceAll(fieldNameQuery+":", "").replace('+',' ').replaceAll(" ", " ").replace("q=", ""); | |
} | |
if (fieldNameQuery ==null) | |
return docList; | |
if (requestExpression==null || requestExpression.length()<5 || requestExpression.split(" ").length<3) | |
return docList; | |
int[] docIDsHits = new int[len]; | |
IndexReader indexReader = req.getSearcher().getIndexReader(); | |
List<Integer> bestMatchesDocIds = new ArrayList<Integer>(); List<Float> bestMatchesScore = new ArrayList<Float>(); | |
List<Pair<Integer, Float>> docIdsScores = new ArrayList<Pair<Integer, Float>> (); | |
try { | |
for (int i=0; i<docList.size(); ++i) { | |
int docId = iter.nextDoc(); | |
docIDsHits[i] = docId; | |
Document doc = indexReader.document(docId); | |
// get text for event | |
String answerText = doc.get(fieldNameQuery); | |
if (answerText==null) | |
continue; | |
SentencePairMatchResult matchResult = pos.assessRelevance( requestExpression , answerText); | |
float syntMatchScore = new Double(parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult.getMatchResult())).floatValue(); | |
bestMatchesDocIds.add(docId); | |
bestMatchesScore.add(syntMatchScore); | |
syntMatchScoreArr[i] = (float)syntMatchScore; //*iter.score(); | |
System.out.println(" Matched query = '"+requestExpression + "' with answer = '"+answerText +"' | doc_id = '"+docId); | |
System.out.println(" Match result = '"+matchResult.getMatchResult() + "' with score = '"+syntMatchScore +"';" ); | |
docIdsScores.add(new Pair(docId, syntMatchScore)); | |
} | |
} catch (CorruptIndexException e1) { | |
// TODO Auto-generated catch block | |
e1.printStackTrace(); | |
//log.severe("Corrupt index"+e1); | |
} catch (IOException e1) { | |
// TODO Auto-generated catch block | |
e1.printStackTrace(); | |
//log.severe("File read IO / index"+e1); | |
} | |
Collections.sort(docIdsScores, new PairComparable()); | |
for(int i = 0; i<docIdsScores.size(); i++){ | |
bestMatchesDocIds.set(i, docIdsScores.get(i).getFirst()); | |
bestMatchesScore.set(i, docIdsScores.get(i).getSecond()); | |
} | |
System.out.println(bestMatchesScore); | |
float maxScore = docList.maxScore(); // do not change | |
int limit = docIdsScores.size(); | |
int start = 0; | |
DocSlice ds = null; | |
ds = new DocSlice(start, limit, | |
ArrayUtils.toPrimitive(bestMatchesDocIds.toArray(new Integer[0])), | |
ArrayUtils.toPrimitive(bestMatchesScore.toArray(new Float[0])), | |
bestMatchesDocIds.size(), maxScore); | |
return ds; | |
} | |
public void handleRequestBody1(SolrQueryRequest req, SolrQueryResponse rsp) | |
throws Exception { | |
// extract params from request | |
SolrParams params = req.getParams(); | |
String q = params.get(CommonParams.Q); | |
String[] fqs = params.getParams(CommonParams.FQ); | |
int start = 0; | |
try { start = Integer.parseInt(params.get(CommonParams.START)); } | |
catch (Exception e) { /* default */ } | |
int rows = 0; | |
try { rows = Integer.parseInt(params.get(CommonParams.ROWS)); } | |
catch (Exception e) { /* default */ } | |
//SolrPluginUtils.setReturnFields(req, rsp); | |
// build initial data structures | |
SolrDocumentList results = new SolrDocumentList(); | |
SolrIndexSearcher searcher = req.getSearcher(); | |
Map<String,SchemaField> fields = req.getSchema().getFields(); | |
int ndocs = start + rows; | |
Filter filter = buildFilter(fqs, req); | |
Set<Integer> alreadyFound = new HashSet<Integer>(); | |
// invoke the various sub-handlers in turn and return results | |
doSearch1(results, searcher, q, filter, ndocs, req, | |
fields, alreadyFound); | |
// ... more sub-handler calls here ... | |
// build and write response | |
float maxScore = 0.0F; | |
int numFound = 0; | |
List<SolrDocument> slice = new ArrayList<SolrDocument>(); | |
for (Iterator<SolrDocument> it = results.iterator(); it.hasNext(); ) { | |
SolrDocument sdoc = it.next(); | |
Float score = (Float) sdoc.getFieldValue("score"); | |
if (maxScore < score) { | |
maxScore = score; | |
} | |
if (numFound >= start && numFound < start + rows) { | |
slice.add(sdoc); | |
} | |
numFound++; | |
} | |
results.clear(); | |
results.addAll(slice); | |
results.setNumFound(numFound); | |
results.setMaxScore(maxScore); | |
results.setStart(start); | |
rsp.add("response", results); | |
} | |
private Filter buildFilter(String[] fqs, SolrQueryRequest req) | |
throws IOException, ParseException { | |
if (fqs != null && fqs.length > 0) { | |
BooleanQuery fquery = new BooleanQuery(); | |
for (int i = 0; i < fqs.length; i++) { | |
QParser parser = null; | |
try { | |
parser = QParser.getParser(fqs[i], null, req); | |
} catch (Exception e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
try { | |
fquery.add(parser.getQuery(), Occur.MUST); | |
} catch (Exception e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
} | |
return new CachingWrapperFilter(new QueryWrapperFilter(fquery)); | |
} | |
return null; | |
} | |
private void doSearch1(SolrDocumentList results, | |
SolrIndexSearcher searcher, String q, Filter filter, | |
int ndocs, SolrQueryRequest req, | |
Map<String,SchemaField> fields, Set<Integer> alreadyFound) | |
throws IOException { | |
// build custom query and extra fields | |
Query query = null; //buildCustomQuery1(q); | |
Map<String,Object> extraFields = new HashMap<String,Object>(); | |
extraFields.put("search_type", "search1"); | |
boolean includeScore = | |
req.getParams().get(CommonParams.FL).contains("score"); | |
int maxDocsPerSearcherType = 0; | |
float maprelScoreCutoff = 2.0f; | |
append(results, searcher.search( | |
query, filter, maxDocsPerSearcherType).scoreDocs, | |
alreadyFound, fields, extraFields, maprelScoreCutoff , | |
searcher.getIndexReader(), includeScore); | |
} | |
// ... more doSearchXXX() calls here ... | |
private void append(SolrDocumentList results, ScoreDoc[] more, | |
Set<Integer> alreadyFound, Map<String,SchemaField> fields, | |
Map<String,Object> extraFields, float scoreCutoff, | |
IndexReader reader, boolean includeScore) throws IOException { | |
for (ScoreDoc hit : more) { | |
if (alreadyFound.contains(hit.doc)) { | |
continue; | |
} | |
Document doc = reader.document(hit.doc); | |
SolrDocument sdoc = new SolrDocument(); | |
for (String fieldname : fields.keySet()) { | |
SchemaField sf = fields.get(fieldname); | |
if (sf.stored()) { | |
sdoc.addField(fieldname, doc.get(fieldname)); | |
} | |
} | |
for (String extraField : extraFields.keySet()) { | |
sdoc.addField(extraField, extraFields.get(extraField)); | |
} | |
if (includeScore) { | |
sdoc.addField("score", hit.score); | |
} | |
results.add(sdoc); | |
alreadyFound.add(hit.doc); | |
} | |
} | |
public class PairComparable implements Comparator<Pair> { | |
// @Override | |
public int compare(Pair o1, Pair o2) { | |
int b = -2; | |
if ( o1.getSecond() instanceof Float && o2.getSecond() instanceof Float){ | |
b = (((Float)o1.getSecond()> (Float)o2.getSecond()) ? -1 | |
: (((Float)o1.getSecond() == (Float)o2.getSecond()) ? 0 : 1)); | |
} | |
return b; | |
} | |
} | |
} | |
/* | |
* | |
* | |
* http://localhost:8080/solr/syntgen/?q=add-style-to-your-every-day-fresh-design-iphone-cases&t1=Personalized+iPhone+Cases&d1=Add+style+to+your+every+day+with+a+custom+iPhone+case&t2=Personalized+iPhone+Cases&d2=Add+style+to+your+every+day+with+a+custom+iPhone+case&t3=Personalized+iPhone+Cases&d3=Add+style+to+your+every+day+with+a+custom+iPhone+case&t4=Personalized+iPhone+Cases&d4=add+style+to+your+every+day+with+a+custom+iPhone+case | |
* */ |