blob: 5ca4b4230c7d8574d4983c667dbd8c765ae181ce [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.index;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.context.NexusIndexMultiSearcher;
import org.apache.maven.index.creator.JarFileContentsIndexCreator;
/**
 * Default implementation of {@link IteratorResultSet}. Wraps a Lucene {@link TopDocs} result and lazily
 * materializes {@link ArtifactInfo} instances as the caller iterates, applying the request's filter,
 * postprocessor and (optional) match highlighting along the way. The underlying searcher lock is released
 * by {@link #cleanUp()} once iteration is exhausted or the result set is closed.
 * TODO: there is too much logic in here; refactor this!
 *
 * @author cstamas
 */
public class DefaultIteratorResultSet implements IteratorResultSet {
    private final IteratorSearchRequest searchRequest;

    private final NexusIndexMultiSearcher indexSearcher;

    private final List<IndexingContext> contexts;

    /**
     * {@code starts[i]} is the first global Lucene doc id belonging to {@code contexts.get(i)};
     * {@code starts[contexts.size()]} is the total maxDoc. Used to map a hit back to its context.
     */
    private final int[] starts;

    private final ArtifactInfoFilter filter;

    private final ArtifactInfoPostprocessor postprocessor;

    /** Highlight requests with their queries rewritten against this searcher's reader. */
    private final List<MatchHighlightRequest> matchHighlightRequests;

    private final TopDocs hits;

    private final int from;

    private final int count;

    /** Exclusive upper bound for {@link #pointer}: {@code from + count}. */
    private final int maxRecPointer;

    /** Index of the next hit in {@code hits.scoreDocs} to examine. */
    private int pointer;

    private int processedArtifactInfoCount;

    /** Pre-fetched next element, or {@code null} once the iteration is exhausted. */
    private ArtifactInfo ai;

    /**
     * Creates a result set over the given hits.
     *
     * @param request the search request carrying paging, filter, postprocessor and highlight settings
     * @param indexSearcher an already-open multi searcher (its reader lock is already acquired)
     * @param contexts the indexing contexts backing the searcher, in searcher order
     * @param hits the Lucene hits to iterate
     * @throws IOException if reading from the index or rewriting a highlight query fails
     */
    protected DefaultIteratorResultSet(
            final IteratorSearchRequest request,
            final NexusIndexMultiSearcher indexSearcher,
            final List<IndexingContext> contexts,
            final TopDocs hits)
            throws IOException {
        this.searchRequest = request;
        this.indexSearcher = indexSearcher;
        this.contexts = contexts;
        {
            int maxDoc = 0;
            this.starts = new int[contexts.size() + 1]; // build starts array
            // this is good to do as we have NexusIndexMultiSearcher passed in constructor, so it is already
            // open, hence #acquire() already invoked on underlying NexusIndexMultiReader
            final List<IndexSearcher> acquiredSearchers =
                    indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
            for (int i = 0; i < contexts.size(); i++) {
                starts[i] = maxDoc;
                maxDoc += acquiredSearchers.get(i).getIndexReader().maxDoc(); // compute maxDocs
            }
            starts[contexts.size()] = maxDoc;
        }
        this.filter = request.getArtifactInfoFilter();
        this.postprocessor = request.getArtifactInfoPostprocessor();
        // Rewrite each highlight query against this searcher's reader (needed for e.g. wildcard/prefix
        // queries to be highlightable) and keep the REWRITTEN list. The previous code built this list but
        // discarded it, storing the raw requests instead — so highlighting saw unrewritten queries.
        List<MatchHighlightRequest> rewrittenHighlightRequests =
                new ArrayList<>(request.getMatchHighlightRequests().size());
        for (MatchHighlightRequest hr : request.getMatchHighlightRequests()) {
            Query rewrittenQuery = hr.getQuery().rewrite(indexSearcher.getIndexReader());
            rewrittenHighlightRequests.add(
                    new MatchHighlightRequest(hr.getField(), rewrittenQuery, hr.getHighlightMode()));
        }
        this.matchHighlightRequests = rewrittenHighlightRequests;
        this.hits = hits;
        this.from = request.getStart();
        this.count = (request.getCount() == AbstractSearchRequest.UNDEFINED
                ? hits.scoreDocs.length
                : Math.min(request.getCount(), hits.scoreDocs.length));
        this.pointer = from;
        this.processedArtifactInfoCount = 0;
        this.maxRecPointer = from + count;
        ai = createNextAi();
        if (ai == null) {
            // nothing to iterate: release the searcher lock immediately
            cleanUp();
        }
    }

    @Override
    public boolean hasNext() {
        return ai != null;
    }

    @Override
    public ArtifactInfo next() {
        ArtifactInfo result = ai;
        try {
            ai = createNextAi();
        } catch (IOException e) {
            ai = null;
            throw new IllegalStateException("Cannot fetch next ArtifactInfo!", e);
        } finally {
            if (ai == null) {
                // exhausted (or failed): release the searcher lock
                cleanUp();
            }
        }
        // NOTE(review): returns null past the end instead of throwing NoSuchElementException; callers
        // appear to rely on hasNext(), so the historical behavior is preserved here.
        return result;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException(
                "Method not supported on " + getClass().getName());
    }

    @Override
    public Iterator<ArtifactInfo> iterator() {
        return this;
    }

    @Override
    public void close() {
        cleanUp();
    }

    public int getTotalProcessedArtifactInfoCount() {
        return processedArtifactInfoCount;
    }

    /**
     * Last-resort safety net: warns and releases the searcher lock if the caller forgot to
     * {@link #close()} this result set. (finalize() is deprecated; kept for compatibility.)
     */
    @Override
    public void finalize() throws Throwable {
        super.finalize();
        if (!cleanedUp) {
            System.err.println("#WARNING: Lock leaking from " + getClass().getName() + " for query "
                    + searchRequest.getQuery().toString());
            cleanUp();
        }
    }

    // ==

    /**
     * Advances {@link #pointer} until the next hit that survives construction, filtering and
     * postprocessing, or returns {@code null} when the page/hit limit is reached.
     */
    protected ArtifactInfo createNextAi() throws IOException {
        ArtifactInfo result = null;
        // we should stop if:
        // a) we found what we want
        // b) pointer advanced over more documents that user requested
        // c) pointer advanced over more documents that hits has
        // or we found what we need
        while ((result == null) && (pointer < maxRecPointer) && (pointer < hits.scoreDocs.length)) {
            Document doc = indexSearcher.doc(hits.scoreDocs[pointer].doc);
            IndexingContext context = getIndexingContextForPointer(doc, hits.scoreDocs[pointer].doc);
            result = IndexUtils.constructArtifactInfo(doc, context);
            if (result != null) {
                // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
                // For debugging only!!!
                if (searchRequest.isLuceneExplain()) {
                    result.getAttributes()
                            .put(
                                    Explanation.class.getName(),
                                    indexSearcher
                                            .explain(searchRequest.getQuery(), hits.scoreDocs[pointer].doc)
                                            .toString());
                }
                result.setLuceneScore(hits.scoreDocs[pointer].score);
                result.setRepository(context.getRepositoryId());
                result.setContext(context.getId());
                if (filter != null && !filter.accepts(context, result)) {
                    result = null; // filtered out: keep scanning
                }
                if (result != null && postprocessor != null) {
                    postprocessor.postprocess(context, result);
                }
                if (result != null && !matchHighlightRequests.isEmpty()) {
                    calculateHighlights(context, doc, result);
                }
            }
            pointer++;
            processedArtifactInfoCount++;
        }
        return result;
    }

    private volatile boolean cleanedUp = false;

    /**
     * Releases the searcher lock exactly once; subsequent calls are no-ops.
     *
     * @throws IllegalStateException if releasing the underlying searcher fails
     */
    protected synchronized void cleanUp() {
        if (cleanedUp) {
            return;
        }
        try {
            indexSearcher.release();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        this.cleanedUp = true;
    }

    /**
     * Creates the MatchHighlights and adds them to ArtifactInfo if found/can.
     *
     * @param context the context the document came from
     * @param d the matched Lucene document
     * @param ai the artifact info to decorate with highlights
     * @throws IOException if highlighting fails while reading token streams
     */
    protected void calculateHighlights(IndexingContext context, Document d, ArtifactInfo ai) throws IOException {
        IndexerField field;
        String text;
        List<String> highlightFragment;
        for (MatchHighlightRequest hr : matchHighlightRequests) {
            field = selectStoredIndexerField(hr.getField());
            if (field != null) {
                text = ai.getFieldValue(field.getOntology());
                if (text != null) {
                    highlightFragment = highlightField(context, hr, field, text);
                    if (highlightFragment != null && !highlightFragment.isEmpty()) {
                        MatchHighlight matchHighlight = new MatchHighlight(hr.getField(), highlightFragment);
                        ai.getMatchHighlights().add(matchHighlight);
                    }
                }
            }
        }
    }

    /**
     * Select a STORED IndexerField assigned to passed in Field.
     *
     * @param field the ontology field to resolve
     * @return the stored indexer field, or {@code null} if the field has no indexer fields
     */
    protected IndexerField selectStoredIndexerField(Field field) {
        // hack here: class names are stored only by the JAR contents creator's dedicated field
        if (MAVEN.CLASSNAMES.equals(field)) {
            return JarFileContentsIndexCreator.FLD_CLASSNAMES;
        } else {
            return field.getIndexerFields().isEmpty()
                    ? null
                    : field.getIndexerFields().iterator().next();
        }
    }

    /**
     * Returns the match fragments of {@code text} highlighted in the style the user requested.
     *
     * @param context the context whose analyzer tokenizes the field
     * @param hr the highlight request (query + mode)
     * @param field the indexer field being highlighted
     * @param text the stored field value
     * @return up to three best fragments (possibly empty)
     * @throws IOException if tokenization or highlighting fails
     * @throws UnsupportedOperationException if the requested highlight mode is not HTML
     */
    protected List<String> highlightField(
            IndexingContext context, MatchHighlightRequest hr, IndexerField field, String text) throws IOException {
        // exception with classnames: stored form is "/com/foo/Bar\n..."; present as dotted FQCNs
        if (MAVEN.CLASSNAMES.equals(field.getOntology())) {
            text = text.replace('/', '.').replaceAll("^\\.", "").replaceAll("\n\\.", "\n");
        }
        Analyzer analyzer = context.getAnalyzer();
        TokenStream baseTokenStream = analyzer.tokenStream(field.getKey(), new StringReader(text));
        CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream);
        Formatter formatter;
        if (MatchHighlightMode.HTML.equals(hr.getHighlightMode())) {
            formatter = new SimpleHTMLFormatter();
        } else {
            // unsupported mode: dispose of the token stream before bailing out
            tokenStream.reset();
            tokenStream.end();
            tokenStream.close();
            // FIX: message previously read "Hightlight more" — garbled; it reports the unsupported mode
            throw new UnsupportedOperationException(
                    "Highlight mode \"" + hr.getHighlightMode().toString() + "\" is not supported!");
        }
        List<String> bestFragments = getBestFragments(hr.getQuery(), formatter, tokenStream, text, 3);
        return bestFragments;
    }

    /**
     * Runs the Lucene {@link Highlighter} and collects the positively-scored fragments.
     *
     * @param query the (rewritten) query to score fragments against
     * @param formatter the fragment formatter
     * @param tokenStream token stream over {@code text}
     * @param text the raw field text
     * @param maxNumFragments upper bound on returned fragments (clamped to at least 1)
     * @return the fragment texts; empty when nothing scored or on token-offset problems
     * @throws IOException if the highlighter fails reading the token stream
     */
    protected final List<String> getBestFragments(
            Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments)
            throws IOException {
        Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query));
        highlighter.setTextFragmenter(new OneLineFragmenter());
        maxNumFragments = Math.max(1, maxNumFragments); // sanity check
        TextFragment[] frag;
        // Get text
        ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments);
        try {
            frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments);
            for (TextFragment textFragment : frag) {
                if ((textFragment != null) && (textFragment.getScore() > 0)) {
                    fragTexts.add(textFragment.toString());
                }
            }
        } catch (InvalidTokenOffsetsException ignored) {
            // deliberately best-effort: malformed offsets simply yield no fragments
        }
        return fragTexts;
    }

    /**
     * Maps a global document id back to the {@link IndexingContext} that owns it.
     *
     * @param doc the document (unused, kept for subclass overrides)
     * @param docPtr the global Lucene document id
     */
    protected IndexingContext getIndexingContextForPointer(Document doc, int docPtr) {
        return contexts.get(readerIndex(docPtr, this.starts, this.contexts.size()));
    }

    /**
     * Binary search over {@code starts} for the sub-reader owning global doc {@code n}
     * (mirrors Lucene's MultiReader readerIndex logic).
     */
    private static int readerIndex(int n, int[] starts, int numSubReaders) { // find reader for doc n:
        int lo = 0; // search starts array
        int hi = numSubReaders - 1; // for first element less
        while (hi >= lo) {
            int mid = (lo + hi) >>> 1; // overflow-safe midpoint
            int midValue = starts[mid];
            if (n < midValue) {
                hi = mid - 1;
            } else if (n > midValue) {
                lo = mid + 1;
            } else { // found a match
                while (mid + 1 < numSubReaders && starts[mid + 1] == midValue) {
                    mid++; // scan to last match — skips empty sub-readers sharing the same start
                }
                return mid;
            }
        }
        return hi;
    }
}