package org.apache.maven.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.context.NexusIndexMultiSearcher;
import org.apache.maven.index.creator.JarFileContentsIndexCreator;
/**
* Default implementation of IteratorResultSet. TODO: there is too much logic here; refactor this!
*
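* A minimal usage sketch (an illustration assuming the result set is obtained from an
* IteratorSearchResponse, e.g. via NexusIndexer#searchIterator; the exact wiring may differ):
* <pre>
* IteratorResultSet result = response.getResults(); // hypothetical response variable
* try
* {
*     while ( result.hasNext() )
*     {
*         ArtifactInfo ai = result.next();
*         System.out.println( ai );
*     }
* }
* finally
* {
*     result.close(); // always release the underlying index searcher
* }
* </pre>
*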
* @author cstamas
*/
public class DefaultIteratorResultSet
implements IteratorResultSet
{
private final IteratorSearchRequest searchRequest;
private final NexusIndexMultiSearcher indexSearcher;
private final List<IndexingContext> contexts;
private final int[] starts;
private final ArtifactInfoFilter filter;
private final ArtifactInfoPostprocessor postprocessor;
private final List<MatchHighlightRequest> matchHighlightRequests;
private final TopDocs hits;
private final int from;
private final int count;
private final int maxRecPointer;
private int pointer;
private int processedArtifactInfoCount;
private ArtifactInfo ai;
protected DefaultIteratorResultSet( final IteratorSearchRequest request,
final NexusIndexMultiSearcher indexSearcher,
final List<IndexingContext> contexts, final TopDocs hits )
throws IOException
{
this.searchRequest = request;
this.indexSearcher = indexSearcher;
this.contexts = contexts;
{
int maxDoc = 0;
this.starts = new int[contexts.size() + 1]; // build starts array
// safe to do here: the NexusIndexMultiSearcher passed into the constructor is already open, hence
// #acquire() has already been invoked on the underlying NexusIndexMultiReader
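// starts[i] records the global doc id base of sub-searcher i (Lucene's MultiReader-style doc id
// partitioning); readerIndex() below uses this array to map a global doc id back to its context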
final List<IndexSearcher> acquiredSearchers =
indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
for ( int i = 0; i < contexts.size(); i++ )
{
starts[i] = maxDoc;
maxDoc += acquiredSearchers.get( i ).getIndexReader().maxDoc(); // compute maxDocs
}
starts[contexts.size()] = maxDoc;
}
this.filter = request.getArtifactInfoFilter();
this.postprocessor = request.getArtifactInfoPostprocessor();
// rewrite each highlight query against the reader, so the Highlighter sees primitive term queries
List<MatchHighlightRequest> matchHighlightRequests = new ArrayList<>( request.getMatchHighlightRequests().size() );
for ( MatchHighlightRequest hr : request.getMatchHighlightRequests() )
{
Query rewrittenQuery = hr.getQuery().rewrite( indexSearcher.getIndexReader() );
matchHighlightRequests.add( new MatchHighlightRequest( hr.getField(), rewrittenQuery,
hr.getHighlightMode() ) );
}
this.matchHighlightRequests = matchHighlightRequests;
this.hits = hits;
this.from = request.getStart();
this.count =
( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length : Math.min(
request.getCount(), hits.scoreDocs.length ) );
this.pointer = from;
this.processedArtifactInfoCount = 0;
this.maxRecPointer = from + count;
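// prefetch the first ArtifactInfo eagerly, so hasNext() is correct right after construction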
ai = createNextAi();
if ( ai == null )
{
cleanUp();
}
}
public boolean hasNext()
{
return ai != null;
}
public ArtifactInfo next()
{
ArtifactInfo result = ai;
try
{
ai = createNextAi();
}
catch ( IOException e )
{
ai = null;
throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e );
}
finally
{
if ( ai == null )
{
cleanUp();
}
}
return result;
}
public void remove()
{
throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
}
public Iterator<ArtifactInfo> iterator()
{
return this;
}
public void close()
{
cleanUp();
}
public int getTotalProcessedArtifactInfoCount()
{
return processedArtifactInfoCount;
}
@Override
public void finalize()
throws Throwable
{
super.finalize();
if ( !cleanedUp )
{
System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query "
+ searchRequest.getQuery().toString() );
cleanUp();
}
}
// ==
protected ArtifactInfo createNextAi()
throws IOException
{
ArtifactInfo result = null;
// we should stop when:
// a) we have found what we were looking for, or
// b) the pointer has advanced past the number of documents the user requested, or
// c) the pointer has advanced past the number of hits we actually have
while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) )
{
Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc );
IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc );
result = IndexUtils.constructArtifactInfo( doc, context );
if ( result != null )
{
// WARNING: explain() is a very costly operation, for debugging only;
// never enable it on production systems!
if ( searchRequest.isLuceneExplain() )
{
result.getAttributes().put( Explanation.class.getName(),
indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() );
}
result.setLuceneScore( hits.scoreDocs[pointer].score );
result.setRepository( context.getRepositoryId() );
result.setContext( context.getId() );
if ( filter != null )
{
if ( !filter.accepts( context, result ) )
{
result = null;
}
}
if ( result != null && postprocessor != null )
{
postprocessor.postprocess( context, result );
}
if ( result != null && !matchHighlightRequests.isEmpty() )
{
calculateHighlights( context, doc, result );
}
}
pointer++;
processedArtifactInfoCount++;
}
return result;
}
private volatile boolean cleanedUp = false;
protected synchronized void cleanUp()
{
if ( cleanedUp )
{
return;
}
try
{
indexSearcher.release();
}
catch ( IOException e )
{
throw new IllegalStateException( e );
}
this.cleanedUp = true;
}
/**
* Creates MatchHighlights for every match highlight request and adds them to the ArtifactInfo, if any are found.
*
* @param context the indexing context the document was found in
* @param d the matched Lucene document
* @param ai the ArtifactInfo to attach the highlights to
*/
protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai )
throws IOException
{
IndexerField field = null;
String text = null;
List<String> highlightFragment = null;
for ( MatchHighlightRequest hr : matchHighlightRequests )
{
field = selectStoredIndexerField( hr.getField() );
if ( field != null )
{
text = ai.getFieldValue( field.getOntology() );
if ( text != null )
{
highlightFragment = highlightField( context, hr, field, text );
if ( highlightFragment != null && !highlightFragment.isEmpty() )
{
MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment );
ai.getMatchHighlights().add( matchHighlight );
}
}
}
}
}
/**
* Selects a STORED IndexerField assigned to the passed-in Field.
*
* @param field the field to select a stored IndexerField for
* @return the selected IndexerField, or null if the field has no IndexerFields assigned
*/
protected IndexerField selectStoredIndexerField( Field field )
{
// hack: MAVEN.CLASSNAMES is stored under a dedicated field by JarFileContentsIndexCreator
if ( MAVEN.CLASSNAMES.equals( field ) )
{
return JarFileContentsIndexCreator.FLD_CLASSNAMES;
}
else
{
return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next();
}
}
/**
* Returns the list of text fragments with matches highlighted in the style the user requested.
*
* @param context the indexing context, supplying the analyzer
* @param hr the match highlight request carrying the query and highlight mode
* @param field the indexer field whose stored value is being highlighted
* @param text the stored field value to highlight
* @return the highlighted fragments, possibly empty
* @throws IOException in case of an IO problem
*/
protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field,
String text )
throws IOException
{
// special case for class names: convert '/'-separated paths to dotted class names, dropping the leading dot on each line
if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) )
{
text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" );
}
Analyzer analyzer = context.getAnalyzer();
TokenStream baseTokenStream = analyzer.tokenStream( field.getKey(), new StringReader( text ) );
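// CachingTokenFilter records the tokens as they are consumed, so the stream can be replayed;
// it is also the stream we reset/close on the unsupported-mode path below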
CachingTokenFilter tokenStream = new CachingTokenFilter( baseTokenStream );
Formatter formatter = null;
if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) )
{
formatter = new SimpleHTMLFormatter();
}
else
{
tokenStream.reset();
tokenStream.end();
tokenStream.close();
throw new UnsupportedOperationException( "Highlight mode \"" + hr.getHighlightMode().toString()
+ "\" is not supported!" );
}
return getBestFragments( hr.getQuery(), formatter, tokenStream, text, 3 );
}
protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream,
String text, int maxNumFragments )
throws IOException
{
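// QueryScorer ranks fragments against the (already rewritten) query; OneLineFragmenter and
// CleaningEncoder are project-local helpers that fragment per line and clean up the output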
Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) );
highlighter.setTextFragmenter( new OneLineFragmenter() );
maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check
TextFragment[] frag;
// Get text
ArrayList<String> fragTexts = new ArrayList<>( maxNumFragments );
try
{
frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments );
for ( int i = 0; i < frag.length; i++ )
{
if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) )
{
fragTexts.add( frag[i].toString() );
}
}
}
catch ( InvalidTokenOffsetsException e )
{
// token offsets did not match the text (analyzer mismatch); return the fragments gathered so far
}
return fragTexts;
}
protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
{
return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
}
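// binary search of the starts array for the sub-reader owning global doc n, following the classic
// Lucene MultiReader/MultiSearcher scheme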
private static int readerIndex( int n, int[] starts, int numSubReaders )
{ // find reader for doc n:
int lo = 0; // search starts array
int hi = numSubReaders - 1; // for first element less than n
while ( hi >= lo )
{
int mid = ( lo + hi ) >>> 1;
int midValue = starts[mid];
if ( n < midValue )
{
hi = mid - 1;
}
else if ( n > midValue )
{
lo = mid + 1;
}
else
{ // found a match
while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
{
mid++; // scan to last match
}
return mid;
}
}
return hi;
}
}