| package org.apache.maven.index; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| import org.apache.lucene.analysis.Analyzer; |
| |
| import org.apache.lucene.analysis.CachingTokenFilter; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.search.Explanation; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.highlight.Formatter; |
| import org.apache.lucene.search.highlight.Highlighter; |
| import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; |
| import org.apache.lucene.search.highlight.QueryScorer; |
| import org.apache.lucene.search.highlight.SimpleHTMLFormatter; |
| import org.apache.lucene.search.highlight.TextFragment; |
| import org.apache.maven.index.context.IndexUtils; |
| import org.apache.maven.index.context.IndexingContext; |
| import org.apache.maven.index.context.NexusIndexMultiSearcher; |
| import org.apache.maven.index.creator.JarFileContentsIndexCreator; |
| |
| /** |
| * Default implementation of IteratorResultSet. TODO: there is too much logic in here, refactor this! |
| * |
| * @author cstamas |
| */ |
| public class DefaultIteratorResultSet |
| implements IteratorResultSet |
| { |
/** The request this result set was created for; consulted for the "explain" flag and the query. */
| private final IteratorSearchRequest searchRequest; |
| |
/** Searcher used to load and explain hit documents; released by {@code cleanUp()}. */
| private final NexusIndexMultiSearcher indexSearcher; |
| |
/** Contexts taking part in the search; a hit is mapped to one of them by its document number. */
| private final List<IndexingContext> contexts; |
| |
/** starts[i] is the first document number of the i-th acquired searcher; the last slot holds the summed maxDoc. */
| private final int[] starts; |
| |
/** Filter taken from the request; may be null, in which case every constructed ArtifactInfo is accepted. */
| private final ArtifactInfoFilter filter; |
| |
/** Postprocessor taken from the request; may be null, in which case no postprocessing happens. */
| private final ArtifactInfoPostprocessor postprocessor; |
| |
/** Highlight requests that {@code calculateHighlights()} evaluates for every returned ArtifactInfo. */
| private final List<MatchHighlightRequest> matchHighlightRequests; |
| |
/** The hits being iterated; elements of {@code hits.scoreDocs} are visited in order. */
| private final TopDocs hits; |
| |
/** Index into {@code hits.scoreDocs} of the first hit to examine (the request's start). */
| private final int from; |
| |
/** Number of hits to examine: the requested count capped at the number of hits, or all hits if undefined. */
| private final int count; |
| |
/** Exclusive upper bound for {@code pointer}, computed as {@code from + count}. */
| private final int maxRecPointer; |
| |
/** Index into {@code hits.scoreDocs} of the next hit to examine. */
| private int pointer; |
| |
/** Number of hits examined so far, including those that did not yield a result. */
| private int processedArtifactInfoCount; |
| |
/** Pre-fetched element that the next call to {@code next()} returns; null once the set is exhausted. */
| private ArtifactInfo ai; |
| |
/**
 * Creates a result set over the given hits and eagerly fetches the first element.
 * <p>
 * The passed searcher is expected to be already acquired: its acquired sub-searchers are used to build the table
 * mapping document numbers to contexts. If no first element can be produced, the searcher is released right away.
 * <p>
 * Note: this constructor invokes the overridable {@link #createNextAi()} and {@link #cleanUp()} methods.
 *
 * @param request the search request (filter, postprocessor, highlight requests, start and count are read from it)
 * @param indexSearcher the searcher the hits were obtained from
 * @param contexts the contexts taking part in the search, in the same order as the acquired searchers
 * @param hits the hits to iterate over
 * @throws IOException if rewriting a highlight query or fetching the first element fails
 */
protected DefaultIteratorResultSet( final IteratorSearchRequest request,
                                    final NexusIndexMultiSearcher indexSearcher,
                                    final List<IndexingContext> contexts, final TopDocs hits )
    throws IOException
{
    this.searchRequest = request;

    this.indexSearcher = indexSearcher;

    this.contexts = contexts;

    {
        int maxDoc = 0;
        this.starts = new int[contexts.size() + 1]; // build starts array
        // this is good to do as we have NexusIndexMultiSearcher passed in constructor, so it is already open,
        // hence #acquire() already invoked on underlying NexusIndexMultiReader
        final List<IndexSearcher> acquiredSearchers =
            indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
        for ( int i = 0; i < contexts.size(); i++ )
        {
            starts[i] = maxDoc;
            maxDoc += acquiredSearchers.get( i ).getIndexReader().maxDoc(); // compute maxDocs
        }
        starts[contexts.size()] = maxDoc;
    }

    this.filter = request.getArtifactInfoFilter();

    this.postprocessor = request.getArtifactInfoPostprocessor();

    // Highlighting has to work on the rewritten queries. Build the rewritten list first and store THAT list in
    // the field; previously the rewritten list was a local that shadowed the field and was thrown away.
    final List<MatchHighlightRequest> rewrittenRequests = new ArrayList<MatchHighlightRequest>();
    for ( MatchHighlightRequest hr : request.getMatchHighlightRequests() )
    {
        Query rewrittenQuery = hr.getQuery().rewrite( indexSearcher.getIndexReader() );
        rewrittenRequests.add( new MatchHighlightRequest( hr.getField(), rewrittenQuery, hr.getHighlightMode() ) );
    }
    this.matchHighlightRequests = rewrittenRequests;

    this.hits = hits;

    this.from = request.getStart();

    // an undefined count means "all hits"; otherwise never look at more hits than there are
    this.count =
        ( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length : Math.min(
            request.getCount(), hits.scoreDocs.length ) );

    this.pointer = from;

    this.processedArtifactInfoCount = 0;

    this.maxRecPointer = from + count;

    ai = createNextAi();

    if ( ai == null )
    {
        // nothing to iterate over, no reason to hold the searcher any longer
        cleanUp();
    }
}
| |
| public boolean hasNext() |
| { |
| return ai != null; |
| } |
| |
| public ArtifactInfo next() |
| { |
| ArtifactInfo result = ai; |
| |
| try |
| { |
| ai = createNextAi(); |
| } |
| catch ( IOException e ) |
| { |
| ai = null; |
| |
| throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e ); |
| } |
| finally |
| { |
| if ( ai == null ) |
| { |
| cleanUp(); |
| } |
| } |
| |
| return result; |
| } |
| |
| public void remove() |
| { |
| throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() ); |
| } |
| |
| public Iterator<ArtifactInfo> iterator() |
| { |
| return this; |
| } |
| |
| public void close() |
| { |
| cleanUp(); |
| } |
| |
| public int getTotalProcessedArtifactInfoCount() |
| { |
| return processedArtifactInfoCount; |
| } |
| |
| @Override |
| public void finalize() |
| throws Throwable |
| { |
| super.finalize(); |
| |
| if ( !cleanedUp ) |
| { |
| System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query " |
| + searchRequest.getQuery().toString() ); |
| |
| cleanUp(); |
| } |
| } |
| |
| // == |
| |
| protected ArtifactInfo createNextAi() |
| throws IOException |
| { |
| ArtifactInfo result = null; |
| |
| // we should stop if: |
| // a) we found what we want |
| // b) pointer advanced over more documents that user requested |
| // c) pointer advanced over more documents that hits has |
| // or we found what we need |
| while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) ) |
| { |
| Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc ); |
| |
| IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc ); |
| |
| result = IndexUtils.constructArtifactInfo( doc, context ); |
| |
| if ( result != null ) |
| { |
| // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION |
| // For debugging only!!! |
| if ( searchRequest.isLuceneExplain() ) |
| { |
| result.getAttributes().put( Explanation.class.getName(), |
| indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() ); |
| } |
| |
| result.setLuceneScore( hits.scoreDocs[pointer].score ); |
| |
| result.setRepository( context.getRepositoryId() ); |
| |
| result.setContext( context.getId() ); |
| |
| if ( filter != null ) |
| { |
| if ( !filter.accepts( context, result ) ) |
| { |
| result = null; |
| } |
| } |
| |
| if ( result != null && postprocessor != null ) |
| { |
| postprocessor.postprocess( context, result ); |
| } |
| |
| if ( result != null && matchHighlightRequests.size() > 0 ) |
| { |
| calculateHighlights( context, doc, result ); |
| } |
| } |
| |
| pointer++; |
| processedArtifactInfoCount++; |
| } |
| |
| return result; |
| } |
| |
| private volatile boolean cleanedUp = false; |
| |
| protected synchronized void cleanUp() |
| { |
| if ( cleanedUp ) |
| { |
| return; |
| } |
| |
| try |
| { |
| indexSearcher.release(); |
| } |
| catch ( IOException e ) |
| { |
| throw new IllegalStateException( e ); |
| } |
| |
| this.cleanedUp = true; |
| } |
| |
| /** |
| * Creates the MatchHighlights and adds them to ArtifactInfo if found/can. |
| * |
| * @param context |
| * @param d |
| * @param ai |
| */ |
| protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai ) |
| throws IOException |
| { |
| IndexerField field = null; |
| |
| String text = null; |
| |
| List<String> highlightFragment = null; |
| |
| for ( MatchHighlightRequest hr : matchHighlightRequests ) |
| { |
| field = selectStoredIndexerField( hr.getField() ); |
| |
| if ( field != null ) |
| { |
| text = ai.getFieldValue( field.getOntology() ); |
| |
| if ( text != null ) |
| { |
| highlightFragment = highlightField( context, hr, field, text ); |
| |
| if ( highlightFragment != null && highlightFragment.size() > 0 ) |
| { |
| MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment ); |
| |
| ai.getMatchHighlights().add( matchHighlight ); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Select a STORED IndexerField assigned to passed in Field. |
| * |
| * @param field |
| * @return |
| */ |
| protected IndexerField selectStoredIndexerField( Field field ) |
| { |
| // hack here |
| if ( MAVEN.CLASSNAMES.equals( field ) ) |
| { |
| return JarFileContentsIndexCreator.FLD_CLASSNAMES; |
| } |
| else |
| { |
| return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next(); |
| } |
| } |
| |
| /** |
| * Returns a string that contains match fragment highlighted in style as user requested. |
| * |
| * @param context |
| * @param hr |
| * @param field |
| * @param text |
| * @return |
| * @throws IOException |
| */ |
| protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field, |
| String text ) |
| throws IOException |
| { |
| // exception with classnames |
| if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) ) |
| { |
| text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" ); |
| } |
| |
| Analyzer analyzer = context.getAnalyzer(); |
| TokenStream baseTokenStream = analyzer.tokenStream( field.getKey(), new StringReader( text ) ); |
| baseTokenStream.reset(); |
| |
| CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream); |
| |
| Formatter formatter = null; |
| |
| if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) ) |
| { |
| formatter = new SimpleHTMLFormatter(); |
| } |
| else |
| { |
| tokenStream.reset(); |
| tokenStream.end(); |
| tokenStream.close(); |
| throw new UnsupportedOperationException( "Hightlight more \"" + hr.getHighlightMode().toString() |
| + "\" is not supported!" ); |
| } |
| |
| List<String> bestFragments = getBestFragments( hr.getQuery(), formatter, tokenStream, text, 3 ); |
| |
| tokenStream.end(); |
| tokenStream.close(); |
| |
| return bestFragments; |
| } |
| |
| protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream, |
| String text, int maxNumFragments ) |
| throws IOException |
| { |
| Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) ); |
| |
| highlighter.setTextFragmenter( new OneLineFragmenter() ); |
| |
| maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check |
| |
| TextFragment[] frag; |
| // Get text |
| ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments ); |
| |
| try |
| { |
| frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments ); |
| |
| for ( int i = 0; i < frag.length; i++ ) |
| { |
| if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) ) |
| { |
| fragTexts.add( frag[i].toString() ); |
| } |
| } |
| } |
| catch ( InvalidTokenOffsetsException e ) |
| { |
| // empty? |
| } |
| |
| return fragTexts; |
| } |
| |
| protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr ) |
| { |
| return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) ); |
| } |
| |
| private static int readerIndex( int n, int[] starts, int numSubReaders ) |
| { // find reader for doc n: |
| int lo = 0; // search starts array |
| int hi = numSubReaders - 1; // for first element less |
| |
| while ( hi >= lo ) |
| { |
| int mid = ( lo + hi ) >>> 1; |
| int midValue = starts[mid]; |
| if ( n < midValue ) |
| { |
| hi = mid - 1; |
| } |
| else if ( n > midValue ) |
| { |
| lo = mid + 1; |
| } |
| else |
| { // found a match |
| while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue ) |
| { |
| mid++; // scan to last match |
| } |
| return mid; |
| } |
| } |
| return hi; |
| } |
| } |