blob: 5ca4b4230c7d8574d4983c667dbd8c765ae181ce [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.maven.index;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.context.NexusIndexMultiSearcher;
import org.apache.maven.index.creator.JarFileContentsIndexCreator;
/**
 * Default implementation of {@link IteratorResultSet}. Wraps a Lucene {@link TopDocs} result and lazily
 * materializes {@link ArtifactInfo} instances as the caller iterates, applying the request's filter,
 * postprocessor and (optional) match highlighting along the way. The underlying searcher lock is released
 * by {@link #cleanUp()} once iteration is exhausted or the result set is closed.
 * TODO: there is too much logic in here; refactor this!
 *
 * @author cstamas
 */
public class DefaultIteratorResultSet implements IteratorResultSet {
    private final IteratorSearchRequest searchRequest;

    private final NexusIndexMultiSearcher indexSearcher;

    private final List<IndexingContext> contexts;

    /**
     * {@code starts[i]} is the first global Lucene doc id belonging to {@code contexts.get(i)};
     * {@code starts[contexts.size()]} is the total maxDoc. Used to map a hit back to its context.
     */
    private final int[] starts;

    private final ArtifactInfoFilter filter;

    private final ArtifactInfoPostprocessor postprocessor;

    /** Highlight requests with their queries rewritten against this searcher's reader. */
    private final List<MatchHighlightRequest> matchHighlightRequests;

    private final TopDocs hits;

    private final int from;

    private final int count;

    /** Exclusive upper bound for {@link #pointer}: {@code from + count}. */
    private final int maxRecPointer;

    /** Index of the next hit in {@code hits.scoreDocs} to examine. */
    private int pointer;

    private int processedArtifactInfoCount;

    /** Pre-fetched next element, or {@code null} once the iteration is exhausted. */
    private ArtifactInfo ai;

    /**
     * Creates a result set over the given hits.
     *
     * @param request the search request carrying paging, filter, postprocessor and highlight settings
     * @param indexSearcher an already-open multi searcher (its reader lock is already acquired)
     * @param contexts the indexing contexts backing the searcher, in searcher order
     * @param hits the Lucene hits to iterate
     * @throws IOException if reading from the index or rewriting a highlight query fails
     */
    protected DefaultIteratorResultSet(
            final IteratorSearchRequest request,
            final NexusIndexMultiSearcher indexSearcher,
            final List<IndexingContext> contexts,
            final TopDocs hits)
            throws IOException {
        this.searchRequest = request;
        this.indexSearcher = indexSearcher;
        this.contexts = contexts;
        {
            int maxDoc = 0;
            this.starts = new int[contexts.size() + 1]; // build starts array
            // this is good to do as we have NexusIndexMultiSearcher passed in constructor, so it is already
            // open, hence #acquire() already invoked on underlying NexusIndexMultiReader
            final List<IndexSearcher> acquiredSearchers =
                    indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
            for (int i = 0; i < contexts.size(); i++) {
                starts[i] = maxDoc;
                maxDoc += acquiredSearchers.get(i).getIndexReader().maxDoc(); // compute maxDocs
            }
            starts[contexts.size()] = maxDoc;
        }
        this.filter = request.getArtifactInfoFilter();
        this.postprocessor = request.getArtifactInfoPostprocessor();
        // Rewrite each highlight query against this searcher's reader (needed for e.g. wildcard/prefix
        // queries to be highlightable) and keep the REWRITTEN list. The previous code built this list but
        // discarded it, storing the raw requests instead — so highlighting saw unrewritten queries.
        List<MatchHighlightRequest> rewrittenHighlightRequests =
                new ArrayList<>(request.getMatchHighlightRequests().size());
        for (MatchHighlightRequest hr : request.getMatchHighlightRequests()) {
            Query rewrittenQuery = hr.getQuery().rewrite(indexSearcher.getIndexReader());
            rewrittenHighlightRequests.add(
                    new MatchHighlightRequest(hr.getField(), rewrittenQuery, hr.getHighlightMode()));
        }
        this.matchHighlightRequests = rewrittenHighlightRequests;
        this.hits = hits;
        this.from = request.getStart();
        this.count = (request.getCount() == AbstractSearchRequest.UNDEFINED
                ? hits.scoreDocs.length
                : Math.min(request.getCount(), hits.scoreDocs.length));
        this.pointer = from;
        this.processedArtifactInfoCount = 0;
        this.maxRecPointer = from + count;
        ai = createNextAi();
        if (ai == null) {
            // nothing to iterate: release the searcher lock immediately
            cleanUp();
        }
    }

    @Override
    public boolean hasNext() {
        return ai != null;
    }

    @Override
    public ArtifactInfo next() {
        ArtifactInfo result = ai;
        try {
            ai = createNextAi();
        } catch (IOException e) {
            ai = null;
            throw new IllegalStateException("Cannot fetch next ArtifactInfo!", e);
        } finally {
            if (ai == null) {
                // exhausted (or failed): release the searcher lock
                cleanUp();
            }
        }
        // NOTE(review): returns null past the end instead of throwing NoSuchElementException; callers
        // appear to rely on hasNext(), so the historical behavior is preserved here.
        return result;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException(
                "Method not supported on " + getClass().getName());
    }

    @Override
    public Iterator<ArtifactInfo> iterator() {
        return this;
    }

    @Override
    public void close() {
        cleanUp();
    }

    public int getTotalProcessedArtifactInfoCount() {
        return processedArtifactInfoCount;
    }

    /**
     * Last-resort safety net: warns and releases the searcher lock if the caller forgot to
     * {@link #close()} this result set. (finalize() is deprecated; kept for compatibility.)
     */
    @Override
    public void finalize() throws Throwable {
        super.finalize();
        if (!cleanedUp) {
            System.err.println("#WARNING: Lock leaking from " + getClass().getName() + " for query "
                    + searchRequest.getQuery().toString());
            cleanUp();
        }
    }

    // ==

    /**
     * Advances {@link #pointer} until the next hit that survives construction, filtering and
     * postprocessing, or returns {@code null} when the page/hit limit is reached.
     */
    protected ArtifactInfo createNextAi() throws IOException {
        ArtifactInfo result = null;
        // we should stop if:
        // a) we found what we want
        // b) pointer advanced over more documents that user requested
        // c) pointer advanced over more documents that hits has
        // or we found what we need
        while ((result == null) && (pointer < maxRecPointer) && (pointer < hits.scoreDocs.length)) {
            Document doc = indexSearcher.doc(hits.scoreDocs[pointer].doc);
            IndexingContext context = getIndexingContextForPointer(doc, hits.scoreDocs[pointer].doc);
            result = IndexUtils.constructArtifactInfo(doc, context);
            if (result != null) {
                // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
                // For debugging only!!!
                if (searchRequest.isLuceneExplain()) {
                    result.getAttributes()
                            .put(
                                    Explanation.class.getName(),
                                    indexSearcher
                                            .explain(searchRequest.getQuery(), hits.scoreDocs[pointer].doc)
                                            .toString());
                }
                result.setLuceneScore(hits.scoreDocs[pointer].score);
                result.setRepository(context.getRepositoryId());
                result.setContext(context.getId());
                if (filter != null && !filter.accepts(context, result)) {
                    result = null; // filtered out: keep scanning
                }
                if (result != null && postprocessor != null) {
                    postprocessor.postprocess(context, result);
                }
                if (result != null && !matchHighlightRequests.isEmpty()) {
                    calculateHighlights(context, doc, result);
                }
            }
            pointer++;
            processedArtifactInfoCount++;
        }
        return result;
    }

    private volatile boolean cleanedUp = false;

    /**
     * Releases the searcher lock exactly once; subsequent calls are no-ops.
     *
     * @throws IllegalStateException if releasing the underlying searcher fails
     */
    protected synchronized void cleanUp() {
        if (cleanedUp) {
            return;
        }
        try {
            indexSearcher.release();
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        this.cleanedUp = true;
    }

    /**
     * Creates the MatchHighlights and adds them to ArtifactInfo if found/can.
     *
     * @param context the context the document came from
     * @param d the matched Lucene document
     * @param ai the artifact info to decorate with highlights
     * @throws IOException if highlighting fails while reading token streams
     */
    protected void calculateHighlights(IndexingContext context, Document d, ArtifactInfo ai) throws IOException {
        IndexerField field;
        String text;
        List<String> highlightFragment;
        for (MatchHighlightRequest hr : matchHighlightRequests) {
            field = selectStoredIndexerField(hr.getField());
            if (field != null) {
                text = ai.getFieldValue(field.getOntology());
                if (text != null) {
                    highlightFragment = highlightField(context, hr, field, text);
                    if (highlightFragment != null && !highlightFragment.isEmpty()) {
                        MatchHighlight matchHighlight = new MatchHighlight(hr.getField(), highlightFragment);
                        ai.getMatchHighlights().add(matchHighlight);
                    }
                }
            }
        }
    }

    /**
     * Select a STORED IndexerField assigned to passed in Field.
     *
     * @param field the ontology field to resolve
     * @return the stored indexer field, or {@code null} if the field has no indexer fields
     */
    protected IndexerField selectStoredIndexerField(Field field) {
        // hack here: class names are stored only by the JAR contents creator's dedicated field
        if (MAVEN.CLASSNAMES.equals(field)) {
            return JarFileContentsIndexCreator.FLD_CLASSNAMES;
        } else {
            return field.getIndexerFields().isEmpty()
                    ? null
                    : field.getIndexerFields().iterator().next();
        }
    }

    /**
     * Returns the match fragments of {@code text} highlighted in the style the user requested.
     *
     * @param context the context whose analyzer tokenizes the field
     * @param hr the highlight request (query + mode)
     * @param field the indexer field being highlighted
     * @param text the stored field value
     * @return up to three best fragments (possibly empty)
     * @throws IOException if tokenization or highlighting fails
     * @throws UnsupportedOperationException if the requested highlight mode is not HTML
     */
    protected List<String> highlightField(
            IndexingContext context, MatchHighlightRequest hr, IndexerField field, String text) throws IOException {
        // exception with classnames: stored form is "/com/foo/Bar\n..."; present as dotted FQCNs
        if (MAVEN.CLASSNAMES.equals(field.getOntology())) {
            text = text.replace('/', '.').replaceAll("^\\.", "").replaceAll("\n\\.", "\n");
        }
        Analyzer analyzer = context.getAnalyzer();
        TokenStream baseTokenStream = analyzer.tokenStream(field.getKey(), new StringReader(text));
        CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream);
        Formatter formatter;
        if (MatchHighlightMode.HTML.equals(hr.getHighlightMode())) {
            formatter = new SimpleHTMLFormatter();
        } else {
            // unsupported mode: dispose of the token stream before bailing out
            tokenStream.reset();
            tokenStream.end();
            tokenStream.close();
            // FIX: message previously read "Hightlight more" — garbled; it reports the unsupported mode
            throw new UnsupportedOperationException(
                    "Highlight mode \"" + hr.getHighlightMode().toString() + "\" is not supported!");
        }
        List<String> bestFragments = getBestFragments(hr.getQuery(), formatter, tokenStream, text, 3);
        return bestFragments;
    }

    /**
     * Runs the Lucene {@link Highlighter} and collects the positively-scored fragments.
     *
     * @param query the (rewritten) query to score fragments against
     * @param formatter the fragment formatter
     * @param tokenStream token stream over {@code text}
     * @param text the raw field text
     * @param maxNumFragments upper bound on returned fragments (clamped to at least 1)
     * @return the fragment texts; empty when nothing scored or on token-offset problems
     * @throws IOException if the highlighter fails reading the token stream
     */
    protected final List<String> getBestFragments(
            Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments)
            throws IOException {
        Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query));
        highlighter.setTextFragmenter(new OneLineFragmenter());
        maxNumFragments = Math.max(1, maxNumFragments); // sanity check
        TextFragment[] frag;
        // Get text
        ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments);
        try {
            frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments);
            for (TextFragment textFragment : frag) {
                if ((textFragment != null) && (textFragment.getScore() > 0)) {
                    fragTexts.add(textFragment.toString());
                }
            }
        } catch (InvalidTokenOffsetsException ignored) {
            // deliberately best-effort: malformed offsets simply yield no fragments
        }
        return fragTexts;
    }

    /**
     * Maps a global document id back to the {@link IndexingContext} that owns it.
     *
     * @param doc the document (unused, kept for subclass overrides)
     * @param docPtr the global Lucene document id
     */
    protected IndexingContext getIndexingContextForPointer(Document doc, int docPtr) {
        return contexts.get(readerIndex(docPtr, this.starts, this.contexts.size()));
    }

    /**
     * Binary search over {@code starts} for the sub-reader owning global doc {@code n}
     * (mirrors Lucene's MultiReader readerIndex logic).
     */
    private static int readerIndex(int n, int[] starts, int numSubReaders) { // find reader for doc n:
        int lo = 0; // search starts array
        int hi = numSubReaders - 1; // for first element less
        while (hi >= lo) {
            int mid = (lo + hi) >>> 1; // overflow-safe midpoint
            int midValue = starts[mid];
            if (n < midValue) {
                hi = mid - 1;
            } else if (n > midValue) {
                lo = mid + 1;
            } else { // found a match
                while (mid + 1 < numSubReaders && starts[mid + 1] == midValue) {
                    mid++; // scan to last match — skips empty sub-readers sharing the same start
                }
                return mid;
            }
        }
        return hi;
    }
}