| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.maven.index; |
| |
| import java.io.IOException; |
| import java.io.StringReader; |
| import java.util.ArrayList; |
| import java.util.Iterator; |
| import java.util.List; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.CachingTokenFilter; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.search.Explanation; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.Query; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.search.highlight.Formatter; |
| import org.apache.lucene.search.highlight.Highlighter; |
| import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; |
| import org.apache.lucene.search.highlight.QueryScorer; |
| import org.apache.lucene.search.highlight.SimpleHTMLFormatter; |
| import org.apache.lucene.search.highlight.TextFragment; |
| import org.apache.maven.index.context.IndexUtils; |
| import org.apache.maven.index.context.IndexingContext; |
| import org.apache.maven.index.context.NexusIndexMultiSearcher; |
| import org.apache.maven.index.creator.JarFileContentsIndexCreator; |
| |
| /** |
| * Default implementation of IteratorResultSet. TODO: there is too much logic in here; refactor this! |
| * |
| * @author cstamas |
| */ |
| public class DefaultIteratorResultSet implements IteratorResultSet { |
| private final IteratorSearchRequest searchRequest; |
| |
| private final NexusIndexMultiSearcher indexSearcher; |
| |
| private final List<IndexingContext> contexts; |
| |
| private final int[] starts; |
| |
| private final ArtifactInfoFilter filter; |
| |
| private final ArtifactInfoPostprocessor postprocessor; |
| |
| private final List<MatchHighlightRequest> matchHighlightRequests; |
| |
| private final TopDocs hits; |
| |
| private final int from; |
| |
| private final int count; |
| |
| private final int maxRecPointer; |
| |
| private int pointer; |
| |
| private int processedArtifactInfoCount; |
| |
| private ArtifactInfo ai; |
| |
| protected DefaultIteratorResultSet( |
| final IteratorSearchRequest request, |
| final NexusIndexMultiSearcher indexSearcher, |
| final List<IndexingContext> contexts, |
| final TopDocs hits) |
| throws IOException { |
| this.searchRequest = request; |
| |
| this.indexSearcher = indexSearcher; |
| |
| this.contexts = contexts; |
| |
| { |
| int maxDoc = 0; |
| this.starts = new int[contexts.size() + 1]; // build starts array |
| // this is good to do as we have NexusIndexMultiSearcher passed in contructor, so it is already open, hence |
| // #acquire() already invoked on underlying NexusIndexMultiReader |
| final List<IndexSearcher> acquiredSearchers = |
| indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers(); |
| for (int i = 0; i < contexts.size(); i++) { |
| starts[i] = maxDoc; |
| maxDoc += acquiredSearchers.get(i).getIndexReader().maxDoc(); // compute maxDocs |
| } |
| starts[contexts.size()] = maxDoc; |
| } |
| |
| this.filter = request.getArtifactInfoFilter(); |
| |
| this.postprocessor = request.getArtifactInfoPostprocessor(); |
| |
| this.matchHighlightRequests = request.getMatchHighlightRequests(); |
| |
| List<MatchHighlightRequest> matchHighlightRequests = new ArrayList<>(); |
| for (MatchHighlightRequest hr : request.getMatchHighlightRequests()) { |
| Query rewrittenQuery = hr.getQuery().rewrite(indexSearcher.getIndexReader()); |
| matchHighlightRequests.add(new MatchHighlightRequest(hr.getField(), rewrittenQuery, hr.getHighlightMode())); |
| } |
| |
| this.hits = hits; |
| |
| this.from = request.getStart(); |
| |
| this.count = (request.getCount() == AbstractSearchRequest.UNDEFINED |
| ? hits.scoreDocs.length |
| : Math.min(request.getCount(), hits.scoreDocs.length)); |
| |
| this.pointer = from; |
| |
| this.processedArtifactInfoCount = 0; |
| |
| this.maxRecPointer = from + count; |
| |
| ai = createNextAi(); |
| |
| if (ai == null) { |
| cleanUp(); |
| } |
| } |
| |
| public boolean hasNext() { |
| return ai != null; |
| } |
| |
| public ArtifactInfo next() { |
| ArtifactInfo result = ai; |
| |
| try { |
| ai = createNextAi(); |
| } catch (IOException e) { |
| ai = null; |
| |
| throw new IllegalStateException("Cannot fetch next ArtifactInfo!", e); |
| } finally { |
| if (ai == null) { |
| cleanUp(); |
| } |
| } |
| |
| return result; |
| } |
| |
| public void remove() { |
| throw new UnsupportedOperationException( |
| "Method not supported on " + getClass().getName()); |
| } |
| |
| public Iterator<ArtifactInfo> iterator() { |
| return this; |
| } |
| |
| public void close() { |
| cleanUp(); |
| } |
| |
| public int getTotalProcessedArtifactInfoCount() { |
| return processedArtifactInfoCount; |
| } |
| |
| @Override |
| public void finalize() throws Throwable { |
| super.finalize(); |
| |
| if (!cleanedUp) { |
| System.err.println("#WARNING: Lock leaking from " + getClass().getName() + " for query " |
| + searchRequest.getQuery().toString()); |
| |
| cleanUp(); |
| } |
| } |
| |
| // == |
| |
| protected ArtifactInfo createNextAi() throws IOException { |
| ArtifactInfo result = null; |
| |
| // we should stop if: |
| // a) we found what we want |
| // b) pointer advanced over more documents that user requested |
| // c) pointer advanced over more documents that hits has |
| // or we found what we need |
| while ((result == null) && (pointer < maxRecPointer) && (pointer < hits.scoreDocs.length)) { |
| Document doc = indexSearcher.doc(hits.scoreDocs[pointer].doc); |
| |
| IndexingContext context = getIndexingContextForPointer(doc, hits.scoreDocs[pointer].doc); |
| |
| result = IndexUtils.constructArtifactInfo(doc, context); |
| |
| if (result != null) { |
| // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION |
| // For debugging only!!! |
| if (searchRequest.isLuceneExplain()) { |
| result.getAttributes() |
| .put( |
| Explanation.class.getName(), |
| indexSearcher |
| .explain(searchRequest.getQuery(), hits.scoreDocs[pointer].doc) |
| .toString()); |
| } |
| |
| result.setLuceneScore(hits.scoreDocs[pointer].score); |
| |
| result.setRepository(context.getRepositoryId()); |
| |
| result.setContext(context.getId()); |
| |
| if (filter != null) { |
| if (!filter.accepts(context, result)) { |
| result = null; |
| } |
| } |
| |
| if (result != null && postprocessor != null) { |
| postprocessor.postprocess(context, result); |
| } |
| |
| if (result != null && matchHighlightRequests.size() > 0) { |
| calculateHighlights(context, doc, result); |
| } |
| } |
| |
| pointer++; |
| processedArtifactInfoCount++; |
| } |
| |
| return result; |
| } |
| |
| private volatile boolean cleanedUp = false; |
| |
| protected synchronized void cleanUp() { |
| if (cleanedUp) { |
| return; |
| } |
| |
| try { |
| indexSearcher.release(); |
| } catch (IOException e) { |
| throw new IllegalStateException(e); |
| } |
| |
| this.cleanedUp = true; |
| } |
| |
| /** |
| * Creates the MatchHighlights and adds them to ArtifactInfo if found/can. |
| * |
| * @param context |
| * @param d |
| * @param ai |
| */ |
| protected void calculateHighlights(IndexingContext context, Document d, ArtifactInfo ai) throws IOException { |
| IndexerField field; |
| |
| String text; |
| |
| List<String> highlightFragment; |
| |
| for (MatchHighlightRequest hr : matchHighlightRequests) { |
| field = selectStoredIndexerField(hr.getField()); |
| |
| if (field != null) { |
| text = ai.getFieldValue(field.getOntology()); |
| |
| if (text != null) { |
| highlightFragment = highlightField(context, hr, field, text); |
| |
| if (highlightFragment != null && highlightFragment.size() > 0) { |
| MatchHighlight matchHighlight = new MatchHighlight(hr.getField(), highlightFragment); |
| |
| ai.getMatchHighlights().add(matchHighlight); |
| } |
| } |
| } |
| } |
| } |
| |
| /** |
| * Select a STORED IndexerField assigned to passed in Field. |
| * |
| * @param field |
| * @return |
| */ |
| protected IndexerField selectStoredIndexerField(Field field) { |
| // hack here |
| if (MAVEN.CLASSNAMES.equals(field)) { |
| return JarFileContentsIndexCreator.FLD_CLASSNAMES; |
| } else { |
| return field.getIndexerFields().isEmpty() |
| ? null |
| : field.getIndexerFields().iterator().next(); |
| } |
| } |
| |
| /** |
| * Returns a string that contains match fragment highlighted in style as user requested. |
| * |
| * @param context |
| * @param hr |
| * @param field |
| * @param text |
| * @return |
| * @throws IOException |
| */ |
| protected List<String> highlightField( |
| IndexingContext context, MatchHighlightRequest hr, IndexerField field, String text) throws IOException { |
| // exception with classnames |
| if (MAVEN.CLASSNAMES.equals(field.getOntology())) { |
| text = text.replace('/', '.').replaceAll("^\\.", "").replaceAll("\n\\.", "\n"); |
| } |
| |
| Analyzer analyzer = context.getAnalyzer(); |
| TokenStream baseTokenStream = analyzer.tokenStream(field.getKey(), new StringReader(text)); |
| |
| CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream); |
| |
| Formatter formatter; |
| |
| if (MatchHighlightMode.HTML.equals(hr.getHighlightMode())) { |
| formatter = new SimpleHTMLFormatter(); |
| } else { |
| tokenStream.reset(); |
| tokenStream.end(); |
| tokenStream.close(); |
| throw new UnsupportedOperationException( |
| "Hightlight more \"" + hr.getHighlightMode().toString() + "\" is not supported!"); |
| } |
| |
| List<String> bestFragments = getBestFragments(hr.getQuery(), formatter, tokenStream, text, 3); |
| |
| return bestFragments; |
| } |
| |
| protected final List<String> getBestFragments( |
| Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments) |
| throws IOException { |
| Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query)); |
| |
| highlighter.setTextFragmenter(new OneLineFragmenter()); |
| |
| maxNumFragments = Math.max(1, maxNumFragments); // sanity check |
| |
| TextFragment[] frag; |
| // Get text |
| ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments); |
| |
| try { |
| frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments); |
| |
| for (TextFragment textFragment : frag) { |
| if ((textFragment != null) && (textFragment.getScore() > 0)) { |
| fragTexts.add(textFragment.toString()); |
| } |
| } |
| } catch (InvalidTokenOffsetsException e) { |
| // empty? |
| } |
| |
| return fragTexts; |
| } |
| |
| protected IndexingContext getIndexingContextForPointer(Document doc, int docPtr) { |
| return contexts.get(readerIndex(docPtr, this.starts, this.contexts.size())); |
| } |
| |
| private static int readerIndex(int n, int[] starts, int numSubReaders) { // find reader for doc n: |
| int lo = 0; // search starts array |
| int hi = numSubReaders - 1; // for first element less |
| |
| while (hi >= lo) { |
| int mid = (lo + hi) >>> 1; |
| int midValue = starts[mid]; |
| if (n < midValue) { |
| hi = mid - 1; |
| } else if (n > midValue) { |
| lo = mid + 1; |
| } else { // found a match |
| while (mid + 1 < numSubReaders && starts[mid + 1] == midValue) { |
| mid++; // scan to last match |
| } |
| return mid; |
| } |
| } |
| return hi; |
| } |
| } |