/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.solr.search.join; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.index.MultiDocValues; |
| import org.apache.lucene.index.OrdinalMap; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CharsRefBuilder; |
| import org.apache.lucene.util.LongValues; |
| import org.apache.solr.common.util.NamedList; |
| import org.apache.solr.request.DocValuesFacets; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.SchemaField; |
| import org.apache.solr.search.DocIterator; |
| import org.apache.solr.search.SolrIndexSearcher; |
| |
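/**
 * Collects block join facet counts for a particular field: a facet value is counted
 * once per aggregation key (the parent document of a matched block), no matter how
 * many consecutive child documents of that block carry the value.
 */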
| class BlockJoinFieldFacetAccumulator { |
| private String fieldName; |
| private FieldType fieldType; |
| private int currentSegment = -1; |
| // for term lookups only |
| private SortedSetDocValues topSSDV; |
| private int[] globalCounts; |
| private SortedSetDocValues segmentSSDV; |
| // elems are : facet value counter<<32 | last parent doc num |
| private long[] segmentAccums = new long[0]; |
| // for mapping per-segment ords to global ones |
| private OrdinalMap ordinalMap; |
| private SchemaField schemaField; |
| private SortedDocValues segmentSDV; |
| |
| BlockJoinFieldFacetAccumulator(String fieldName, SolrIndexSearcher searcher) throws IOException { |
| this.fieldName = fieldName; |
| schemaField = searcher.getSchema().getField(fieldName); |
| fieldType = schemaField.getType(); |
| ordinalMap = null; |
| if (schemaField.multiValued()) { |
| topSSDV = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName); |
| if (topSSDV instanceof MultiDocValues.MultiSortedSetDocValues) { |
| ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) topSSDV).mapping; |
| } |
| } else { |
| SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName); |
| if (single instanceof MultiDocValues.MultiSortedDocValues) { |
| ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping; |
| } |
| if (single != null) { |
| topSSDV = DocValues.singleton(single); |
| } |
| } |
| } |
| |
| private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException { |
| segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName); |
| segmentAccums = ArrayUtil.grow(segmentAccums, (int)segmentSSDV.getValueCount()+1);//+1 |
| // zero counts, -1 parent |
| Arrays.fill(segmentAccums,0,(int)segmentSSDV.getValueCount()+1, 0x00000000ffffffffL); |
| segmentSDV = DocValues.unwrapSingleton(segmentSSDV); |
| return segmentSSDV.getValueCount()!=0;// perhaps we need to count "missings"?? |
| } |
| |
| interface AggregatableDocIter extends DocIterator { |
| void reset(); |
| /** a key to aggregate the current document */ |
| int getAggKey(); |
| |
| } |
| static class SortedIntsAggDocIterator implements AggregatableDocIter { |
| private int[] childDocs; |
| private int childCount; |
| private int parentDoc; |
| private int pos=-1; |
| |
| public SortedIntsAggDocIterator(int[] childDocs, int childCount, int parentDoc) { |
| this.childDocs = childDocs; |
| this.childCount = childCount; |
| this.parentDoc = parentDoc; |
| } |
| |
| |
| @Override |
| public boolean hasNext() { |
| return pos<childCount; |
| } |
| |
| @Override |
| public Integer next() { |
| return nextDoc(); |
| } |
| |
| @Override |
| public int nextDoc() { |
| return childDocs[pos++]; |
| } |
| |
| @Override |
| public float score() { |
| return 0; |
| } |
| @Override |
| public void reset() { |
| pos=0; |
| } |
| @Override |
| public int getAggKey(){ |
| return parentDoc; |
| } |
| } |
| |
| void updateCountsWithMatchedBlock(AggregatableDocIter iter) throws IOException { |
| if (segmentSDV != null) { |
| // some codecs may optimize SORTED_SET storage for single-valued fields |
| for (iter.reset(); iter.hasNext(); ) { |
| final int docNum = iter.nextDoc(); |
| if (docNum > segmentSDV.docID()) { |
| segmentSDV.advance(docNum); |
| } |
| |
| int term; |
| if (docNum == segmentSDV.docID()) { |
| term = segmentSDV.ordValue(); |
| } else { |
| term = -1; |
| } |
| accumulateTermOrd(term, iter.getAggKey()); |
| //System.out.println("doc# "+docNum+" "+fieldName+" term# "+term+" tick "+Long.toHexString(segmentAccums[1+term])); |
| } |
| } else { |
| for (iter.reset(); iter.hasNext(); ) { |
| final int docNum = iter.nextDoc(); |
| if (docNum > segmentSSDV.docID()) { |
| segmentSSDV.advance(docNum); |
| } |
| if (docNum == segmentSSDV.docID()) { |
| int term = (int) segmentSSDV.nextOrd(); |
| do { // absent values are designated by term=-1, first iteration counts [0] as "missing", and exit, otherwise it spins |
| accumulateTermOrd(term, iter.getAggKey()); |
| } while (term>=0 && (term = (int) segmentSSDV.nextOrd()) >= 0); |
| } |
| } |
| } |
| } |
| |
| String getFieldName() { |
| return fieldName; |
| } |
| |
| /** copy paste from {@link DocValuesFacets} */ |
| NamedList<Integer> getFacetValue() throws IOException { |
| NamedList<Integer> facetValue = new NamedList<>(); |
| final CharsRefBuilder charsRef = new CharsRefBuilder(); // if there is no globs, take segment's ones |
| for (int i = 1; i< (globalCounts!=null ? globalCounts.length: segmentAccums.length); i++) { |
| int count = globalCounts!=null ? globalCounts[i] : (int)(segmentAccums [i]>>32); |
| if (count > 0) { |
| BytesRef term = topSSDV.lookupOrd(-1 + i); |
| fieldType.indexedToReadable(term, charsRef); |
| facetValue.add(charsRef.toString(), count); |
| } |
| } |
| return facetValue; |
| } |
| |
| // @todo we can track in max term nums to loop only changed range while migrating and labeling |
| private void accumulateTermOrd(int term, int parentDoc) { |
| long accum = segmentAccums[1+term]; |
| if(((int)(accum & 0xffffffffL))!=parentDoc) |
| {// incrementing older 32, reset smaller 32, set them to the new parent |
| segmentAccums[1+term] = ((accum +(0x1L<<32))&0xffffffffL<<32)|parentDoc; |
| } |
| } |
| |
| void setNextReader(LeafReaderContext context) throws IOException { |
| initSegmentData(fieldName, context); |
| currentSegment = context.ord; |
| } |
| |
| void migrateGlobal(){ |
| if (currentSegment<0 // no hits |
| || segmentAccums.length==0 |
| || ordinalMap==null) { // single segment |
| return; |
| } |
| |
| if(globalCounts==null){ |
| // it might be just a single segment |
| globalCounts = new int[(int) ordinalMap.getValueCount()+ /*[0] for missing*/1]; |
| }else{ |
| assert currentSegment>=0; |
| } |
| |
| migrateGlobal(globalCounts, segmentAccums, currentSegment, ordinalMap); |
| } |
| |
| /** folds counts in segment ordinal space (segCounts) into global ordinal space (counts) |
| * copy paste-from {@link DocValuesFacets#migrateGlobal(int[], int[], int, OrdinalMap)}*/ |
| void migrateGlobal(int counts[], long segCounts[], int subIndex, OrdinalMap map) { |
| |
| final LongValues ordMap = map.getGlobalOrds(subIndex); |
| // missing count |
| counts[0] += (int) (segCounts[0]>>32); |
| |
| // migrate actual ordinals |
| for (int ord = 1; ord <= segmentSSDV.getValueCount(); ord++) { |
| int count = (int) (segCounts[ord]>>32); |
| if (count != 0) { |
| counts[1+(int) ordMap.get(ord-1)] += count; |
| } |
| } |
| } |
| } |