// Source: solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.solr.search.join;

 import java.io.IOException;
 import java.util.Arrays;

 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiDocValues;
 import org.apache.lucene.index.OrdinalMap;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.LongValues;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.request.DocValuesFacets;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.SolrIndexSearcher;

 /**
  * Collects block join facet counts for a single field. A facet value is counted once per
  * aggregation key (the parent document of a matched block), no matter how many documents
  * sharing that key carry the value.
  *
  * <p>Counting happens in per-segment ordinal space; {@link #migrateGlobal()} folds the
  * current segment's counts into global ordinal space when an {@link OrdinalMap} is available.
  */
 class BlockJoinFieldFacetAccumulator {
   /** low 32 bits of an accumulator element: the last parent doc seen for that ordinal */
   private static final long PARENT_MASK = 0xffffffffL;
   /** high 32 bits of an accumulator element: the facet value counter */
   private static final long COUNT_MASK = PARENT_MASK << 32;
   /** adds one to the counter half of an accumulator element */
   private static final long COUNT_INCREMENT = 1L << 32;
   /** initial accumulator element: zero count, parent -1 */
   private static final long EMPTY_ACCUM = PARENT_MASK;

   private final String fieldName;
   private final FieldType fieldType;
   // for term lookups only; may be null (e.g. when the single-valued lookup returns null)
   private final SortedSetDocValues topSSDV;
   // for mapping per-segment ords to global ones; null unless the top-level doc values expose a mapping
   private final OrdinalMap ordinalMap;
   private int currentSegment = -1;
   private int[] globalCounts;
   private SortedSetDocValues segmentSSDV;
   // non-null when segmentSSDV can be unwrapped to single-valued doc values
   private SortedDocValues segmentSDV;
   // elems are : facet value counter<<32 | last parent doc num; slot [0] is "missing", ord n lives at [n+1]
   private long[] segmentAccums = new long[0];

   /**
    * Resolves the field's type and its top-level doc values.
    *
    * @param fieldName name of the field to facet on
    * @param searcher searcher providing the schema and the top-level (slow) reader
    * @throws IOException if the doc values cannot be obtained
    */
   BlockJoinFieldFacetAccumulator(String fieldName, SolrIndexSearcher searcher) throws IOException {
     this.fieldName = fieldName;
     final SchemaField schemaField = searcher.getSchema().getField(fieldName);
     this.fieldType = schemaField.getType();

     SortedSetDocValues top = null;
     OrdinalMap mapping = null;
     if (schemaField.multiValued()) {
       top = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
       if (top instanceof MultiDocValues.MultiSortedSetDocValues) {
         mapping = ((MultiDocValues.MultiSortedSetDocValues) top).mapping;
       }
     } else {
       final SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
       if (single instanceof MultiDocValues.MultiSortedDocValues) {
         mapping = ((MultiDocValues.MultiSortedDocValues) single).mapping;
       }
       if (single != null) {
         top = DocValues.singleton(single);
       }
     }
     this.topSSDV = top;
     this.ordinalMap = mapping;
   }

   /**
    * Loads the doc values of the given segment and resets the per-segment accumulators.
    *
    * @return {@code true} if the segment has at least one value for the field
    */
   private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException {
     segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName);
     final int slots = (int) segmentSSDV.getValueCount() + 1; // +1 for the "missing" slot [0]
     segmentAccums = ArrayUtil.grow(segmentAccums, slots);
     // zero counts, -1 parent
     Arrays.fill(segmentAccums, 0, slots, EMPTY_ACCUM);
     segmentSDV = DocValues.unwrapSingleton(segmentSSDV);
     return segmentSSDV.getValueCount() != 0; // perhaps we need to count "missings"??
   }

   /** A restartable doc iterator which also exposes the key its documents are aggregated under. */
   interface AggregatableDocIter extends DocIterator {
     /** rewinds the iterator to its first document */
     void reset();

     /** a key to aggregate the current document */
     int getAggKey();
   }

   /** Iterates the first {@code childCount} entries of an int array, aggregating all of them under one parent doc. */
   static class SortedIntsAggDocIterator implements AggregatableDocIter {
     private final int[] childDocs;
     private final int childCount;
     private final int parentDoc;
     // starts at the first child, so the iterator is usable even before reset() is called
     private int pos = 0;

     /**
      * @param childDocs buffer holding the child doc numbers
      * @param childCount number of leading entries of {@code childDocs} to iterate
      * @param parentDoc aggregation key reported for every child
      */
     public SortedIntsAggDocIterator(int[] childDocs, int childCount, int parentDoc) {
       this.childDocs = childDocs;
       this.childCount = childCount;
       this.parentDoc = parentDoc;
     }

     @Override
     public boolean hasNext() {
       return pos < childCount;
     }

     @Override
     public Integer next() {
       return nextDoc();
     }

     @Override
     public int nextDoc() {
       return childDocs[pos++];
     }

     @Override
     public float score() {
       return 0;
     }

     @Override
     public void reset() {
       pos = 0;
     }

     @Override
     public int getAggKey() {
       return parentDoc;
     }
   }

   /**
    * Accumulates the field values of every document of the given iterator under the
    * iterator's aggregation key. The iterator is reset before it is consumed.
    *
    * @param iter documents of the current segment to account for
    * @throws IOException if the segment doc values cannot be read
    */
   void updateCountsWithMatchedBlock(AggregatableDocIter iter) throws IOException {
     if (segmentSDV != null) {
       // some codecs may optimize SORTED_SET storage for single-valued fields
       for (iter.reset(); iter.hasNext(); ) {
         final int docNum = iter.nextDoc();
         if (docNum > segmentSDV.docID()) {
           segmentSDV.advance(docNum);
         }
         // a doc without a value is accounted under ord -1, i.e. the "missing" slot [0]
         final int term = docNum == segmentSDV.docID() ? segmentSDV.ordValue() : -1;
         accumulateTermOrd(term, iter.getAggKey());
       }
     } else {
       for (iter.reset(); iter.hasNext(); ) {
         final int docNum = iter.nextDoc();
         if (docNum > segmentSSDV.docID()) {
           segmentSSDV.advance(docNum);
         }
         if (docNum == segmentSSDV.docID()) {
           // the first ord is always accumulated (a negative one lands in the "missing" slot [0]
           // and ends the loop); further ords are accumulated until nextOrd() turns negative
           int term = (int) segmentSSDV.nextOrd();
           do {
             accumulateTermOrd(term, iter.getAggKey());
           } while (term >= 0 && (term = (int) segmentSSDV.nextOrd()) >= 0);
         }
       }
     }
   }

   String getFieldName() {
     return fieldName;
   }

   /**
    * Builds the facet response: readable term to count, for every term with a positive count.
    * copy paste from {@link DocValuesFacets}
    *
    * @return the counts in ordinal order; the "missing" slot is not reported
    * @throws IOException if a term cannot be looked up
    */
   NamedList<Integer> getFacetValue() throws IOException {
     final NamedList<Integer> facetValue = new NamedList<>();
     final CharsRefBuilder charsRef = new CharsRefBuilder();
     // if there is no globs, take segment's ones
     final boolean useGlobal = globalCounts != null;
     final int slots = useGlobal ? globalCounts.length : segmentAccums.length;
     for (int i = 1; i < slots; i++) { // slot [0] holds "missing", ord n lives at [n+1]
       final int count = useGlobal ? globalCounts[i] : (int) (segmentAccums[i] >> 32);
       if (count > 0) {
         final BytesRef term = topSSDV.lookupOrd(i - 1);
         fieldType.indexedToReadable(term, charsRef);
         facetValue.add(charsRef.toString(), count);
       }
     }
     return facetValue;
   }

   // @todo we can track in max term nums to loop only changed range while migrating and labeling
   /**
    * Counts the given segment ordinal for the given parent, unless this parent was the last one
    * counted for that ordinal already.
    *
    * @param term segment ordinal, or -1 for "missing"
    * @param parentDoc aggregation key of the document carrying the term
    */
   private void accumulateTermOrd(int term, int parentDoc) {
     final int slot = 1 + term;
     final long accum = segmentAccums[slot];
     if ((int) accum != parentDoc) {
       // incrementing older 32, reset smaller 32, set them to the new parent
       final long bumpedCount = (accum + COUNT_INCREMENT) & COUNT_MASK;
       // mask the key so that sign extension of a negative int cannot overwrite the counter half
       segmentAccums[slot] = bumpedCount | (parentDoc & PARENT_MASK);
     }
   }

   /**
    * Switches the accumulator to the given segment, resetting the per-segment counts.
    *
    * @throws IOException if the segment doc values cannot be loaded
    */
   void setNextReader(LeafReaderContext context) throws IOException {
     initSegmentData(fieldName, context);
     currentSegment = context.ord;
   }

   /**
    * Folds the counts of the current segment into the global counts. Does nothing when no
    * segment was set yet, nothing was accumulated, or there is no ordinal map.
    */
   void migrateGlobal() {
     if (currentSegment < 0 // no hits
         || segmentAccums.length == 0
         || ordinalMap == null) { // single segment
       return;
     }

     if (globalCounts == null) {
       globalCounts = new int[(int) ordinalMap.getValueCount() + /*[0] for missing*/ 1];
     }

     migrateGlobal(globalCounts, segmentAccums, currentSegment, ordinalMap);
   }

   /** folds counts in segment ordinal space (segCounts) into global ordinal space (counts)
    * copy paste-from {@link DocValuesFacets#migrateGlobal(int[], int[], int, OrdinalMap)}*/
   void migrateGlobal(int[] counts, long[] segCounts, int subIndex, OrdinalMap map) {
     final LongValues ordMap = map.getGlobalOrds(subIndex);
     // missing count
     counts[0] += (int) (segCounts[0] >> 32);

     // migrate actual ordinals; the bound comes from the current segment's doc values
     final long valueCount = segmentSSDV.getValueCount();
     for (int ord = 1; ord <= valueCount; ord++) {
       final int count = (int) (segCounts[ord] >> 32);
       if (count != 0) {
         counts[1 + (int) ordMap.get(ord - 1)] += count;
       }
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.solr.search.join;

	import java.io.IOException;
	import java.util.Arrays;

	import org.apache.lucene.index.DocValues;
	import org.apache.lucene.index.LeafReaderContext;
	import org.apache.lucene.index.MultiDocValues;
	import org.apache.lucene.index.OrdinalMap;
	import org.apache.lucene.index.SortedDocValues;
	import org.apache.lucene.index.SortedSetDocValues;
	import org.apache.lucene.util.ArrayUtil;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.CharsRefBuilder;
	import org.apache.lucene.util.LongValues;
	import org.apache.solr.common.util.NamedList;
	import org.apache.solr.request.DocValuesFacets;
	import org.apache.solr.schema.FieldType;
	import org.apache.solr.schema.SchemaField;
	import org.apache.solr.search.DocIterator;
	import org.apache.solr.search.SolrIndexSearcher;

	/**
	* This class is responsible for collecting block join facet counts for particular field
	*/
	class BlockJoinFieldFacetAccumulator {
	private String fieldName;
	private FieldType fieldType;
	private int currentSegment = -1;
	// for term lookups only
	private SortedSetDocValues topSSDV;
	private int[] globalCounts;
	private SortedSetDocValues segmentSSDV;
	// elems are : facet value counter<<32 \| last parent doc num
	private long[] segmentAccums = new long[0];
	// for mapping per-segment ords to global ones
	private OrdinalMap ordinalMap;
	private SchemaField schemaField;
	private SortedDocValues segmentSDV;

	BlockJoinFieldFacetAccumulator(String fieldName, SolrIndexSearcher searcher) throws IOException {
	this.fieldName = fieldName;
	schemaField = searcher.getSchema().getField(fieldName);
	fieldType = schemaField.getType();
	ordinalMap = null;
	if (schemaField.multiValued()) {
	topSSDV = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
	if (topSSDV instanceof MultiDocValues.MultiSortedSetDocValues) {
	ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) topSSDV).mapping;
	}
	} else {
	SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
	if (single instanceof MultiDocValues.MultiSortedDocValues) {
	ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
	}
	if (single != null) {
	topSSDV = DocValues.singleton(single);
	}
	}
	}

	private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException {
	segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName);
	segmentAccums = ArrayUtil.grow(segmentAccums, (int)segmentSSDV.getValueCount()+1);//+1
	// zero counts, -1 parent
	Arrays.fill(segmentAccums,0,(int)segmentSSDV.getValueCount()+1, 0x00000000ffffffffL);
	segmentSDV = DocValues.unwrapSingleton(segmentSSDV);
	return segmentSSDV.getValueCount()!=0;// perhaps we need to count "missings"??
	}

	interface AggregatableDocIter extends DocIterator {
	void reset();
	/** a key to aggregate the current document */
	int getAggKey();

	}
	static class SortedIntsAggDocIterator implements AggregatableDocIter {
	private int[] childDocs;
	private int childCount;
	private int parentDoc;
	private int pos=-1;

	public SortedIntsAggDocIterator(int[] childDocs, int childCount, int parentDoc) {
	this.childDocs = childDocs;
	this.childCount = childCount;
	this.parentDoc = parentDoc;
	}


	@Override
	public boolean hasNext() {
	return pos<childCount;
	}

	@Override
	public Integer next() {
	return nextDoc();
	}

	@Override
	public int nextDoc() {
	return childDocs[pos++];
	}

	@Override
	public float score() {
	return 0;
	}
	@Override
	public void reset() {
	pos=0;
	}
	@Override
	public int getAggKey(){
	return parentDoc;
	}
	}

	void updateCountsWithMatchedBlock(AggregatableDocIter iter) throws IOException {
	if (segmentSDV != null) {
	// some codecs may optimize SORTED_SET storage for single-valued fields
	for (iter.reset(); iter.hasNext(); ) {
	final int docNum = iter.nextDoc();
	if (docNum > segmentSDV.docID()) {
	segmentSDV.advance(docNum);
	}

	int term;
	if (docNum == segmentSDV.docID()) {
	term = segmentSDV.ordValue();
	} else {
	term = -1;
	}
	accumulateTermOrd(term, iter.getAggKey());
	//System.out.println("doc# "+docNum+" "+fieldName+" term# "+term+" tick "+Long.toHexString(segmentAccums[1+term]));
	}
	} else {
	for (iter.reset(); iter.hasNext(); ) {
	final int docNum = iter.nextDoc();
	if (docNum > segmentSSDV.docID()) {
	segmentSSDV.advance(docNum);
	}
	if (docNum == segmentSSDV.docID()) {
	int term = (int) segmentSSDV.nextOrd();
	do { // absent values are designated by term=-1, first iteration counts [0] as "missing", and exit, otherwise it spins
	accumulateTermOrd(term, iter.getAggKey());
	} while (term>=0 && (term = (int) segmentSSDV.nextOrd()) >= 0);
	}
	}
	}
	}

	String getFieldName() {
	return fieldName;
	}

	/** copy paste from {@link DocValuesFacets} */
	NamedList<Integer> getFacetValue() throws IOException {
	NamedList<Integer> facetValue = new NamedList<>();
	final CharsRefBuilder charsRef = new CharsRefBuilder(); // if there is no globs, take segment's ones
	for (int i = 1; i< (globalCounts!=null ? globalCounts.length: segmentAccums.length); i++) {
	int count = globalCounts!=null ? globalCounts[i] : (int)(segmentAccums [i]>>32);
	if (count > 0) {
	BytesRef term = topSSDV.lookupOrd(-1 + i);
	fieldType.indexedToReadable(term, charsRef);
	facetValue.add(charsRef.toString(), count);
	}
	}
	return facetValue;
	}

	// @todo we can track in max term nums to loop only changed range while migrating and labeling
	private void accumulateTermOrd(int term, int parentDoc) {
	long accum = segmentAccums[1+term];
	if(((int)(accum & 0xffffffffL))!=parentDoc)
	{// incrementing older 32, reset smaller 32, set them to the new parent
	segmentAccums[1+term] = ((accum +(0x1L<<32))&0xffffffffL<<32)\|parentDoc;
	}
	}

	void setNextReader(LeafReaderContext context) throws IOException {
	initSegmentData(fieldName, context);
	currentSegment = context.ord;
	}

	void migrateGlobal(){
	if (currentSegment<0 // no hits
	\|\| segmentAccums.length==0
	\|\| ordinalMap==null) { // single segment
	return;
	}

	if(globalCounts==null){
	// it might be just a single segment
	globalCounts = new int[(int) ordinalMap.getValueCount()+ /[0] for missing/1];
	}else{
	assert currentSegment>=0;
	}

	migrateGlobal(globalCounts, segmentAccums, currentSegment, ordinalMap);
	}

	/** folds counts in segment ordinal space (segCounts) into global ordinal space (counts)
	* copy paste-from {@link DocValuesFacets#migrateGlobal(int[], int[], int, OrdinalMap)}*/
	void migrateGlobal(int counts[], long segCounts[], int subIndex, OrdinalMap map) {

	final LongValues ordMap = map.getGlobalOrds(subIndex);
	// missing count
	counts[0] += (int) (segCounts[0]>>32);

	// migrate actual ordinals
	for (int ord = 1; ord <= segmentSSDV.getValueCount(); ord++) {
	int count = (int) (segCounts[ord]>>32);
	if (count != 0) {
	counts[1+(int) ordMap.get(ord-1)] += count;
	}
	}
	}
	}